Pārlūkot izejas kodu

Simplify accesses to the ordered buffers

Samuel Thibault 7 gadi atpakaļ
vecāks
revīzija
388ada1269

+ 14 - 12
src/core/dependencies/data_arbiter_concurrency.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2017                                Université de Bordeaux
+ * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2013,2015,2017                           Inria
  * Copyright (C) 2010-2013,2016-2017                      CNRS
  *
@@ -408,7 +408,8 @@ static void ___starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, u
 #else // LOCK_OR_DELEGATE
 void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers)
 {
-	starpu_arbiter_t arbiter = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf)->arbiter;
+	struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
+	starpu_arbiter_t arbiter = descrs[buf].handle->arbiter;
 	STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex);
 #endif
 	STARPU_ASSERT(arbiter);
@@ -422,14 +423,14 @@ void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned b
 
 	for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++)
 	{
-		handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_arbiter);
-		mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, idx_buf_arbiter) & ~STARPU_COMMUTE;
+		handle = descrs[idx_buf_arbiter].handle;
+		mode = descrs[idx_buf_arbiter].mode & ~STARPU_COMMUTE;
 
 		mode = _starpu_arbiter_filter_modes(mode);
 
 		STARPU_ASSERT_MSG(!(mode & STARPU_REDUX), "REDUX with arbiter is not implemented\n");
 
-		if (idx_buf_arbiter && (_STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_arbiter-1)==handle))
+		if (idx_buf_arbiter && (descrs[idx_buf_arbiter-1].handle == handle))
 			/* We have already requested this data, skip it. This
 			 * depends on ordering putting writes before reads, see
 			 * _starpu_compar_handles.  */
@@ -483,9 +484,9 @@ void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned b
 		unsigned idx_buf_cancel;
 		for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++)
 		{
-			starpu_data_handle_t cancel_handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_cancel);
+			starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle;
 
-			if (idx_buf_cancel && (_STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_cancel-1)==cancel_handle))
+			if (idx_buf_cancel && (descrs[idx_buf_cancel-1].handle == cancel_handle))
 				continue;
 			if (cancel_handle->arbiter != arbiter)
 				/* Will have to process another arbiter, will do that later */
@@ -630,11 +631,12 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 		enum starpu_data_access_mode mode;
 
 		unsigned start_buf_arbiter = r->buffer_index;
+		struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 
 		for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++)
 		{
-			handle_arbiter = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_arbiter);
-			if (idx_buf_arbiter && (_STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_arbiter-1)==handle_arbiter))
+			handle_arbiter = descrs[idx_buf_arbiter].handle;
+			if (idx_buf_arbiter && (descrs[idx_buf_arbiter-1].handle == handle_arbiter))
 				/* We have already requested this data, skip it. This
 				 * depends on ordering putting writes before reads, see
 				 * _starpu_compar_handles.  */
@@ -643,7 +645,7 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 				/* Will have to process another arbiter, will do that later */
 				break;
 
-			mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, idx_buf_arbiter);
+			mode = descrs[idx_buf_arbiter].mode;
 			mode = _starpu_arbiter_filter_modes(mode);
 
 			/* we post all arbiter  */
@@ -711,8 +713,8 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 			unsigned idx_buf_cancel;
 			for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++)
 			{
-				starpu_data_handle_t cancel_handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_cancel);
-				if (idx_buf_cancel && (_STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, idx_buf_cancel-1)==cancel_handle))
+				starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle;
+				if (idx_buf_cancel && (descrs[idx_buf_cancel-1].handle == cancel_handle))
 					continue;
 				if (cancel_handle->arbiter != arbiter)
 					break;

+ 9 - 11
src/core/dependencies/data_concurrency.c

@@ -252,8 +252,9 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 {
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	 * which is a sorted copy of it. */
-	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
-	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index) & ~STARPU_COMMUTE;
+	struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]);
+	starpu_data_handle_t handle = buffer->handle;
+	enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE;
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 }
@@ -343,23 +344,20 @@ void _starpu_job_set_ordered_buffers(struct _starpu_job *j)
 	unsigned i;
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
 	struct starpu_task *task = j->task;
+	struct _starpu_data_descr *buffers = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 
 	for (i=0 ; i<nbuffers; i++)
 	{
-		_STARPU_JOB_SET_ORDERED_BUFFER_INDEX(j, i, i);
-
-		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
-		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
-
-		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i);
-		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
+		buffers[i].index = i;
+		buffers[i].handle = STARPU_TASK_GET_HANDLE(task, i);
+		buffers[i].mode = STARPU_TASK_GET_MODE(task, i);
 
 		int node = -1;
 		if (task->cl->specific_nodes)
 			node = STARPU_CODELET_GET_NODE(task->cl, i);
-		_STARPU_JOB_SET_ORDERED_BUFFER_NODE(j, node, i);
+		buffers[i].node = node;
 	}
-	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), nbuffers);
+	_starpu_sort_task_handles(buffers, nbuffers);
 }
 
 /* Sort the data used by the given job by handle pointer value order, and

+ 13 - 11
src/core/dependencies/implicit_data_deps.c

@@ -386,13 +386,14 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 	j->sequential_consistency = 1;
 
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
+        struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 	struct _starpu_task_wrapper_dlist *dep_slots = _STARPU_JOB_GET_DEP_SLOTS(j);
 
 	unsigned buffer;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
-		starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer);
-		enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer);
+		starpu_data_handle_t handle = descrs[buffer].handle;
+		enum starpu_data_access_mode mode = descrs[buffer].mode;
 		struct starpu_task *new_task;
 
 		/* Scratch memory does not introduce any deps */
@@ -401,8 +402,8 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
 		if (buffer)
 		{
-			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer - 1);
-			enum starpu_data_access_mode mode_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer - 1);
+			starpu_data_handle_t handle_m1 = descrs[buffer-1].handle;
+			enum starpu_data_access_mode mode_m1 = descrs[buffer-1].mode;
 			if (handle_m1 == handle && mode_m1 == mode)
 				/* We have already added dependencies for this
 				 * data, skip it. This reduces the number of
@@ -414,7 +415,7 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 		}
 
 		STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
-		unsigned index = _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(j, buffer);
+		unsigned index = descrs[buffer].index;
 		unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? task->handles_sequential_consistency[index] : handle->sequential_consistency;
 		if (!task_handle_sequential_consistency)
 			j->sequential_consistency = 0;
@@ -506,25 +507,26 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 {
 	struct starpu_task *task = j->task;
-        struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
-	struct _starpu_task_wrapper_dlist *slots = _STARPU_JOB_GET_DEP_SLOTS(j);
 
 	if (!task->cl)
 		return;
 
+        struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
+	struct _starpu_task_wrapper_dlist *slots = _STARPU_JOB_GET_DEP_SLOTS(j);
+
         unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned index;
 
 	/* Release all implicit dependencies */
 	for (index = 0; index < nbuffers; index++)
 	{
-		starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, index);
-		enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, index);
+		starpu_data_handle_t handle = descrs[index].handle;
+		enum starpu_data_access_mode mode = descrs[index].mode;
 
 		if (index)
 		{
-			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, index - 1);
-			enum starpu_data_access_mode mode_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, index - 1);
+			starpu_data_handle_t handle_m1 = descrs[index-1].handle;
+			enum starpu_data_access_mode mode_m1 = descrs[index-1].mode;
 			if (handle_m1 == handle && mode_m1 == mode)
 				/* See _starpu_detect_implicit_data_deps */
 				continue;

+ 1 - 1
src/core/jobs.h

@@ -278,7 +278,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 #define _STARPU_JOB_SET_ORDERED_BUFFER_NODE(job, __node, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i].node = __node; else job->ordered_buffers[i].node = __node;} while(0)
 
 #define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i] = buffer; else job->ordered_buffers[i] = buffer;} while(0)
-#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (job->dyn_ordered_buffers) ? job->dyn_ordered_buffers : job->ordered_buffers
+#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers : &(job)->ordered_buffers[0]) /* Pointer to the first ordered buffer descriptor of job: dyn_ordered_buffers when it is set, otherwise the start of ordered_buffers. The argument is parenthesized at each use so any pointer expression can be passed. */
 
 #define _STARPU_JOB_GET_DEP_SLOTS(job) (((job)->dyn_dep_slots) ? (job)->dyn_dep_slots : (job)->dep_slots)