Bladeren bron

Add COMMUTE flag to let StarPU commute task with write access

Samuel Thibault 12 jaren geleden
bovenliggende
commit
02499756ce

+ 2 - 0
ChangeLog

@@ -30,6 +30,8 @@ New features:
 	  before the corresponding data, which allows the receiver to
 	  allocate data correctly, and to submit the matching receive of
 	  the envelope.
+  * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
+    let starpu commute write accesses.
 
 Small features:
   * Add cl_arg_free field to enable automatic free(cl_arg) on task

+ 6 - 0
doc/chapters/api.texi

@@ -568,6 +568,12 @@ ignored for now.
 @end table
 @end deftp
 
+In addition to that, @code{STARPU_COMMUTE} can be passed along @code{STARPU_W}
+or @code{STARPU_RW} to express that StarPU can let tasks commute, which is
+useful e.g. when bringing a contribution into some data, which can be done
+in any order (but still require sequential consistency against reads or
+non-commutative writes).
+
 @deftp {Data Type} {starpu_data_handle_t}
 StarPU uses @code{starpu_data_handle_t} as an opaque handle to manage a piece of
 data. Once a piece of data has been registered to StarPU, it is associated to a

+ 3 - 1
doc/chapters/perf-optimization.texi

@@ -79,7 +79,9 @@ dependencies on that data.
 
 In the same vein, accumulation of results in the same data can become a
 bottleneck. The use of the @code{STARPU_REDUX} mode permits to optimize such
-accumulation (@pxref{Data reduction}).
+accumulation (@pxref{Data reduction}). To a lesser extent, the use of the
+@code{STARPU_COMMUTE} flag keeps the bottleneck, but at least permits the
+accumulation to happen in any order.
 
 Applications often need a data just for temporary results.  In such a case,
 registration can be made without an initial value, for instance this produces a vector data:

+ 3 - 1
include/starpu_data.h

@@ -35,7 +35,9 @@ enum starpu_data_access_mode
 	STARPU_W=(1<<1),
 	STARPU_RW=(STARPU_R|STARPU_W),
 	STARPU_SCRATCH=(1<<2),
-	STARPU_REDUX=(1<<3)
+	STARPU_REDUX=(1<<3),
+	STARPU_COMMUTE=(1<<4)
+	/* Note: other STARPU_* values in include/starpu_task_util.h */
 };
 
 struct starpu_data_descr

+ 12 - 12
include/starpu_task_util.h

@@ -35,18 +35,18 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 				void (*callback)(void *), void *callback_arg);
 
 /* Constants used by the starpu_insert_task helper to determine the different types of argument */
-#define STARPU_VALUE		(1<<4)	/* Pointer to a constant value */
-#define STARPU_CALLBACK		(1<<5)	/* Callback function */
-#define STARPU_CALLBACK_WITH_ARG	(1<<6)	/* Callback function */
-#define STARPU_CALLBACK_ARG	(1<<7)	/* Argument of the callback function (of type void *) */
-#define STARPU_PRIORITY		(1<<8)	/* Priority associated to the task */
-#define STARPU_EXECUTE_ON_NODE	(1<<9)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_EXECUTE_ON_DATA	(1<<10)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
-#define STARPU_TAG              (1<<12) /* Tag */
-#define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
-#define STARPU_FLOPS	        (1<<14)	/* Used to specify the number of flops needed to be executed by a task */
-#define STARPU_SCHED_CTX	(1<<15)	/* Used to specify the sched_ctx to which the task will be submitted */
+#define STARPU_VALUE		(1<<19)	/* Pointer to a constant value */
+#define STARPU_CALLBACK		(1<<20)	/* Callback function */
+#define STARPU_CALLBACK_WITH_ARG	(1<<21)	/* Callback function */
+#define STARPU_CALLBACK_ARG	(1<<22)	/* Argument of the callback function (of type void *) */
+#define STARPU_PRIORITY		(1<<23)	/* Priority associated to the task */
+#define STARPU_EXECUTE_ON_NODE	(1<<24)	/* Used by MPI to define which task is going to execute the codelet */
+#define STARPU_EXECUTE_ON_DATA	(1<<25)	/* Used by MPI to define which task is going to execute the codelet */
+#define STARPU_DATA_ARRAY       (1<<26) /* Array of data handles */
+#define STARPU_TAG              (1<<27) /* Tag */
+#define STARPU_HYPERVISOR_TAG	(1<<28)	/* Used to tag a task after whose execution we'll execute  a code */
+#define STARPU_FLOPS	        (1<<29)	/* Used to specify the number of flops needed to be executed by a task */
+#define STARPU_SCHED_CTX	(1<<30)	/* Used to specify the sched_ctx to which the task will be submitted */
 
 /* Wrapper to create a task. */
 int starpu_insert_task(struct starpu_codelet *cl, ...);

+ 3 - 2
src/core/dependencies/data_concurrency.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -55,6 +55,7 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 	if (handle->refcnt == 0)
 		return _starpu_data_requester_list_pop_front(req_list);
 
+	/* Already writing to it, do not let another write access through */
 	if (handle->current_mode == STARPU_W)
 		return NULL;
 
@@ -193,7 +194,7 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	 * which is a sorted copy of it. */
 	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
-	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
+	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index) & ~STARPU_COMMUTE;
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 }

+ 3 - 1
src/core/dependencies/implicit_data_deps.c

@@ -267,7 +267,9 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 			if (previous_mode & STARPU_W)
 			{
 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
-				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
+				/* Add WAW dependency if any of the two writers refuse to commute */
+				if (! (mode & STARPU_COMMUTE && previous_mode & STARPU_COMMUTE))
+					_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
 			}
 			else
 			{

+ 3 - 0
src/datawizard/coherency.c

@@ -351,6 +351,9 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 								  unsigned async,
 								  void (*callback_func)(void *), void *callback_arg)
 {
+	/* We don't care about commuting for data requests, that was handled before. */
+	mode &= ~STARPU_COMMUTE;
+
 	/* This function is called with handle's header lock taken */
 	_starpu_spin_checklocked(&handle->header_lock);
 

+ 4 - 1
src/datawizard/coherency.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -106,6 +106,9 @@ struct _starpu_data_state
 	 * the req_list anymore), i.e. the number of holders of the
 	 * current_mode rwlock */
 	unsigned refcnt;
+	/* Current access mode. Is always either STARPU_R, STARPU_W,
+	 * STARPU_SCRATCH or STARPU_REDUX, but never a combination such as
+	 * STARPU_RW. */
 	enum starpu_data_access_mode current_mode;
 	/* protect meta data */
 	struct _starpu_spinlock header_lock;

+ 3 - 3
src/util/starpu_insert_task_utils.c

@@ -54,7 +54,7 @@ size_t _starpu_insert_task_get_arg_size(va_list varg_list)
 
 	while ((arg_type = va_arg(varg_list, int)) != 0)
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
@@ -135,7 +135,7 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
@@ -230,7 +230,7 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 			/* We have an access mode : we expect to find a handle */
 			starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t);

+ 1 - 0
tests/Makefile.am

@@ -146,6 +146,7 @@ noinst_PROGRAMS =				\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\
+	datawizard/commute			\
 	datawizard/copy				\
 	datawizard/data_implicit_deps		\
 	datawizard/data_lookup			\