Browse Source

Add COMMUTE flag to let StarPU commute task with write access

Samuel Thibault 12 years ago
parent
commit
02499756ce

+ 2 - 0
ChangeLog

@@ -30,6 +30,8 @@ New features:
 	  before the corresponding data, which allows the receiver to
 	  before the corresponding data, which allows the receiver to
 	  allocate data correctly, and to submit the matching receive of
 	  allocate data correctly, and to submit the matching receive of
 	  the envelope.
 	  the envelope.
+  * New STARPU_COMMUTE flag which can be passed along with STARPU_W or
+    STARPU_RW to let StarPU commute write accesses.
 
 
 Small features:
 Small features:
   * Add cl_arg_free field to enable automatic free(cl_arg) on task
   * Add cl_arg_free field to enable automatic free(cl_arg) on task

+ 6 - 0
doc/chapters/api.texi

@@ -568,6 +568,12 @@ ignored for now.
 @end table
 @end table
 @end deftp
 @end deftp
 
 
+In addition to that, @code{STARPU_COMMUTE} can be passed along with
+@code{STARPU_W} or @code{STARPU_RW} to express that StarPU can let tasks
+commute. This is useful e.g. when bringing a contribution into some data,
+which can be done in any order (but still requires sequential consistency
+against reads or non-commutative writes).
+
 @deftp {Data Type} {starpu_data_handle_t}
 @deftp {Data Type} {starpu_data_handle_t}
 StarPU uses @code{starpu_data_handle_t} as an opaque handle to manage a piece of
 StarPU uses @code{starpu_data_handle_t} as an opaque handle to manage a piece of
 data. Once a piece of data has been registered to StarPU, it is associated to a
 data. Once a piece of data has been registered to StarPU, it is associated to a

+ 3 - 1
doc/chapters/perf-optimization.texi

@@ -79,7 +79,9 @@ dependencies on that data.
 
 
 In the same vein, accumulation of results in the same data can become a
 In the same vein, accumulation of results in the same data can become a
 bottleneck. The use of the @code{STARPU_REDUX} mode permits to optimize such
 bottleneck. The use of the @code{STARPU_REDUX} mode permits to optimize such
-accumulation (@pxref{Data reduction}).
+accumulation (@pxref{Data reduction}). To a lesser extent, the use of the
+@code{STARPU_COMMUTE} flag keeps the bottleneck, but at least permits the
+accumulation to happen in any order.
 
 
 Applications often need a data just for temporary results.  In such a case,
 Applications often need a data just for temporary results.  In such a case,
 registration can be made without an initial value, for instance this produces a vector data:
 registration can be made without an initial value, for instance this produces a vector data:

+ 3 - 1
include/starpu_data.h

@@ -35,7 +35,9 @@ enum starpu_data_access_mode
 	STARPU_W=(1<<1),
 	STARPU_W=(1<<1),
 	STARPU_RW=(STARPU_R|STARPU_W),
 	STARPU_RW=(STARPU_R|STARPU_W),
 	STARPU_SCRATCH=(1<<2),
 	STARPU_SCRATCH=(1<<2),
-	STARPU_REDUX=(1<<3)
+	STARPU_REDUX=(1<<3),
+	STARPU_COMMUTE=(1<<4)
+	/* Note: other STARPU_* values in include/starpu_task_util.h */
 };
 };
 
 
 struct starpu_data_descr
 struct starpu_data_descr

+ 12 - 12
include/starpu_task_util.h

@@ -35,18 +35,18 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 				void (*callback)(void *), void *callback_arg);
 				void (*callback)(void *), void *callback_arg);
 
 
 /* Constants used by the starpu_insert_task helper to determine the different types of argument */
 /* Constants used by the starpu_insert_task helper to determine the different types of argument */
-#define STARPU_VALUE		(1<<4)	/* Pointer to a constant value */
-#define STARPU_CALLBACK		(1<<5)	/* Callback function */
-#define STARPU_CALLBACK_WITH_ARG	(1<<6)	/* Callback function */
-#define STARPU_CALLBACK_ARG	(1<<7)	/* Argument of the callback function (of type void *) */
-#define STARPU_PRIORITY		(1<<8)	/* Priority associated to the task */
-#define STARPU_EXECUTE_ON_NODE	(1<<9)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_EXECUTE_ON_DATA	(1<<10)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
-#define STARPU_TAG              (1<<12) /* Tag */
-#define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
-#define STARPU_FLOPS	        (1<<14)	/* Used to specify the number of flops needed to be executed by a task */
-#define STARPU_SCHED_CTX	(1<<15)	/* Used to specify the sched_ctx to which the task will be submitted */
+#define STARPU_VALUE		(1<<19)	/* Pointer to a constant value */
+#define STARPU_CALLBACK		(1<<20)	/* Callback function */
+#define STARPU_CALLBACK_WITH_ARG	(1<<21)	/* Callback function */
+#define STARPU_CALLBACK_ARG	(1<<22)	/* Argument of the callback function (of type void *) */
+#define STARPU_PRIORITY		(1<<23)	/* Priority associated to the task */
+#define STARPU_EXECUTE_ON_NODE	(1<<24)	/* Used by MPI to define which task is going to execute the codelet */
+#define STARPU_EXECUTE_ON_DATA	(1<<25)	/* Used by MPI to define which task is going to execute the codelet */
+#define STARPU_DATA_ARRAY       (1<<26) /* Array of data handles */
+#define STARPU_TAG              (1<<27) /* Tag */
+#define STARPU_HYPERVISOR_TAG	(1<<28)	/* Used to tag a task after whose execution we'll execute  a code */
+#define STARPU_FLOPS	        (1<<29)	/* Used to specify the number of flops needed to be executed by a task */
+#define STARPU_SCHED_CTX	(1<<30)	/* Used to specify the sched_ctx to which the task will be submitted */
 
 
 /* Wrapper to create a task. */
 /* Wrapper to create a task. */
 int starpu_insert_task(struct starpu_codelet *cl, ...);
 int starpu_insert_task(struct starpu_codelet *cl, ...);

+ 3 - 2
src/core/dependencies/data_concurrency.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -55,6 +55,7 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 	if (handle->refcnt == 0)
 	if (handle->refcnt == 0)
 		return _starpu_data_requester_list_pop_front(req_list);
 		return _starpu_data_requester_list_pop_front(req_list);
 
 
+	/* Already writing to it, do not let another write access through */
 	if (handle->current_mode == STARPU_W)
 	if (handle->current_mode == STARPU_W)
 		return NULL;
 		return NULL;
 
 
@@ -193,7 +194,7 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	 * which is a sorted copy of it. */
 	 * which is a sorted copy of it. */
 	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
 	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
-	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
+	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index) & ~STARPU_COMMUTE;
 
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 }
 }

+ 3 - 1
src/core/dependencies/implicit_data_deps.c

@@ -267,7 +267,9 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 			if (previous_mode & STARPU_W)
 			if (previous_mode & STARPU_W)
 			{
 			{
 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
-				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
+				/* Add WAW dependency if either of the two writers refuses to commute */
+				if (! (mode & STARPU_COMMUTE && previous_mode & STARPU_COMMUTE))
+					_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
 			}
 			}
 			else
 			else
 			{
 			{

+ 3 - 0
src/datawizard/coherency.c

@@ -351,6 +351,9 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 								  unsigned async,
 								  unsigned async,
 								  void (*callback_func)(void *), void *callback_arg)
 								  void (*callback_func)(void *), void *callback_arg)
 {
 {
+	/* We don't care about commuting for data requests, that was handled before. */
+	mode &= ~STARPU_COMMUTE;
+
 	/* This function is called with handle's header lock taken */
 	/* This function is called with handle's header lock taken */
 	_starpu_spin_checklocked(&handle->header_lock);
 	_starpu_spin_checklocked(&handle->header_lock);
 
 

+ 4 - 1
src/datawizard/coherency.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -106,6 +106,9 @@ struct _starpu_data_state
 	 * the req_list anymore), i.e. the number of holders of the
 	 * the req_list anymore), i.e. the number of holders of the
 	 * current_mode rwlock */
 	 * current_mode rwlock */
 	unsigned refcnt;
 	unsigned refcnt;
+	/* Current access mode. Is always either STARPU_R, STARPU_W,
+	 * STARPU_SCRATCH or STARPU_REDUX, but never a combination such as
+	 * STARPU_RW. */
 	enum starpu_data_access_mode current_mode;
 	enum starpu_data_access_mode current_mode;
 	/* protect meta data */
 	/* protect meta data */
 	struct _starpu_spinlock header_lock;
 	struct _starpu_spinlock header_lock;

+ 3 - 3
src/util/starpu_insert_task_utils.c

@@ -54,7 +54,7 @@ size_t _starpu_insert_task_get_arg_size(va_list varg_list)
 
 
 	while ((arg_type = va_arg(varg_list, int)) != 0)
 	while ((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
 		}
@@ -135,7 +135,7 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list
 
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 		{
 			(void)va_arg(varg_list, starpu_data_handle_t);
 			(void)va_arg(varg_list, starpu_data_handle_t);
 		}
 		}
@@ -230,7 +230,7 @@ int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_si
 
 
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	while((arg_type = va_arg(varg_list, int)) != 0)
 	{
 	{
-		if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type == STARPU_SCRATCH || arg_type == STARPU_REDUX)
+		if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
 		{
 		{
 			/* We have an access mode : we expect to find a handle */
 			/* We have an access mode : we expect to find a handle */
 			starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t);
 			starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t);

+ 1 - 0
tests/Makefile.am

@@ -146,6 +146,7 @@ noinst_PROGRAMS =				\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\
 	datawizard/acquire_release2		\
+	datawizard/commute			\
 	datawizard/copy				\
 	datawizard/copy				\
 	datawizard/data_implicit_deps		\
 	datawizard/data_implicit_deps		\
 	datawizard/data_lookup			\
 	datawizard/data_lookup			\