Browse Source

Add starpu_data_release_to and starpu_data_release_to_on_node

Samuel Thibault 4 years ago
parent
commit
c70a3e76f2

+ 1 - 0
ChangeLog

@@ -42,6 +42,7 @@ New features:
   * Add a task prefetch level, to improve retaining data in accelerators so we
   * Add a task prefetch level, to improve retaining data in accelerators so we
     can make prefetch more aggressive.
     can make prefetch more aggressive.
   * Add starpu_data_dup_ro().
   * Add starpu_data_dup_ro().
+  * Add starpu_data_release_to() and starpu_data_release_to_on_node().
 
 
 Small changes:
 Small changes:
   * Add a synthetic energy efficiency testcase.
   * Add a synthetic energy efficiency testcase.

+ 19 - 1
include/starpu_data.h

@@ -350,12 +350,30 @@ void starpu_data_release(starpu_data_handle_t handle);
 
 
 /**
 /**
    Similar to starpu_data_release(), except that the data
    Similar to starpu_data_release(), except that the data
-   will be available on the given memory \p node instead of main memory.
+   was made available on the given memory \p node instead of main memory.
    The \p node parameter must be exactly the same as the corresponding \c
    The \p node parameter must be exactly the same as the corresponding \c
    starpu_data_acquire_on_node* call.
    starpu_data_acquire_on_node* call.
 */
 */
 void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
 void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
 
 
+/**
+   Partly release the piece of data acquired by the application either by
+   starpu_data_acquire() or by starpu_data_acquire_cb(), switching the
+   acquisition down to \p down_to_mode. For now, only releasing from STARPU_RW
+   or STARPU_W acquisition down to STARPU_R is supported.  STARPU_NONE can also
+   be passed as \p down_to_mode, in which case this is equivalent to calling
+   starpu_data_release().
+*/
+void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode);
+
+/**
+   Similar to starpu_data_release_to(), except that the data
+   was made available on the given memory \p node instead of main memory.
+   The \p node parameter must be exactly the same as the corresponding \c
+   starpu_data_acquire_on_node* call.
+*/
+void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode, int node);
+
 /** @} */
 /** @} */
 
 
 /**
 /**

+ 33 - 9
src/core/dependencies/data_arbiter_concurrency.c

@@ -533,7 +533,7 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 }
 }
 void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 #else // LOCK_OR_DELEGATE
 #else // LOCK_OR_DELEGATE
-void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
+void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode)
 #endif
 #endif
 {
 {
 	starpu_arbiter_t arbiter = handle->arbiter;
 	starpu_arbiter_t arbiter = handle->arbiter;
@@ -546,10 +546,22 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 	{
 	{
 		/* No waiter, just remove our reference */
 		/* No waiter, just remove our reference */
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
-		STARPU_ASSERT(handle->refcnt > 0);
-		handle->refcnt--;
-		STARPU_ASSERT(handle->busy_count > 0);
-		handle->busy_count--;
+		if (down_to_mode == STARPU_NONE)
+		{
+			STARPU_ASSERT(handle->refcnt > 0);
+			handle->refcnt--;
+			STARPU_ASSERT(handle->busy_count > 0);
+			handle->busy_count--;
+		}
+		else
+		{
+			/* Downgrade from W or RW down to R, keeping the same reference,
+			 * but thus allowing other readers without allowing writers.  */
+			STARPU_ASSERT(down_to_mode == STARPU_R &&
+					(handle->current_mode == STARPU_RW ||
+					 handle->current_mode == STARPU_W));
+			handle->current_mode = down_to_mode;
+		}
 #ifndef LOCK_OR_DELEGATE
 #ifndef LOCK_OR_DELEGATE
 		STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex);
 #endif
 #endif
@@ -562,10 +574,22 @@ void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle)
 
 
 	/* There is a waiter, remove our reference */
 	/* There is a waiter, remove our reference */
 	_starpu_spin_lock(&handle->header_lock);
 	_starpu_spin_lock(&handle->header_lock);
-	STARPU_ASSERT(handle->refcnt > 0);
-	handle->refcnt--;
-	STARPU_ASSERT(handle->busy_count > 0);
-	handle->busy_count--;
+	if (down_to_mode == STARPU_NONE)
+	{
+		STARPU_ASSERT(handle->refcnt > 0);
+		handle->refcnt--;
+		STARPU_ASSERT(handle->busy_count > 0);
+		handle->busy_count--;
+	}
+	else
+	{
+		/* Downgrade from W or RW down to R, keeping the same reference,
+		 * but thus allowing other readers without allowing writers.  */
+		STARPU_ASSERT(down_to_mode == STARPU_R &&
+				(handle->current_mode == STARPU_RW ||
+				 handle->current_mode == STARPU_W));
+		handle->current_mode = down_to_mode;
+	}
 	/* There should be at least one busy_count reference for the waiter
 	/* There should be at least one busy_count reference for the waiter
 	 * (thus we don't risk to see the handle disappear below) */
 	 * (thus we don't risk to see the handle disappear below) */
 	STARPU_ASSERT(handle->busy_count > 0);
 	STARPU_ASSERT(handle->busy_count > 0);

+ 23 - 11
src/core/dependencies/data_concurrency.c

@@ -509,7 +509,7 @@ void _starpu_submit_job_take_data_deps(struct _starpu_job *j)
  * This may free the handle if it was lazily unregistered (1 is returned in
  * This may free the handle if it was lazily unregistered (1 is returned in
  * that case). The handle pointer thus becomes invalid for the caller.
  * that case). The handle pointer thus becomes invalid for the caller.
  */
  */
-int _starpu_notify_data_dependencies(starpu_data_handle_t handle)
+int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode)
 {
 {
 	_starpu_spin_checklocked(&handle->header_lock);
 	_starpu_spin_checklocked(&handle->header_lock);
 
 
@@ -521,22 +521,34 @@ int _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 		STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->reduction_req_list));
 		STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->reduction_req_list));
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
 		/* _starpu_notify_arbitered_dependencies will handle its own locking */
 		/* _starpu_notify_arbitered_dependencies will handle its own locking */
-		_starpu_notify_arbitered_dependencies(handle);
+		_starpu_notify_arbitered_dependencies(handle, down_to_mode);
 		/* We have already unlocked */
 		/* We have already unlocked */
 		return 1;
 		return 1;
 	}
 	}
 
 
-	/* A data access has finished so we remove a reference. */
-	STARPU_ASSERT(handle->refcnt > 0);
-	handle->refcnt--;
-	STARPU_ASSERT(handle->busy_count > 0);
-	handle->busy_count--;
-	if (_starpu_data_check_not_busy(handle))
-		/* Handle was destroyed, nothing left to do.  */
-		return 1;
-
 	STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list));
 	STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list));
 
 
+	if (down_to_mode == STARPU_NONE)
+	{
+		/* A data access has finished so we remove a reference. */
+		STARPU_ASSERT(handle->refcnt > 0);
+		handle->refcnt--;
+		STARPU_ASSERT(handle->busy_count > 0);
+		handle->busy_count--;
+		if (_starpu_data_check_not_busy(handle))
+			/* Handle was destroyed, nothing left to do.  */
+			return 1;
+	}
+	else
+	{
+		/* Downgrade from W or RW down to R, keeping the same reference,
+		 * but thus allowing other readers without allowing writers.  */
+		STARPU_ASSERT(down_to_mode == STARPU_R &&
+				(handle->current_mode == STARPU_RW ||
+				 handle->current_mode == STARPU_W));
+		handle->current_mode = down_to_mode;
+	}
+
 	/* In case there is a pending reduction, and that this is the last
 	/* In case there is a pending reduction, and that this is the last
 	 * requester, we may go back to a "normal" coherency model. */
 	 * requester, we may go back to a "normal" coherency model. */
 	if (handle->reduction_refcnt > 0)
 	if (handle->reduction_refcnt > 0)

+ 2 - 2
src/core/dependencies/data_concurrency.h

@@ -28,8 +28,8 @@ void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned b
 void _starpu_submit_job_take_data_deps(struct _starpu_job *j);
 void _starpu_submit_job_take_data_deps(struct _starpu_job *j);
 void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
 void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
 
 
-int _starpu_notify_data_dependencies(starpu_data_handle_t handle);
-void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle);
+int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode);
+void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode);
 
 
 unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle,
 unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle,
 							  enum starpu_data_access_mode mode,
 							  enum starpu_data_access_mode mode,

+ 13 - 10
src/datawizard/coherency.c

@@ -855,7 +855,7 @@ uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle)
 
 
 /* in case the data was accessed on a write mode, do not forget to
 /* in case the data was accessed on a write mode, do not forget to
  * make it accessible again once it is possible ! */
  * make it accessible again once it is possible ! */
-void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, struct _starpu_data_replicate *replicate)
+void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, enum starpu_data_access_mode down_to_mode, struct _starpu_data_replicate *replicate)
 {
 {
 	uint32_t wt_mask;
 	uint32_t wt_mask;
 	wt_mask = default_wt_mask | handle->wt_mask;
 	wt_mask = default_wt_mask | handle->wt_mask;
@@ -880,14 +880,17 @@ void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_
 	if (cpt == STARPU_SPIN_MAXTRY)
 	if (cpt == STARPU_SPIN_MAXTRY)
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
 
 
-	/* Release refcnt taken by fetch_data_on_node */
-	replicate->refcnt--;
-	STARPU_ASSERT_MSG(replicate->refcnt >= 0, "handle %p released too many times", handle);
+	if (down_to_mode == STARPU_NONE)
+	{
+		/* Release refcnt taken by fetch_data_on_node */
+		replicate->refcnt--;
+		STARPU_ASSERT_MSG(replicate->refcnt >= 0, "handle %p released too many times", handle);
 
 
-	STARPU_ASSERT_MSG(handle->busy_count > 0, "handle %p released too many times", handle);
-	handle->busy_count--;
+		STARPU_ASSERT_MSG(handle->busy_count > 0, "handle %p released too many times", handle);
+		handle->busy_count--;
+	}
 
 
-	if (!_starpu_notify_data_dependencies(handle))
+	if (!_starpu_notify_data_dependencies(handle, down_to_mode))
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
 }
 }
 
 
@@ -1222,7 +1225,7 @@ enomem:
 
 
 		local_replicate = get_replicate(handle, mode, workerid, node);
 		local_replicate = get_replicate(handle, mode, workerid, node);
 
 
-		_starpu_release_data_on_node(handle, 0, local_replicate);
+		_starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate);
 	}
 	}
 
 
 	return -1;
 	return -1;
@@ -1331,13 +1334,13 @@ void __starpu_push_task_output(struct _starpu_job *j)
 		if (node == -1)
 		if (node == -1)
 		{
 		{
 			/* NOWHERE case, just notify dependencies */
 			/* NOWHERE case, just notify dependencies */
-			if (!_starpu_notify_data_dependencies(handle))
+			if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
 				_starpu_spin_unlock(&handle->header_lock);
 				_starpu_spin_unlock(&handle->header_lock);
 		}
 		}
 		else
 		else
 		{
 		{
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
-			_starpu_release_data_on_node(handle, 0, local_replicate);
+			_starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate);
 		}
 		}
 	}
 	}
 
 

+ 1 - 0
src/datawizard/coherency.h

@@ -316,6 +316,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 			       void (*callback_func)(void *), void *callback_arg, int prio, const char *origin);
 			       void (*callback_func)(void *), void *callback_arg, int prio, const char *origin);
 /** This releases a reference on the handle */
 /** This releases a reference on the handle */
 void _starpu_release_data_on_node(struct _starpu_data_state *state, uint32_t default_wt_mask,
 void _starpu_release_data_on_node(struct _starpu_data_state *state, uint32_t default_wt_mask,
+				  enum starpu_data_access_mode down_to_mode,
 				  struct _starpu_data_replicate *replicate);
 				  struct _starpu_data_replicate *replicate);
 
 
 void _starpu_update_data_state(starpu_data_handle_t handle,
 void _starpu_update_data_state(starpu_data_handle_t handle,

+ 2 - 2
src/datawizard/interfaces/data_interface.c

@@ -749,12 +749,12 @@ void _starpu_check_if_valid_and_fetch_data_on_node(starpu_data_handle_t handle,
 	{
 	{
 		int ret = _starpu_fetch_data_on_node(handle, handle->home_node, replicate, STARPU_R, 0, STARPU_FETCH, 0, NULL, NULL, 0, origin);
 		int ret = _starpu_fetch_data_on_node(handle, handle->home_node, replicate, STARPU_R, 0, STARPU_FETCH, 0, NULL, NULL, 0, origin);
 		STARPU_ASSERT(!ret);
 		STARPU_ASSERT(!ret);
-		_starpu_release_data_on_node(handle, 0, replicate);
+		_starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate);
 	}
 	}
 	else
 	else
 	{
 	{
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
-		if (!_starpu_notify_data_dependencies(handle))
+		if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
 	}
 	}
 }
 }

+ 23 - 7
src/datawizard/user_interactions.c

@@ -485,16 +485,22 @@ int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access
 
 
 /* This function must be called after starpu_data_acquire so that the
 /* This function must be called after starpu_data_acquire so that the
  * application release the data */
  * application release the data */
-void starpu_data_release_on_node(starpu_data_handle_t handle, int node)
+void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int node)
 {
 {
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 
 
+	STARPU_ASSERT_MSG(mode == STARPU_NONE ||
+			  (mode == STARPU_R &&
+			    (handle->current_mode == STARPU_RW ||
+			     handle->current_mode == STARPU_W)),
+		"We only support releasing from W or RW to R");
+
 	/* In case there are some implicit dependencies, unlock the "post sync" tasks */
 	/* In case there are some implicit dependencies, unlock the "post sync" tasks */
 	_starpu_unlock_post_sync_tasks(handle);
 	_starpu_unlock_post_sync_tasks(handle);
 
 
 	/* The application can now release the rw-lock */
 	/* The application can now release the rw-lock */
 	if (node >= 0)
 	if (node >= 0)
-		_starpu_release_data_on_node(handle, 0, &handle->per_node[node]);
+		_starpu_release_data_on_node(handle, 0, mode, &handle->per_node[node]);
 	else
 	else
 	{
 	{
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
@@ -505,17 +511,27 @@ void starpu_data_release_on_node(starpu_data_handle_t handle, int node)
 				handle->per_node[i].refcnt--;
 				handle->per_node[i].refcnt--;
 		}
 		}
 		handle->busy_count--;
 		handle->busy_count--;
-		if (!_starpu_notify_data_dependencies(handle))
+		if (!_starpu_notify_data_dependencies(handle, mode))
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
 	}
 	}
 }
 }
 
 
-void starpu_data_release(starpu_data_handle_t handle)
+void starpu_data_release_on_node(starpu_data_handle_t handle, int node)
+{
+	starpu_data_release_to_on_node(handle, STARPU_NONE, node);
+}
+
+void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
 {
 {
 	int home_node = handle->home_node;
 	int home_node = handle->home_node;
 	if (home_node < 0)
 	if (home_node < 0)
 		home_node = STARPU_MAIN_RAM;
 		home_node = STARPU_MAIN_RAM;
-	starpu_data_release_on_node(handle, home_node);
+	starpu_data_release_to_on_node(handle, mode, home_node);
+}
+
+void starpu_data_release(starpu_data_handle_t handle)
+{
+	starpu_data_release_to(handle, STARPU_NONE);
 }
 }
 
 
 static void _prefetch_data_on_node(void *arg)
 static void _prefetch_data_on_node(void *arg)
@@ -531,7 +547,7 @@ static void _prefetch_data_on_node(void *arg)
 		_starpu_data_acquire_wrapper_finished(wrapper);
 		_starpu_data_acquire_wrapper_finished(wrapper);
 
 
 	_starpu_spin_lock(&handle->header_lock);
 	_starpu_spin_lock(&handle->header_lock);
-	if (!_starpu_notify_data_dependencies(handle))
+	if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
 }
 }
 
 
@@ -581,7 +597,7 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigne
 		/* In case there was a temporary handle (eg. used for reduction), this
 		/* In case there was a temporary handle (eg. used for reduction), this
 		 * handle may have requested to be destroyed when the data is released
 		 * handle may have requested to be destroyed when the data is released
 		 * */
 		 * */
-		if (!_starpu_notify_data_dependencies(handle))
+		if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
 	}
 	}
 	else if (!async)
 	else if (!async)

+ 1 - 1
src/datawizard/write_back.c

@@ -24,7 +24,7 @@ static void wt_callback(void *arg)
 	starpu_data_handle_t handle = (starpu_data_handle_t) arg;
 	starpu_data_handle_t handle = (starpu_data_handle_t) arg;
 
 
 	_starpu_spin_lock(&handle->header_lock);
 	_starpu_spin_lock(&handle->header_lock);
-	if (!_starpu_notify_data_dependencies(handle))
+	if (!_starpu_notify_data_dependencies(handle, STARPU_NONE))
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
 }
 }
 
 

+ 2 - 2
src/debug/latency.c

@@ -36,7 +36,7 @@ void _starpu_benchmark_ping_pong(starpu_data_handle_t handle,
 		struct _starpu_data_replicate *replicate_0 = &handle->per_node[node0];
 		struct _starpu_data_replicate *replicate_0 = &handle->per_node[node0];
 		ret = _starpu_fetch_data_on_node(handle, node0, replicate_0, STARPU_RW, 0, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong");
 		ret = _starpu_fetch_data_on_node(handle, node0, replicate_0, STARPU_RW, 0, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong");
 		STARPU_ASSERT(!ret);
 		STARPU_ASSERT(!ret);
-		_starpu_release_data_on_node(handle, 0, replicate_0);
+		_starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_0);
 
 
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
 		handle->refcnt++;
 		handle->refcnt++;
@@ -46,6 +46,6 @@ void _starpu_benchmark_ping_pong(starpu_data_handle_t handle,
 		struct _starpu_data_replicate *replicate_1 = &handle->per_node[node1];
 		struct _starpu_data_replicate *replicate_1 = &handle->per_node[node1];
 		ret = _starpu_fetch_data_on_node(handle, node1, replicate_1, STARPU_RW, 0, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong");
 		ret = _starpu_fetch_data_on_node(handle, node1, replicate_1, STARPU_RW, 0, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong");
 		STARPU_ASSERT(!ret);
 		STARPU_ASSERT(!ret);
-		_starpu_release_data_on_node(handle, 0, replicate_1);
+		_starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_1);
 	}
 	}
 }
 }

+ 12 - 0
tests/Makefile.am

@@ -268,6 +268,7 @@ myPROGRAMS +=				\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\
 	datawizard/acquire_release2		\
+	datawizard/acquire_release_to		\
 	datawizard/acquire_try			\
 	datawizard/acquire_try			\
 	datawizard/bcsr				\
 	datawizard/bcsr				\
 	datawizard/cache			\
 	datawizard/cache			\
@@ -494,6 +495,17 @@ datawizard_acquire_release2_SOURCES +=		\
 	datawizard/acquire_release_opencl.c
 	datawizard/acquire_release_opencl.c
 endif
 endif
 
 
+datawizard_acquire_release_to_SOURCES =		\
+	datawizard/acquire_release_to.c
+if STARPU_USE_CUDA
+datawizard_acquire_release_to_SOURCES +=		\
+	datawizard/acquire_release_cuda.cu
+endif
+if STARPU_USE_OPENCL
+datawizard_acquire_release_to_SOURCES +=		\
+	datawizard/acquire_release_opencl.c
+endif
+
 datawizard_scratch_SOURCES =			\
 datawizard_scratch_SOURCES =			\
 	datawizard/scratch.c
 	datawizard/scratch.c
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA

+ 207 - 0
tests/datawizard/acquire_release_to.c

@@ -0,0 +1,207 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+/*
+ * Check that _release_to correctly interacts with tasks working on the same data
+ */
+
+/* Number of acquire/release rounds performed by main(): 10 for a quick
+ * check, 10000 for a long check, 1000 otherwise. */
+#if defined(STARPU_QUICK_CHECK)
+static unsigned ntasks = 10;
+#elif defined(STARPU_LONG_CHECK)
+static unsigned ntasks = 10000;
+#else
+static unsigned ntasks = 1000;
+#endif
+
+#ifdef STARPU_USE_CUDA
+extern void increment_cuda(void *descr[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void increment_opencl(void *buffers[], void *args);
+#endif
+
+/* CPU kernel: add one to the unsigned variable held by the first buffer.
+ * The codelet argument is unused. */
+void increment_cpu(void *descr[], void *arg)
+{
+	unsigned *counter = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	(void)arg;
+	*counter += 1;
+}
+
+/* Codelet incrementing its single buffer, accessed in read-write mode.
+ * CUDA and OpenCL implementations are only wired in when the matching
+ * support is compiled in. */
+static struct starpu_codelet increment_cl =
+{
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+	.cpu_funcs = {increment_cpu},
+	.cpu_funcs_name = {"increment_cpu"},
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl},
+	.opencl_flags = {STARPU_OPENCL_ASYNC},
+#endif
+};
+
+/* CPU kernel: assert that the unsigned variable held by the first buffer
+ * equals the expected value that arg points to. */
+void check_cpu(void *descr[], void *arg)
+{
+	unsigned *expected = arg;
+	unsigned *current = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	STARPU_ASSERT(*current == *expected);
+}
+
+/* Codelet checking the value of its single buffer, accessed read-only.
+ * It only has a CPU implementation (check_cpu).  The function name must
+ * match that implementation: it previously named increment_cpu, so any
+ * name-based lookup of the kernel would have resolved to the wrong
+ * function. */
+static struct starpu_codelet check_cl =
+{
+	.modes = { STARPU_R },
+	.cpu_funcs = {check_cpu},
+	.cpu_funcs_name = {"check_cpu"},
+	.nbuffers = 1
+};
+
+unsigned token = 0;
+starpu_data_handle_t token_handle;
+
+/* Submit one task running increment_cl on token_handle.
+ * Returns the value returned by starpu_task_submit(). */
+static
+int increment_token(void)
+{
+	struct starpu_task *incr_task = starpu_task_create();
+
+	incr_task->cl = &increment_cl;
+	incr_task->handles[0] = token_handle;
+
+	return starpu_task_submit(incr_task);
+}
+
+/* Submit one task running check_cl on token_handle, which asserts that the
+ * token equals value.
+ *
+ * The expected value is passed through a heap-allocated codelet argument
+ * flagged with cl_arg_free so the task owns it.
+ *
+ * Returns the value returned by starpu_task_submit(), or -ENOMEM when the
+ * argument cannot be allocated (no task is created in that case). */
+static
+int check_token(unsigned value)
+{
+	/* Allocate the argument first so that an allocation failure neither
+	 * dereferences NULL nor leaves an unsubmitted task behind. */
+	unsigned *value_p = malloc(sizeof(*value_p));
+	if (!value_p)
+		return -ENOMEM;
+	*value_p = value;
+
+	struct starpu_task *task = starpu_task_create();
+	task->cl = &check_cl;
+	task->handles[0] = token_handle;
+	task->cl_arg = value_p;
+	task->cl_arg_size = sizeof(*value_p);
+	task->cl_arg_free = 1;
+
+	return starpu_task_submit(task);
+}
+
+/* Callback handed to starpu_data_acquire_cb() by main(): increments the
+ * token, downgrades the acquisition to read-only, waits a little, then
+ * releases the handle completely. */
+static
+void callback(void *arg)
+{
+	(void)arg;
+
+	token += 1;
+
+	/* First step down to STARPU_R only... */
+	starpu_data_release_to(token_handle, STARPU_R);
+	starpu_sleep(0.001);
+
+	/* ...then give the handle back entirely. */
+	starpu_data_release(token_handle);
+}
+
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+/*
+ * Test driver.  Each of the ntasks rounds:
+ *  - acquires the token in RW from main, increments it, and submits a
+ *    check / increment / check task sequence while still holding it,
+ *  - downgrades the acquisition to R with starpu_data_release_to(), then
+ *    releases it completely,
+ *  - repeats the same pattern through starpu_data_acquire_cb(), the
+ *    increment and the two-step release being done by callback().
+ * The token is thus expected to grow by 4 per round.
+ *
+ * Returns EXIT_SUCCESS when the final token value is ntasks * 4,
+ * EXIT_FAILURE otherwise, and STARPU_TEST_SKIPPED when StarPU cannot be
+ * initialized or no worker can execute a task (-ENODEV).
+ */
+int main(int argc, char **argv)
+{
+	unsigned i;
+	int ret;
+	/* Non-zero while main itself holds token_handle, so that the -ENODEV
+	 * path can release it before unregistering the handle. */
+	int acquired = 0;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize");
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/acquire_release_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+	starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned));
+
+	FPRINTF(stderr, "Token: %u\n", token);
+
+	for(i=0; i<ntasks; i++)
+	{
+		/* synchronize data in RAM */
+		ret = starpu_data_acquire(token_handle, STARPU_RW);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		acquired = 1;
+
+		token ++;
+
+		/* These tasks are submitted while main still holds the data;
+		 * the values they check account for the increment above. */
+		ret = check_token(4*i+1);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = increment_token();
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = check_token(4*i+2);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		/* Downgrade RW -> R first, then release completely */
+		starpu_sleep(0.001);
+		starpu_data_release_to(token_handle, STARPU_R);
+
+		starpu_sleep(0.001);
+		starpu_data_release(token_handle);
+		acquired = 0;
+
+		/* Same pattern, with the acquisition handled by callback(),
+		 * which also takes care of releasing the data. */
+		ret = starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb");
+
+		ret = check_token(4*i+3);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = increment_token();
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		ret = check_token(4*i+4);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	}
+
+	starpu_data_unregister(token_handle);
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+#endif
+	starpu_shutdown();
+
+	FPRINTF(stderr, "Token: %u\n", token);
+	if (token == ntasks * 4)
+		ret = EXIT_SUCCESS;
+	else
+		ret = EXIT_FAILURE;
+	return ret;
+
+enodev:
+	/* A submission may have failed while main still held the data in RW;
+	 * give it back first, otherwise unregistering the handle would wait
+	 * for a release that never comes. */
+	if (acquired)
+		starpu_data_release(token_handle);
+	starpu_data_unregister(token_handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU */
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+#endif
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}