4 gadi atpakaļ · c3390198fb
--- a/ChangeLog
+++ b/ChangeLog
@@ -41,6 +41,7 @@ New features:
 
				     StarPU.
			
 
				   * Add a task prefetch level, to improve retaining data in accelerators so we
			
 
				     can make prefetch more aggressive.
			
 
				+  * Add starpu_data_dup_ro().
			
 
				 
			
 
				 Small changes:
			
 
				   * Use the S4U interface of Simgrid instead of xbt and MSG.
			
--- a/doc/doxygen/chapters/380_offline_performance_tools.doxy
+++ b/doc/doxygen/chapters/380_offline_performance_tools.doxy
@@ -59,7 +59,7 @@ StarPU can use the FxT library (see
 
				 https://savannah.nongnu.org/projects/fkt/) to generate traces
			
 
				 with a limited runtime overhead.
			
 
				 
			
 
				-You can get a tarball from http://download.savannah.gnu.org/releases/fkt/
			
 
				+You can get a tarball from http://download.savannah.gnu.org/releases/fkt/?C=M
			
 
				 
			
 
				 Compiling and installing the FxT library in the <c>$FXTDIR</c> path is
			
 
				 done following the standard procedure:
			
--- a/include/starpu_helper.h
+++ b/include/starpu_helper.h
@@ -187,6 +187,21 @@ double starpu_timing_now(void);
 
				 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
			
 
				 
			
 
				 /**
			
 
				+   Create a copy of \p src_handle, and return a new handle in \p dst_handle,
			
 
				+   which is to be used only for read accesses. This allows StarPU to optimize it
			
 
				+   by not actually copying the data whenever possible (e.g. it may possibly
			
 
				+   simply return src_handle itself).
			
 
				+   The parameter \p asynchronous indicates whether the function should block
			
 
				+   or not. In the case of an asynchronous call, it is possible to synchronize
			
 
				+   with the termination of this operation either by the means of implicit
			
 
				+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
			
 
				+   \p callback_func is not <c>NULL</c>, this callback function is executed after
			
 
				+   the handle has been copied, and it is given the pointer \p
			
 
				+   callback_arg as argument.
			
 
				+*/
			
 
				+int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
			
 
				+
			
 
				+/**
			
 
				    Call hwloc-ps to display binding of each processus and thread running on
			
 
				    the machine.<br>
			
 
				    Use the environment variable \ref STARPU_DISPLAY_BINDINGS to automatically
			
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -227,7 +227,12 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 
			
 
				 		if (mode & STARPU_W || mode == STARPU_REDUX)
			
 
				 		{
			
 
				+
			
 
				+			STARPU_ASSERT_MSG(!handle->readonly, "Read-only handles can not be written to");
			
 
				+
			
 
				 			handle->initialized = 1;
			
 
				+			/* We will change our value, disconnect from our readonly duplicates */
			
 
				+			handle->readonly_dup = NULL;
			
 
				 			if (write_hook)
			
 
				 				write_hook(handle);
			
 
				 		}
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -196,10 +196,12 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
				 		unsigned node;
			
 
				 		for (node = 0; node < nnodes; node++)
			
 
				 		{
			
 
				-                       _STARPU_TRACE_DATA_STATE_INVALID(handle, node);
			
 
				+			if (handle->per_node[node].state != STARPU_INVALID)
			
 
				+			       _STARPU_TRACE_DATA_STATE_INVALID(handle, node);
			
 
				 			handle->per_node[node].state = STARPU_INVALID;
			
 
				 		}
			
 
				-               _STARPU_TRACE_DATA_STATE_OWNER(handle, requesting_node);
			
 
				+		if (requesting_replicate->state != STARPU_OWNER)
			
 
				+			_STARPU_TRACE_DATA_STATE_OWNER(handle, requesting_node);
			
 
				 		requesting_replicate->state = STARPU_OWNER;
			
 
				 		if (handle->home_node != -1 && handle->per_node[handle->home_node].state == STARPU_INVALID)
			
 
				 			/* Notify that this MC is now dirty */
			
@@ -215,13 +217,15 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
				 			for (node = 0; node < nnodes; node++)
			
 
				 			{
			
 
				 				struct _starpu_data_replicate *replicate = &handle->per_node[node];
			
 
				-                               if (replicate->state != STARPU_INVALID)
			
 
				-			       {
			
 
				-                                       _STARPU_TRACE_DATA_STATE_SHARED(handle, node);
			
 
				+				if (replicate->state != STARPU_INVALID)
			
 
				+				{
			
 
				+					if (replicate->state != STARPU_SHARED)
			
 
				+						_STARPU_TRACE_DATA_STATE_SHARED(handle, node);
			
 
				 					replicate->state = STARPU_SHARED;
			
 
				-                               }
			
 
				+				}
			
 
				 			}
			
 
				-                       _STARPU_TRACE_DATA_STATE_SHARED(handle, requesting_node);
			
 
				+			if (requesting_replicate->state != STARPU_SHARED)
			
 
				+				_STARPU_TRACE_DATA_STATE_SHARED(handle, requesting_node);
			
 
				 			requesting_replicate->state = STARPU_SHARED;
			
 
				 		}
			
 
				 	}
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -170,7 +170,7 @@ struct _starpu_data_state
 
				 	 */
			
 
				 	unsigned partitioned;
			
 
				 	/** Whether a partition plan is currently submitted in readonly mode */
			
 
				-	unsigned readonly:1;
			
 
				+	unsigned part_readonly:1;
			
 
				 
			
 
				 	/** Whether our father is currently partitioned into ourself */
			
 
				 	unsigned active:1;
			
@@ -191,16 +191,35 @@ struct _starpu_data_state
 
				 	/** what is the default write-through mask for that data ? */
			
 
				 	uint32_t wt_mask;
			
 
				 
			
 
				+	/** for a readonly handle, the number of times that we have returned again the
			
 
				+	    same handle and thus the number of times we have to ignore unregistration requests */
			
 
				+	unsigned aliases;
			
 
				+	/** for a non-readonly handle, a readonly-only duplicate, that we can
			
 
				+	    return from starpu_data_dup_ro */
			
 
				+	starpu_data_handle_t readonly_dup;
			
 
				+
			
 
				 	/** in some case, the application may explicitly tell StarPU that a
			
 
				  	 * piece of data is not likely to be used soon again */
			
 
				-	unsigned is_not_important;
			
 
				+	unsigned is_not_important:1;
			
 
				 
			
 
				 	/** Does StarPU have to enforce some implicit data-dependencies ? */
			
 
				-	unsigned sequential_consistency;
			
 
				+	unsigned sequential_consistency:1;
			
 
				 	/** Is the data initialized, or a task is already submitted to initialize it */
			
 
				-	unsigned initialized;
			
 
				+	unsigned initialized:1;
			
 
				+	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
			
 
				+	unsigned readonly:1;
			
 
				 	/** Can the data be pushed to the disk? */
			
 
				-	unsigned ooc;
			
 
				+	unsigned ooc:1;
			
 
				+
			
 
				+	/** Whether lazy unregistration was requested throught starpu_data_unregister_submit */
			
 
				+	unsigned lazy_unregister:1;
			
 
				+
			
 
				+	/** Whether automatic planned partitioning/unpartitioning should not be done */
			
 
				+	int partition_automatic_disabled:1;
			
 
				+
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	unsigned removed_from_context_hash:1;
			
 
				+#endif
			
 
				 
			
 
				 	/** This lock should protect any operation to enforce
			
 
				 	 * sequential_consistency */
			
@@ -254,12 +273,6 @@ struct _starpu_data_state
 
				 	/** Final request for write invalidation */
			
 
				 	struct _starpu_data_request *write_invalidation_req;
			
 
				 
			
 
				-	unsigned lazy_unregister;
			
 
				-
			
 
				-#ifdef STARPU_OPENMP
			
 
				-	unsigned removed_from_context_hash;
			
 
				-#endif
			
 
				-
			
 
				         /** Used for MPI */
			
 
				 	void *mpi_data;
			
 
				 
			
@@ -279,8 +292,6 @@ struct _starpu_data_state
 
				 	 * took it yet */
			
 
				 	int last_locality;
			
 
				 
			
 
				-	int partition_automatic_disabled;
			
 
				-
			
 
				 	/** Application-provided coordinates. The maximum dimension (5) is
			
 
				 	  * relatively arbitrary. */
			
 
				 	unsigned dimensions;
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -232,19 +232,9 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 		memset(child, 0, sizeof(*child));
			
 
				 		_starpu_data_handle_init(child, ops, initial_handle->mf_node);
			
 
				 
			
 
				-		//child->nchildren = 0;
			
 
				-		//child->nplans = 0;
			
 
				-		//child->switch_cl = NULL;
			
 
				-		//child->partitioned = 0;
			
 
				-		//child->readonly = 0;
			
 
				-		child->active = inherit_state;
			
 
				-		//child->active_ro = 0;
			
 
				-                //child->mpi_data = NULL;
			
 
				 		child->root_handle = initial_handle->root_handle;
			
 
				 		child->father_handle = initial_handle;
			
 
				-		//child->active_children = NULL;
			
 
				-		//child->active_readonly_children = NULL;
			
 
				-		//child->nactive_readonly_children = 0;
			
 
				+
			
 
				 		child->nsiblings = nparts;
			
 
				 		if (inherit_state)
			
 
				 		{
			
@@ -255,59 +245,25 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 		child->sibling_index = i;
			
 
				 		child->depth = initial_handle->depth + 1;
			
 
				 
			
 
				-		child->is_not_important = initial_handle->is_not_important;
			
 
				-		child->wt_mask = initial_handle->wt_mask;
			
 
				+		child->active = inherit_state;
			
 
				+
			
 
				 		child->home_node = initial_handle->home_node;
			
 
				+		child->wt_mask = initial_handle->wt_mask;
			
 
				 
			
 
				-		/* initialize the chunk lock */
			
 
				-		_starpu_data_requester_prio_list_init(&child->req_list);
			
 
				-		_starpu_data_requester_prio_list_init(&child->reduction_req_list);
			
 
				-		//child->reduction_tmp_handles = NULL;
			
 
				-		//child->write_invalidation_req = NULL;
			
 
				-		//child->refcnt = 0;
			
 
				-		//child->unlocking_reqs = 0;
			
 
				-		//child->busy_count = 0;
			
 
				-		//child->busy_waiting = 0;
			
 
				-		STARPU_PTHREAD_MUTEX_INIT0(&child->busy_mutex, NULL);
			
 
				-		STARPU_PTHREAD_COND_INIT0(&child->busy_cond, NULL);
			
 
				-		//child->reduction_refcnt = 0;
			
 
				-		_starpu_spin_init(&child->header_lock);
			
 
				+		child->aliases = initial_handle->aliases;
			
 
				+
			
 
				+		child->is_not_important = initial_handle->is_not_important;
			
 
				 
			
 
				 		child->sequential_consistency = initial_handle->sequential_consistency;
			
 
				 		child->initialized = initial_handle->initialized;
			
 
				+		child->readonly = initial_handle->readonly;
			
 
				 		child->ooc = initial_handle->ooc;
			
 
				 
			
 
				-		STARPU_PTHREAD_MUTEX_INIT0(&child->sequential_consistency_mutex, NULL);
			
 
				-		child->last_submitted_mode = STARPU_R;
			
 
				-		//child->last_sync_task = NULL;
			
 
				-		//child->last_submitted_accessors.task = NULL;
			
 
				-		child->last_submitted_accessors.next = &child->last_submitted_accessors;
			
 
				-		child->last_submitted_accessors.prev = &child->last_submitted_accessors;
			
 
				-		//child->post_sync_tasks = NULL;
			
 
				-		/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
			
 
				-		STARPU_HG_DISABLE_CHECKING(child->post_sync_tasks_cnt);
			
 
				-		//child->post_sync_tasks_cnt = 0;
			
 
				-
			
 
				 		/* The methods used for reduction are propagated to the
			
 
				 		 * children. */
			
 
				 		child->redux_cl = initial_handle->redux_cl;
			
 
				 		child->init_cl = initial_handle->init_cl;
			
 
				 
			
 
				-#ifdef STARPU_USE_FXT
			
 
				-		//child->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				-		//child->last_submitted_ghost_sync_id = 0;
			
 
				-		//child->last_submitted_ghost_accessors_id = NULL;
			
 
				-#endif
			
 
				-
			
 
				-		if (_starpu_global_arbiter)
			
 
				-			/* Just for testing purpose */
			
 
				-			starpu_data_assign_arbiter(child, _starpu_global_arbiter);
			
 
				-		else
			
 
				-		{
			
 
				-			//child->arbiter = NULL;
			
 
				-		}
			
 
				-		_starpu_data_requester_prio_list_init0(&child->arbitered_req_list);
			
 
				-
			
 
				 		for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 		{
			
 
				 			struct _starpu_data_replicate *initial_replicate;
			
@@ -346,13 +302,6 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 			f->filter_func(initial_interface, child_interface, f, i, nparts);
			
 
				 		}
			
 
				 
			
 
				-		//child->per_worker = NULL;
			
 
				-		//child->user_data = NULL;
			
 
				-
			
 
				-		/* We compute the size and the footprint of the child once and
			
 
				-		 * store it in the handle */
			
 
				-		child->footprint = _starpu_compute_data_footprint(child);
			
 
				-
			
 
				 		for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 		{
			
 
				 			if (starpu_node_get_kind(node) != STARPU_CPU_RAM)
			
@@ -693,7 +642,7 @@ void _starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned
 
				 	STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
			
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				 	STARPU_ASSERT_MSG(initial_handle->partitioned == 0, "One can't submit several partition plannings at the same time");
			
 
				-	STARPU_ASSERT_MSG(initial_handle->readonly == 0, "One can't submit a partition planning while a readonly partitioning is active");
			
 
				+	STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "One can't submit a partition planning while a readonly partitioning is active");
			
 
				 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
			
 
				 	initial_handle->partitioned++;
			
 
				 	initial_handle->active_children = children[0]->siblings;
			
@@ -753,10 +702,10 @@ void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle,
 
				 	unsigned i;
			
 
				 	STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
			
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				-	STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning");
			
 
				+	STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->part_readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning");
			
 
				 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
			
 
				 	initial_handle->partitioned++;
			
 
				-	initial_handle->readonly = 1;
			
 
				+	initial_handle->part_readonly = 1;
			
 
				 	if (initial_handle->nactive_readonly_children < initial_handle->partitioned)
			
 
				 	{
			
 
				 		_STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0]));
			
@@ -790,9 +739,9 @@ void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial
 
				 	STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
			
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				 	STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't upgrade a readonly partition planning to readwrite while other readonly partition plannings are active");
			
 
				-	STARPU_ASSERT_MSG(initial_handle->readonly == 1, "One can only upgrade a readonly partition planning");
			
 
				+	STARPU_ASSERT_MSG(initial_handle->part_readonly == 1, "One can only upgrade a readonly partition planning");
			
 
				 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
			
 
				-	initial_handle->readonly = 0;
			
 
				+	initial_handle->part_readonly = 0;
			
 
				 	initial_handle->active_children = initial_handle->active_readonly_children[0];
			
 
				 	initial_handle->active_readonly_children[0] = NULL;
			
 
				 	_starpu_spin_unlock(&initial_handle->header_lock);
			
@@ -819,7 +768,7 @@ void _starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsign
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				 	STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle);
			
 
				 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
			
 
				-	if (initial_handle->readonly)
			
 
				+	if (initial_handle->part_readonly)
			
 
				 	{
			
 
				 		/* Replace this children set with the last set in the list of readonly children sets */
			
 
				 		for (i = 0; i < initial_handle->partitioned-1; i++)
			
@@ -838,7 +787,7 @@ void _starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsign
 
				 	}
			
 
				 	initial_handle->partitioned--;
			
 
				 	if (!initial_handle->partitioned)
			
 
				-		initial_handle->readonly = 0;
			
 
				+		initial_handle->part_readonly = 0;
			
 
				 	initial_handle->active_children = NULL;
			
 
				 	_starpu_spin_unlock(&initial_handle->header_lock);
			
 
				 
			
@@ -914,7 +863,7 @@ void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				 	STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle);
			
 
				 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
			
 
				-	initial_handle->readonly = 1;
			
 
				+	initial_handle->part_readonly = 1;
			
 
				 	_starpu_spin_unlock(&initial_handle->header_lock);
			
 
				 
			
 
				 	unsigned i, n;
			
@@ -941,7 +890,7 @@ void starpu_data_unpartition_submit_r(starpu_data_handle_t ancestor, int gatheri
 
				 		/* It's already unpartitioned */
			
 
				 		return;
			
 
				 	_STARPU_DEBUG("ancestor %p needs unpartitioning\n", ancestor);
			
 
				-	if (ancestor->readonly)
			
 
				+	if (ancestor->part_readonly)
			
 
				 	{
			
 
				 		unsigned n = ancestor->partitioned;
			
 
				 		/* Uh, has to go through all read-only partitions */
			
@@ -988,16 +937,16 @@ static void _starpu_data_partition_access_look_up(starpu_data_handle_t ancestor,
 
				 		_STARPU_DEBUG("ancestor %p was ready\n", ancestor);
			
 
				 
			
 
				 	/* We shouldn't be called for nothing */
			
 
				-	STARPU_ASSERT(!ancestor->partitioned || !target || ancestor->active_children != target->siblings || (ancestor->readonly && write));
			
 
				+	STARPU_ASSERT(!ancestor->partitioned || !target || ancestor->active_children != target->siblings || (ancestor->part_readonly && write));
			
 
				 
			
 
				 	/* Then unpartition ancestor if needed */
			
 
				 	if (ancestor->partitioned &&
			
 
				 			/* Not the right children, unpartition ourself */
			
 
				 			((target && write && ancestor->active_children != target->siblings) ||
			
 
				-			 (target && !write && !ancestor->readonly) ||
			
 
				+			 (target && !write && !ancestor->part_readonly) ||
			
 
				 			/* We are partitioned and we want to write or some child
			
 
				 			 * is writing and we want to read, unpartition ourself*/
			
 
				-			(!target && (write || !ancestor->readonly))))
			
 
				+			(!target && (write || !ancestor->part_readonly))))
			
 
				 	{
			
 
				 #ifdef STARPU_DEVEL
			
 
				 #warning FIXME: better choose gathering node
			
@@ -1016,14 +965,14 @@ static void _starpu_data_partition_access_look_up(starpu_data_handle_t ancestor,
 
				 	if (ancestor->partitioned)
			
 
				 	{
			
 
				 		/* That must be readonly, otherwise we would have unpartitioned it */
			
 
				-		STARPU_ASSERT(ancestor->readonly);
			
 
				+		STARPU_ASSERT(ancestor->part_readonly);
			
 
				 		if (write)
			
 
				 		{
			
 
				 			_STARPU_DEBUG("ancestor %p is already partitioned RO, turn RW\n", ancestor);
			
 
				 			/* Already partitioned, normally it's already for the target */
			
 
				 			STARPU_ASSERT(ancestor->active_children == target->siblings);
			
 
				 			/* And we are here just because we haven't partitioned rw */
			
 
				-			STARPU_ASSERT(ancestor->readonly && write);
			
 
				+			STARPU_ASSERT(ancestor->part_readonly && write);
			
 
				 			/* So we just need to upgrade ro to rw */
			
 
				 			starpu_data_partition_readwrite_upgrade_submit(ancestor, target->nsiblings, target->siblings);
			
 
				 		}
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -259,90 +259,36 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 
			
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
 
				-	/* initialize the new lock */
			
 
				-	_starpu_data_requester_prio_list_init0(&handle->req_list);
			
 
				-	//handle->refcnt = 0;
			
 
				-	//handle->unlocking_reqs = 0;
			
 
				-	//handle->busy_count = 0;
			
 
				-	//handle->busy_waiting = 0;
			
 
				-	STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL);
			
 
				-	STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL);
			
 
				-	_starpu_spin_init(&handle->header_lock);
			
 
				-
			
 
				 	/* first take care to properly lock the data */
			
 
				 	_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	/* there is no hierarchy yet */
			
 
				-	//handle->nchildren = 0;
			
 
				-	//handle->nplans = 0;
			
 
				-	//handle->switch_cl = NULL;
			
 
				-	//handle->partitioned = 0;
			
 
				-	//handle->readonly = 0;
			
 
				-	handle->active = 1;
			
 
				-	//handle->active_ro = 0;
			
 
				 	handle->root_handle = handle;
			
 
				 	//handle->father_handle = NULL;
			
 
				-	//handle->active_children = NULL;
			
 
				-	//handle->active_readonly_children = NULL;
			
 
				-	//handle->nactive_readonly_children = 0;
			
 
				 	//handle->nsiblings = 0;
			
 
				 	//handle->siblings = NULL;
			
 
				 	//handle->sibling_index = 0; /* could be anything for the root */
			
 
				 	handle->depth = 1; /* the tree is just a node yet */
			
 
				-        //handle->mpi_data = NULL; /* invalid until set */
			
 
				+
			
 
				+	handle->active = 1;
			
 
				+
			
 
				+	handle->home_node = home_node;
			
 
				+
			
 
				+	handle->wt_mask = wt_mask;
			
 
				+
			
 
				+	//handle->aliases = 0;
			
 
				 
			
 
				 	//handle->is_not_important = 0;
			
 
				 
			
 
				 	handle->sequential_consistency =
			
 
				 		starpu_data_get_default_sequential_consistency_flag();
			
 
				 	handle->initialized = home_node != -1;
			
 
				+	//handle->readonly = 0;
			
 
				 	handle->ooc = 1;
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL);
			
 
				-	handle->last_submitted_mode = STARPU_R;
			
 
				-	//handle->last_sync_task = NULL;
			
 
				-	//handle->last_submitted_accessors.task = NULL;
			
 
				-	handle->last_submitted_accessors.next = &handle->last_submitted_accessors;
			
 
				-	handle->last_submitted_accessors.prev = &handle->last_submitted_accessors;
			
 
				-	//handle->post_sync_tasks = NULL;
			
 
				-
			
 
				-	/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
			
 
				-	STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt);
			
 
				-	//handle->post_sync_tasks_cnt = 0;
			
 
				-
			
 
				 	/* By default, there are no methods available to perform a reduction */
			
 
				 	//handle->redux_cl = NULL;
			
 
				 	//handle->init_cl = NULL;
			
 
				 
			
 
				-	//handle->reduction_refcnt = 0;
			
 
				-	_starpu_data_requester_prio_list_init0(&handle->reduction_req_list);
			
 
				-	//handle->reduction_tmp_handles = NULL;
			
 
				-	//handle->write_invalidation_req = NULL;
			
 
				-
			
 
				-#ifdef STARPU_USE_FXT
			
 
				-	//handle->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				-	//handle->last_submitted_ghost_sync_id = 0;
			
 
				-	//handle->last_submitted_ghost_accessors_id = NULL;
			
 
				-#endif
			
 
				-
			
 
				-	handle->wt_mask = wt_mask;
			
 
				-
			
 
				-	/* Store some values directly in the handle not to recompute them all
			
 
				-	 * the time. */
			
 
				-	handle->footprint = _starpu_compute_data_footprint(handle);
			
 
				-
			
 
				-	handle->home_node = home_node;
			
 
				-
			
 
				-	if (_starpu_global_arbiter)
			
 
				-		/* Just for testing purpose */
			
 
				-		starpu_data_assign_arbiter(handle, _starpu_global_arbiter);
			
 
				-	else
			
 
				-	{
			
 
				-		//handle->arbiter = NULL;
			
 
				-	}
			
 
				-	_starpu_data_requester_prio_list_init0(&handle->arbitered_req_list);
			
 
				-	handle->last_locality = -1;
			
 
				-
			
 
				 	/* that new data is invalid from all nodes perpective except for the
			
 
				 	 * home node */
			
 
				 	unsigned node;
			
@@ -372,9 +318,6 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	//handle->per_worker = NULL;
			
 
				-	//handle->user_data = NULL;
			
 
				-
			
 
				 	/* now the data is available ! */
			
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
@@ -449,14 +392,47 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 
				 	STARPU_HG_DISABLE_CHECKING(handle->busy_count);
			
 
				 
			
 
				 	handle->magic = 42;
			
 
				-	handle->ops = interface_ops;
			
 
				-	handle->mf_node = mf_node;
			
 
				-	//handle->mpi_data = NULL;
			
 
				-	//handle->partition_automatic_disabled = 0;
			
 
				 
			
 
				+	/* When not specified, the fields are initialized in _starpu_register_new_data and _starpu_data_partition */
			
 
				+
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->req_list);
			
 
				+	//handle->refcnt = 0;
			
 
				+	//handle->unlocking_reqs = 0;
			
 
				+	//handle->current_mode = STARPU_NONE;
			
 
				+	_starpu_spin_init(&handle->header_lock);
			
 
				+
			
 
				+	//handle->busy_count = 0;
			
 
				+	//handle->busy_waiting = 0;
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL);
			
 
				+	STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL);
			
 
				+
			
 
				+	//handle->root_handle
			
 
				+	//handle->father_handle
			
 
				+	//handle->active_children = NULL;
			
 
				+	//handle->active_readonly_children = NULL;
			
 
				+	//handle->nactive_readonly_children = 0;
			
 
				+	//handle->nsiblings
			
 
				+	//handle->siblings
			
 
				+	//handle->sibling_index
			
 
				+	//handle->depth
			
 
				+
			
 
				+	/* there is no hierarchy yet */
			
 
				+	//handle->children = NULL;
			
 
				+	//handle->nchildren = 0;
			
 
				+	//handle->nplans = 0;
			
 
				+	//handle->switch_cl = NULL;
			
 
				+	//handle->switch_cl_nparts = 0;
			
 
				+	//handle->partitioned = 0;
			
 
				+	//handle->part_readonly = 0;
			
 
				+
			
 
				+	//handle->active
			
 
				+	//handle->active_ro = 0;
			
 
				+
			
 
				+	//handle->per_node below
			
 
				+
			
 
				+	handle->ops = interface_ops;
			
 
				 	size_t interfacesize = interface_ops->interface_size;
			
 
				 
			
 
				-	_starpu_memory_stats_init(handle);
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				 		_starpu_memory_stats_init_per_node(handle, node);
			
@@ -471,6 +447,80 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 
				 		if (handle->ops->init) handle->ops->init(replicate->data_interface);
			
 
				 	}
			
 
				 
			
 
				+	//handle->per_worker = NULL;
			
 
				+	//handle->ops above
			
 
				+
			
 
				+	/* Store some values directly in the handle not to recompute them all
			
 
				+	 * the time. */
			
 
				+	handle->footprint = _starpu_compute_data_footprint(handle);
			
 
				+
			
 
				+	//handle->home_node
			
 
				+	//handle->wt_mask
			
 
				+	//handle->aliases = 0;
			
 
				+	//handle->is_not_important
			
 
				+	//handle->sequential_consistency
			
 
				+	//handle->initialized
			
 
				+	//handle->readonly
			
 
				+	//handle->ooc
			
 
				+	//handle->lazy_unregister = 0;
			
 
				+	//handle->partition_automatic_disabled = 0;
			
 
				+	//handle->removed_from_context_hash = 0;
			
 
				+
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL);
			
 
				+
			
 
				+	handle->last_submitted_mode = STARPU_R;
			
 
				+	//handle->last_sync_task = NULL;
			
 
				+	//handle->last_submitted_accessors.task = NULL;
			
 
				+	handle->last_submitted_accessors.next = &handle->last_submitted_accessors;
			
 
				+	handle->last_submitted_accessors.prev = &handle->last_submitted_accessors;
			
 
				+
			
 
				+#ifdef STARPU_USE_FXT
			
 
				+	//handle->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				+	//handle->last_submitted_ghost_sync_id = 0;
			
 
				+	//handle->last_submitted_ghost_accessors_id = NULL;
			
 
				+#endif
			
 
				+
			
 
				+	//handle->post_sync_tasks = NULL;
			
 
				+	/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
			
 
				+	STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt);
			
 
				+	//handle->post_sync_tasks_cnt = 0;
			
 
				+
			
 
				+	//handle->redux_cl
			
 
				+	//handle->init_cl
			
 
				+
			
 
				+	//handle->reduction_refcnt = 0;
			
 
				+
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->reduction_req_list);
			
 
				+
			
 
				+	//handle->reduction_tmp_handles = NULL;
			
 
				+
			
 
				+	//handle->write_invalidation_req = NULL;
			
 
				+
			
 
				+        //handle->mpi_data = NULL; /* invalid until set */
			
 
				+
			
 
				+	_starpu_memory_stats_init(handle);
			
 
				+
			
 
				+	handle->mf_node = mf_node;
			
 
				+
			
 
				+        //handle->unregister_hook = NULL;
			
 
				+
			
 
				+	if (_starpu_global_arbiter)
			
 
				+		/* Just for testing purpose */
			
 
				+		starpu_data_assign_arbiter(handle, _starpu_global_arbiter);
			
 
				+	else
			
 
				+	{
			
 
				+		//handle->arbiter = NULL;
			
 
				+	}
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->arbitered_req_list);
			
 
				+
			
 
				+	handle->last_locality = -1;
			
 
				+
			
 
				+	//handle->dimensions = 0;
			
 
				+	//handle->coordinates = {};
			
 
				+
			
 
				+	//handle->user_data = NULL;
			
 
				+
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -743,13 +793,22 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 	/* TODO: also check that it has the latest coherency */
			
 
				 	STARPU_ASSERT(!(nowait && handle->busy_count != 0));
			
 
				 
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				+	if (handle->aliases)
			
 
				+	{
			
 
				+		handle->aliases--;
			
 
				+		_starpu_spin_unlock(&handle->header_lock);
			
 
				+		return;
			
 
				+	}
			
 
				+        _starpu_spin_unlock(&handle->header_lock);
			
 
				+
			
 
				 	int sequential_consistency = handle->sequential_consistency;
			
 
				 	if (sequential_consistency && !nowait)
			
 
				 	{
			
 
				 		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_data_unregister must not be called from a task or callback, perhaps you can use starpu_data_unregister_submit instead");
			
 
				 
			
 
				 		/* If sequential consistency is enabled, wait until data is available */
			
 
				-		_starpu_data_wait_until_available(handle, STARPU_RW, "starpu_data_unregister");
			
 
				+		_starpu_data_wait_until_available(handle, handle->readonly?STARPU_R:STARPU_RW, "starpu_data_unregister");
			
 
				 	}
			
 
				 
			
 
				 	if (coherent && !nowait)
			
@@ -982,6 +1041,14 @@ void starpu_data_unregister_submit(starpu_data_handle_t handle)
 
				 {
			
 
				 	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle);
			
 
				 	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				+	if (handle->aliases)
			
 
				+	{
			
 
				+		handle->aliases--;
			
 
				+		_starpu_spin_unlock(&handle->header_lock);
			
 
				+		return;
			
 
				+	}
			
 
				+        _starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	/* Wait for all task dependencies on this handle before putting it for free */
			
 
				 	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -349,7 +349,8 @@ static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_d
 
				 			unsigned cnt = 0;
			
 
				 
			
 
				 			/* some other node may have the copy */
			
 
				-			_STARPU_TRACE_DATA_STATE_INVALID(handle, src_node);
			
 
				+			if (src_replicate->state != STARPU_INVALID)
			
 
				+				_STARPU_TRACE_DATA_STATE_INVALID(handle, src_node);
			
 
				 			src_replicate->state = STARPU_INVALID;
			
 
				 
			
 
				 			/* count the number of copies */
			
@@ -365,7 +366,8 @@ static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_d
 
				 
			
 
				 			if (cnt == 1)
			
 
				 			{
			
 
				-				_STARPU_TRACE_DATA_STATE_OWNER(handle, last);
			
 
				+				if (handle->per_node[last].state != STARPU_OWNER)
			
 
				+					_STARPU_TRACE_DATA_STATE_OWNER(handle, last);
			
 
				 				handle->per_node[last].state = STARPU_OWNER;
			
 
				 			}
			
 
				 
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -4441,11 +4441,13 @@ void _starpu_fxt_number_events_file_close(void)
 
				 {
			
 
				 	if (number_events_file)
			
 
				 	{
			
 
				+		int i;
			
 
				+
			
 
				 		assert(number_events != NULL);
			
 
				 
			
 
				 		fprintf(number_events_file, "# Use starpu_fxt_number_events_to_names.py to convert event keys to event names.\n");
			
 
				 
			
 
				-		for (int i = 0; i <= FUT_SETUP_CODE; i++)
			
 
				+		for (i = 0; i <= FUT_SETUP_CODE; i++)
			
 
				 		{
			
 
				 			if (number_events[i] > 0)
			
 
				 				fprintf(number_events_file, "0x%x\t%lu\n", i, number_events[i]);
			
--- a/src/util/starpu_data_cpy.c
+++ b/src/util/starpu_data_cpy.c
@@ -175,3 +175,40 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 
				 {
			
 
				 	return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
			
 
				 }
			
 
				+
			
 
				+/* TODO: implement copy on write, and introduce starpu_data_dup as well */
			
 
				+int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle,
			
 
				+			int asynchronous, void (*callback_func)(void*), void *callback_arg)
			
 
				+{
			
 
				+	_starpu_spin_lock(&src_handle->header_lock);
			
 
				+	if (src_handle->readonly_dup) {
			
 
				+		/* Already a ro duplicate, just return it with one more ref */
			
 
				+		*dst_handle = src_handle->readonly_dup;
			
 
				+		_starpu_spin_unlock(&src_handle->header_lock);
			
 
				+		_starpu_spin_lock(&(*dst_handle)->header_lock);
			
 
				+		(*dst_handle)->aliases++;
			
 
				+		_starpu_spin_unlock(&(*dst_handle)->header_lock);
			
 
				+		if (callback_func)
			
 
				+			callback_func(callback_arg);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	if (src_handle->readonly) {
			
 
				+		src_handle->aliases++;
			
 
				+		_starpu_spin_unlock(&src_handle->header_lock);
			
 
				+		*dst_handle = src_handle;
			
 
				+		if (callback_func)
			
 
				+			callback_func(callback_arg);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	_starpu_spin_unlock(&src_handle->header_lock);
			
 
				+
			
 
				+	starpu_data_register_same(dst_handle, src_handle);
			
 
				+	_starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
			
 
				+	(*dst_handle)->readonly = 1;
			
 
				+
			
 
				+	_starpu_spin_lock(&src_handle->header_lock);
			
 
				+	src_handle->readonly_dup = (*dst_handle);
			
 
				+	_starpu_spin_unlock(&src_handle->header_lock);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -344,6 +344,7 @@ myPROGRAMS +=				\
 
				 	errorcheck/workers_cpuid		\
			
 
				 	fault-tolerance/retry			\
			
 
				 	helper/starpu_data_cpy			\
			
 
				+	helper/starpu_data_dup_ro		\
			
 
				 	helper/starpu_create_sync_task		\
			
 
				 	microbenchs/async_tasks_overhead	\
			
 
				 	microbenchs/sync_tasks_overhead		\
			
@@ -359,11 +360,11 @@ myPROGRAMS +=				\
 
				 	parallel_tasks/parallel_kernels_spmd	\
			
 
				 	parallel_tasks/spmd_peager		\
			
 
				 	parallel_tasks/cuda_only		\
			
 
				-	perfmodels/regression_based		\
			
 
				-	perfmodels/regression_based_01		\
			
 
				-	perfmodels/regression_based_02		\
			
 
				-	perfmodels/regression_based_03		\
			
 
				-	perfmodels/regression_based_04		\
			
 
				+	perfmodels/regression_based_memset	\
			
 
				+	perfmodels/regression_based_check	\
			
 
				+	perfmodels/regression_based_multiimpl	\
			
 
				+	perfmodels/regression_based_energy	\
			
 
				+	perfmodels/regression_based_gpu		\
			
 
				 	perfmodels/non_linear_regression_based	\
			
 
				 	perfmodels/feed				\
			
 
				 	perfmodels/user_base			\
			
@@ -652,28 +653,32 @@ main_insert_task_where_SOURCES +=		\
 
				 endif
			
 
				 
			
 
				 main_subgraph_repeat_SOURCES =		\
			
 
				-	main/subgraph_repeat.c
			
 
				+	main/subgraph_repeat.c		\
			
 
				+	main/increment_codelet.c
			
 
				 if STARPU_USE_CUDA
			
 
				 main_subgraph_repeat_SOURCES +=		\
			
 
				 	main/increment.cu
			
 
				 endif
			
 
				 
			
 
				 main_subgraph_repeat_tag_SOURCES =		\
			
 
				-	main/subgraph_repeat_tag.c
			
 
				+	main/subgraph_repeat_tag.c		\
			
 
				+	main/increment_codelet.c
			
 
				 if STARPU_USE_CUDA
			
 
				 main_subgraph_repeat_tag_SOURCES +=		\
			
 
				 	main/increment.cu
			
 
				 endif
			
 
				 
			
 
				 main_subgraph_repeat_regenerate_SOURCES =		\
			
 
				-	main/subgraph_repeat_regenerate.c
			
 
				+	main/subgraph_repeat_regenerate.c		\
			
 
				+	main/increment_codelet.c
			
 
				 if STARPU_USE_CUDA
			
 
				 main_subgraph_repeat_regenerate_SOURCES +=		\
			
 
				 	main/increment.cu
			
 
				 endif
			
 
				 
			
 
				 main_subgraph_repeat_regenerate_tag_SOURCES =		\
			
 
				-	main/subgraph_repeat_regenerate_tag.c
			
 
				+	main/subgraph_repeat_regenerate_tag.c		\
			
 
				+	main/increment_codelet.c
			
 
				 if STARPU_USE_CUDA
			
 
				 main_subgraph_repeat_regenerate_tag_SOURCES +=		\
			
 
				 	main/increment.cu
			
@@ -799,6 +804,14 @@ fortran90_init_01_SOURCES =	\
 
				 	fortran90/init_01.f90
			
 
				 endif
			
 
				 
			
 
				+helper_starpu_data_dup_ro_SOURCES =		\
			
 
				+	helper/starpu_data_dup_ro.c		\
			
 
				+	main/increment_codelet.c
			
 
				+if STARPU_USE_CUDA
			
 
				+helper_starpu_data_dup_ro_SOURCES +=		\
			
 
				+	main/increment.cu
			
 
				+endif
			
 
				+
			
 
				 ###################
			
 
				 # Block interface #
			
 
				 ###################
			
@@ -1006,20 +1019,20 @@ overlap_gpu_concurrency_SOURCES+=\
 
				 	overlap/long_kernel.cu
			
 
				 endif
			
 
				 
			
 
				-perfmodels_regression_based_SOURCES=\
			
 
				-	perfmodels/regression_based.c
			
 
				+perfmodels_regression_based_memset_SOURCES=\
			
 
				+	perfmodels/regression_based_memset.c
			
 
				 
			
 
				-perfmodels_regression_based_01_SOURCES=\
			
 
				-	perfmodels/regression_based_01.c
			
 
				+perfmodels_regression_based_check_SOURCES=\
			
 
				+	perfmodels/regression_based_check.c
			
 
				 
			
 
				-perfmodels_regression_based_02_SOURCES=\
			
 
				-	perfmodels/regression_based_02.c
			
 
				+perfmodels_regression_based_multiimpl_SOURCES=\
			
 
				+	perfmodels/regression_based_multiimpl.c
			
 
				 
			
 
				-perfmodels_regression_based_03_SOURCES=\
			
 
				-	perfmodels/regression_based_03.c
			
 
				+perfmodels_regression_based_energy_SOURCES=\
			
 
				+	perfmodels/regression_based_energy.c
			
 
				 
			
 
				-perfmodels_regression_based_04_SOURCES=\
			
 
				-	perfmodels/regression_based_04.c
			
 
				+perfmodels_regression_based_gpu_SOURCES=\
			
 
				+	perfmodels/regression_based_gpu.c
			
 
				 
			
 
				 perfmodels_max_fpga_SOURCES=\
			
 
				 	perfmodels/max_fpga.c
			
@@ -1027,10 +1040,10 @@ perfmodels_max_fpga_LDADD = $(LDADD) \
 
				 	$(srcdir)/perfmodels/slic_StreamFMA.o
			
 
				 
			
 
				 if STARPU_USE_OPENCL
			
 
				-perfmodels_regression_based_SOURCES+=\
			
 
				+perfmodels_regression_based_memset_SOURCES+=\
			
 
				 	perfmodels/opencl_memset.c
			
 
				 
			
 
				-perfmodels_regression_based_04_SOURCES+=\
			
 
				+perfmodels_regression_based_gpu_SOURCES+=\
			
 
				 	perfmodels/opencl_memset.c
			
 
				 
			
 
				 nobase_STARPU_OPENCL_DATA_DATA += \
			
--- a/tests/helper/starpu_data_dup_ro.c
+++ b/tests/helper/starpu_data_dup_ro.c
@@ -0,0 +1,107 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "../main/increment_codelet.h"
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+/*
			
 
				+ * Test starpu_data_dup_ro
			
 
				+ */
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	int ret;
			
 
				+	unsigned var1, *var;
			
 
				+	starpu_data_handle_t var1_handle, var2_handle, var3_handle, var4_handle, var5_handle;
			
 
				+
			
 
				+	ret = starpu_initialize(NULL, &argc, &argv);
			
 
				+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+	var1 = 42;
			
 
				+
			
 
				+	starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1));
			
 
				+
			
 
				+	/* Make a duplicate of the original data */
			
 
				+	ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1, NULL, NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
			
 
				+
			
 
				+	/* Make a second duplicate of the original data */
			
 
				+	ret = starpu_data_dup_ro(&var3_handle, var1_handle, 1, NULL, NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
			
 
				+	STARPU_ASSERT(var3_handle == var2_handle);
			
 
				+
			
 
				+	/* Make a duplicate of a duplicate */
			
 
				+	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1, NULL, NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
			
 
				+	STARPU_ASSERT(var4_handle == var2_handle);
			
 
				+
			
 
				+	starpu_task_insert(&increment_codelet, STARPU_RW, var1_handle, 0);
			
 
				+
			
 
				+	/* Make a duplicate of the new value */
			
 
				+	ret = starpu_data_dup_ro(&var5_handle, var1_handle, 1, NULL, NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
			
 
				+
			
 
				+	starpu_data_acquire(var2_handle, STARPU_R);
			
 
				+	var = starpu_data_get_local_ptr(var2_handle);
			
 
				+	ret = EXIT_SUCCESS;
			
 
				+	if (*var != 42)
			
 
				+	{
			
 
				+	     FPRINTF(stderr, "var2 is %d but it should be %d\n", *var, 42);
			
 
				+	     ret = EXIT_FAILURE;
			
 
				+	}
			
 
				+	starpu_data_release(var2_handle);
			
 
				+
			
 
				+	starpu_data_acquire(var3_handle, STARPU_R);
			
 
				+	var = starpu_data_get_local_ptr(var3_handle);
			
 
				+	ret = EXIT_SUCCESS;
			
 
				+	if (*var != 42)
			
 
				+	{
			
 
				+	     FPRINTF(stderr, "var3 is %d but it should be %d\n", *var, 42);
			
 
				+	     ret = EXIT_FAILURE;
			
 
				+	}
			
 
				+	starpu_data_release(var3_handle);
			
 
				+
			
 
				+	starpu_data_acquire(var4_handle, STARPU_R);
			
 
				+	var = starpu_data_get_local_ptr(var4_handle);
			
 
				+	ret = EXIT_SUCCESS;
			
 
				+	if (*var != 42)
			
 
				+	{
			
 
				+	     FPRINTF(stderr, "var4 is %d but it should be %d\n", *var, 42);
			
 
				+	     ret = EXIT_FAILURE;
			
 
				+	}
			
 
				+	starpu_data_release(var4_handle);
			
 
				+
			
 
				+	starpu_data_acquire(var5_handle, STARPU_R);
			
 
				+	var = starpu_data_get_local_ptr(var5_handle);
			
 
				+	ret = EXIT_SUCCESS;
			
 
				+	if (*var != 43)
			
 
				+	{
			
 
				+	     FPRINTF(stderr, "var5 is %d but it should be %d\n", *var, 43);
			
 
				+	     ret = EXIT_FAILURE;
			
 
				+	}
			
 
				+	starpu_data_release(var5_handle);
			
 
				+
			
 
				+	starpu_data_unregister(var1_handle);
			
 
				+	starpu_data_unregister(var2_handle);
			
 
				+	starpu_data_unregister(var3_handle);
			
 
				+	starpu_data_unregister(var4_handle);
			
 
				+	starpu_data_unregister(var5_handle);
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	STARPU_RETURN(ret);
			
 
				+}
			
--- a/tests/main/increment_codelet.c
+++ b/tests/main/increment_codelet.c
@@ -0,0 +1,40 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "increment_codelet.h"
			
 
				+
			
 
				+void cpu_increment(void *descr[], void *arg)
			
 
				+{
			
 
				+	(void)arg;
			
 
				+	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				+	(*var)++;
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet increment_codelet =
			
 
				+{
			
 
				+	.cpu_funcs = {cpu_increment},
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = {cuda_host_increment},
			
 
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+	// TODO
			
 
				+	//.opencl_funcs = {dummy_func},
			
 
				+	.cpu_funcs_name = {"cpu_increment"},
			
 
				+	.model = NULL,
			
 
				+	.modes = { STARPU_RW },
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
--- a/tests/main/increment_codelet.h
+++ b/tests/main/increment_codelet.h
@@ -0,0 +1,21 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+extern void cuda_host_increment(void *descr[], void *_args);
			
 
				+extern void cpu_increment(void *descr[], void *arg);
			
 
				+extern struct starpu_codelet increment_codelet;
			
--- a/tests/main/subgraph_repeat.c
+++ b/tests/main/subgraph_repeat.c
@@ -17,6 +17,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/thread.h>
			
 
				 
			
 
				+#include "increment_codelet.h"
			
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
@@ -51,30 +52,6 @@ static unsigned *check_cnt;
 
				 static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
			
 
				 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				 
			
 
				-extern void cuda_host_increment(void *descr[], void *_args);
			
 
				-
			
 
				-void cpu_increment(void *descr[], void *arg)
			
 
				-{
			
 
				-	(void)arg;
			
 
				-	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				-	(*var)++;
			
 
				-}
			
 
				-
			
 
				-static struct starpu_codelet dummy_codelet =
			
 
				-{
			
 
				-	.cpu_funcs = {cpu_increment},
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	.cuda_funcs = {cuda_host_increment},
			
 
				-	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				-#endif
			
 
				-	// TODO
			
 
				-	//.opencl_funcs = {dummy_func},
			
 
				-	.cpu_funcs_name = {"cpu_increment"},
			
 
				-	.model = NULL,
			
 
				-	.modes = { STARPU_RW },
			
 
				-	.nbuffers = 1
			
 
				-};
			
 
				-
			
 
				 static void callback_task_D(void *arg)
			
 
				 {
			
 
				 	(void)arg;
			
@@ -120,19 +97,19 @@ int main(int argc, char **argv)
 
				 	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
			
 
				 
			
 
				 	starpu_task_init(&taskA);
			
 
				-	taskA.cl = &dummy_codelet;
			
 
				+	taskA.cl = &increment_codelet;
			
 
				 	taskA.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskB);
			
 
				-	taskB.cl = &dummy_codelet;
			
 
				+	taskB.cl = &increment_codelet;
			
 
				 	taskB.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskC);
			
 
				-	taskC.cl = &dummy_codelet;
			
 
				+	taskC.cl = &increment_codelet;
			
 
				 	taskC.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskD);
			
 
				-	taskD.cl = &dummy_codelet;
			
 
				+	taskD.cl = &increment_codelet;
			
 
				 	taskD.callback_func = callback_task_D;
			
 
				 	taskD.handles[0] = check_data;
			
 
				 
			
--- a/tests/main/subgraph_repeat_regenerate.c
+++ b/tests/main/subgraph_repeat_regenerate.c
@@ -17,6 +17,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/thread.h>
			
 
				 
			
 
				+#include "increment_codelet.h"
			
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
@@ -55,28 +56,6 @@ static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 
				 
			
 
				 extern void cuda_host_increment(void *descr[], void *_args);
			
 
				 
			
 
				-void cpu_increment(void *descr[], void *arg)
			
 
				-{
			
 
				-	(void)arg;
			
 
				-	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				-	(*var)++;
			
 
				-}
			
 
				-
			
 
				-static struct starpu_codelet dummy_codelet =
			
 
				-{
			
 
				-	.cpu_funcs = {cpu_increment},
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	.cuda_funcs = {cuda_host_increment},
			
 
				-	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				-#endif
			
 
				-	// TODO
			
 
				-	//.opencl_funcs = {dummy_func},
			
 
				-	.cpu_funcs_name = {"cpu_increment"},
			
 
				-	.model = NULL,
			
 
				-	.modes = { STARPU_RW },
			
 
				-	.nbuffers = 1
			
 
				-};
			
 
				-
			
 
				 static void callback_task_B(void *arg)
			
 
				 {
			
 
				 	(void)arg;
			
@@ -136,24 +115,24 @@ int main(int argc, char **argv)
 
				 	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
			
 
				 
			
 
				 	starpu_task_init(&taskA);
			
 
				-	taskA.cl = &dummy_codelet;
			
 
				+	taskA.cl = &increment_codelet;
			
 
				 	taskA.regenerate = 0; /* this task will be explicitely resubmitted if needed */
			
 
				 	taskA.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskB);
			
 
				-	taskB.cl = &dummy_codelet;
			
 
				+	taskB.cl = &increment_codelet;
			
 
				 	taskB.callback_func = callback_task_B;
			
 
				 	taskB.regenerate = 1;
			
 
				 	taskB.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskC);
			
 
				-	taskC.cl = &dummy_codelet;
			
 
				+	taskC.cl = &increment_codelet;
			
 
				 	taskC.callback_func = callback_task_C;
			
 
				 	taskC.regenerate = 1;
			
 
				 	taskC.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskD);
			
 
				-	taskD.cl = &dummy_codelet;
			
 
				+	taskD.cl = &increment_codelet;
			
 
				 	taskD.callback_func = callback_task_D;
			
 
				 	taskD.regenerate = 1;
			
 
				 	taskD.handles[0] = check_data;
			
--- a/tests/main/subgraph_repeat_regenerate_tag.c
+++ b/tests/main/subgraph_repeat_regenerate_tag.c
@@ -17,6 +17,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/thread.h>
			
 
				 
			
 
				+#include "increment_codelet.h"
			
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
@@ -61,30 +62,6 @@ static unsigned *check_cnt;
 
				 static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
			
 
				 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				 
			
 
				-extern void cuda_host_increment(void *descr[], void *_args);
			
 
				-
			
 
				-void cpu_increment(void *descr[], void *arg)
			
 
				-{
			
 
				-	(void)arg;
			
 
				-	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				-	(*var)++;
			
 
				-}
			
 
				-
			
 
				-static struct starpu_codelet dummy_codelet =
			
 
				-{
			
 
				-	.cpu_funcs = {cpu_increment},
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	.cuda_funcs = {cuda_host_increment},
			
 
				-	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				-#endif
			
 
				-	// TODO
			
 
				-	//.opencl_funcs = {dummy_func},
			
 
				-	.cpu_funcs_name = {"cpu_increment"},
			
 
				-	.model = NULL,
			
 
				-	.modes = { STARPU_RW },
			
 
				-	.nbuffers = 1
			
 
				-};
			
 
				-
			
 
				 static void callback_task_A(void *arg)
			
 
				 {
			
 
				 	(void)arg;
			
@@ -169,7 +146,7 @@ int main(int argc, char **argv)
 
				 	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
			
 
				 
			
 
				 	starpu_task_init(&taskA);
			
 
				-	taskA.cl = &dummy_codelet;
			
 
				+	taskA.cl = &increment_codelet;
			
 
				 	taskA.regenerate = 1; /* this task will be explicitely resubmitted if needed */
			
 
				 	taskA.use_tag = 1;
			
 
				 	taskA.tag_id = TAG_A;
			
@@ -177,7 +154,7 @@ int main(int argc, char **argv)
 
				 	taskA.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskB);
			
 
				-	taskB.cl = &dummy_codelet;
			
 
				+	taskB.cl = &increment_codelet;
			
 
				 	taskB.regenerate = 1;
			
 
				 	taskB.use_tag = 1;
			
 
				 	taskB.tag_id = TAG_B;
			
@@ -185,7 +162,7 @@ int main(int argc, char **argv)
 
				 	taskB.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskC);
			
 
				-	taskC.cl = &dummy_codelet;
			
 
				+	taskC.cl = &increment_codelet;
			
 
				 	taskC.regenerate = 1;
			
 
				 	taskC.use_tag = 1;
			
 
				 	taskC.tag_id = TAG_C;
			
@@ -193,7 +170,7 @@ int main(int argc, char **argv)
 
				 	taskC.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskD);
			
 
				-	taskD.cl = &dummy_codelet;
			
 
				+	taskD.cl = &increment_codelet;
			
 
				 	taskD.callback_func = callback_task_D;
			
 
				 	taskD.regenerate = 1;
			
 
				 	taskD.use_tag = 1;
			
--- a/tests/main/subgraph_repeat_tag.c
+++ b/tests/main/subgraph_repeat_tag.c
@@ -16,6 +16,7 @@
 
				 
			
 
				 #include <starpu.h>
			
 
				 
			
 
				+#include "increment_codelet.h"
			
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
@@ -52,30 +53,6 @@ static unsigned *check_cnt;
 
				 static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
			
 
				 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				 
			
 
				-extern void cuda_host_increment(void *descr[], void *_args);
			
 
				-
			
 
				-void cpu_increment(void *descr[], void *arg)
			
 
				-{
			
 
				-	(void)arg;
			
 
				-	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				-	(*var)++;
			
 
				-}
			
 
				-
			
 
				-static struct starpu_codelet dummy_codelet =
			
 
				-{
			
 
				-	.cpu_funcs = {cpu_increment},
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	.cuda_funcs = {cuda_host_increment},
			
 
				-	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				-#endif
			
 
				-	// TODO
			
 
				-	//.opencl_funcs = {dummy_func},
			
 
				-	.cpu_funcs_name = {"cpu_increment"},
			
 
				-	.model = NULL,
			
 
				-	.modes = { STARPU_RW },
			
 
				-	.nbuffers = 1
			
 
				-};
			
 
				-
			
 
				 static void callback_task_B(void *arg)
			
 
				 {
			
 
				 	(void)arg;
			
@@ -145,24 +122,24 @@ int main(int argc, char **argv)
 
				 	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
			
 
				 
			
 
				 	starpu_task_init(&taskA);
			
 
				-	taskA.cl = &dummy_codelet;
			
 
				+	taskA.cl = &increment_codelet;
			
 
				 	taskA.regenerate = 0; /* this task will be explicitely resubmitted if needed */
			
 
				 	taskA.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskB);
			
 
				-	taskB.cl = &dummy_codelet;
			
 
				+	taskB.cl = &increment_codelet;
			
 
				 	taskB.regenerate = 1;
			
 
				 	taskB.callback_func = callback_task_B;
			
 
				 	taskB.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskC);
			
 
				-	taskC.cl = &dummy_codelet;
			
 
				+	taskC.cl = &increment_codelet;
			
 
				 	taskC.regenerate = 1;
			
 
				 	taskC.callback_func = callback_task_C;
			
 
				 	taskC.handles[0] = check_data;
			
 
				 
			
 
				 	starpu_task_init(&taskD);
			
 
				-	taskD.cl = &dummy_codelet;
			
 
				+	taskD.cl = &increment_codelet;
			
 
				 	taskD.callback_func = callback_task_D;
			
 
				 	taskD.regenerate = 1;
			
 
				 	taskD.handles[0] = check_data;
			
--- a/tests/perfmodels/regression_based_check.c
+++ b/tests/perfmodels/regression_based_check.c
--- a/tests/perfmodels/regression_based_energy.c
+++ b/tests/perfmodels/regression_based_energy.c
--- a/tests/perfmodels/regression_based_gpu.c
+++ b/tests/perfmodels/regression_based_gpu.c
--- a/tests/perfmodels/regression_based.c
+++ b/tests/perfmodels/regression_based.c
@@ -20,7 +20,7 @@
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
 
				- * Benchmark memset with a linear regression
			
 
				+ * Benchmark memset with a linear and non-linear regression
			
 
				  */
			
 
				 
			
 
				 #define STARTlin 1024
			
--- a/tests/perfmodels/regression_based_multiimpl.c
+++ b/tests/perfmodels/regression_based_multiimpl.c