Browse Source

Optimize calling starpu_data_dup_ro on a read-only data

Samuel Thibault 4 years ago
parent
commit
f0fbd7cdbc

+ 2 - 1
include/starpu_helper.h

@@ -189,7 +189,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 /**
    Create a copy of \p src_handle, and return a new handle in \p dst_handle,
    which is to be used only for read accesses. This allows StarPU to optimize it
-   by not actually copying the data whenever possible.
+   by not actually copying the data whenever possible (e.g. it may possibly
+   simply return src_handle itself).
    The parameter \p asynchronous indicates whether the function should block
    or not. In the case of an asynchronous call, it is possible to synchronize
    with the termination of this operation either by the means of implicit

+ 2 - 0
src/core/dependencies/data_concurrency.c

@@ -142,6 +142,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 		_starpu_spin_lock(&handle->header_lock);
 	}
 
+	STARPU_ASSERT_MSG(!(handle->readonly && (mode & STARPU_W)), "Read-only handles can not be written to");
+
 	/* If we have a request that is not used for the reduction, and that a
 	 * reduction is pending, we put it at the end of normal list, and we
 	 * use the reduction_req_list instead */

+ 6 - 0
src/datawizard/coherency.h

@@ -191,6 +191,10 @@ struct _starpu_data_state
 	/** what is the default write-through mask for that data ? */
 	uint32_t wt_mask;
 
+	/** for a readonly handle, the number of times that we have returned again the
+	    same handle and thus the number of times we have to ignore unregistration requests */
+	unsigned aliases;
+
 	/** in some case, the application may explicitly tell StarPU that a
  	 * piece of data is not likely to be used soon again */
 	unsigned is_not_important:1;
@@ -199,6 +203,8 @@ struct _starpu_data_state
 	unsigned sequential_consistency:1;
 	/** Is the data initialized, or a task is already submitted to initialize it */
 	unsigned initialized:1;
+	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
+	unsigned readonly:1;
 	/** Can the data be pushed to the disk? */
 	unsigned ooc:1;
 

+ 3 - 0
src/datawizard/filters.c

@@ -250,10 +250,13 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		child->home_node = initial_handle->home_node;
 		child->wt_mask = initial_handle->wt_mask;
 
+		child->aliases = initial_handle->aliases;
+
 		child->is_not_important = initial_handle->is_not_important;
 
 		child->sequential_consistency = initial_handle->sequential_consistency;
 		child->initialized = initial_handle->initialized;
+		child->readonly = initial_handle->readonly;
 		child->ooc = initial_handle->ooc;
 
 		/* The methods used for reduction are propagated to the

+ 22 - 0
src/datawizard/interfaces/data_interface.c

@@ -275,11 +275,14 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
 	handle->wt_mask = wt_mask;
 
+	//handle->aliases = 0;
+
 	//handle->is_not_important = 0;
 
 	handle->sequential_consistency =
 		starpu_data_get_default_sequential_consistency_flag();
 	handle->initialized = home_node != -1;
+	//handle->readonly = 0;
 	handle->ooc = 1;
 
 	/* By default, there are no methods available to perform a reduction */
@@ -453,9 +456,11 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 
 	//handle->home_node
 	//handle->wt_mask
+	//handle->aliases = 0;
 	//handle->is_not_important
 	//handle->sequential_consistency
 	//handle->initialized
+	//handle->readonly
 	//handle->ooc
 	//handle->lazy_unregister = 0;
 	//handle->partition_automatic_disabled = 0;
@@ -788,6 +793,15 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 	/* TODO: also check that it has the latest coherency */
 	STARPU_ASSERT(!(nowait && handle->busy_count != 0));
 
+	_starpu_spin_lock(&handle->header_lock);
+	if (handle->aliases)
+	{
+		handle->aliases--;
+		_starpu_spin_unlock(&handle->header_lock);
+		return;
+	}
+        _starpu_spin_unlock(&handle->header_lock);
+
 	int sequential_consistency = handle->sequential_consistency;
 	if (sequential_consistency && !nowait)
 	{
@@ -1027,6 +1041,14 @@ void starpu_data_unregister_submit(starpu_data_handle_t handle)
 {
 	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle);
 	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
+	_starpu_spin_lock(&handle->header_lock);
+	if (handle->aliases)
+	{
+		handle->aliases--;
+		_starpu_spin_unlock(&handle->header_lock);
+		return;
+	}
+        _starpu_spin_unlock(&handle->header_lock);
 
 	/* Wait for all task dependencies on this handle before putting it for free */
 	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);

+ 13 - 2
src/util/starpu_data_cpy.c

@@ -179,7 +179,18 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle,
 			int asynchronous, void (*callback_func)(void*), void *callback_arg)
 {
-	/* TODO: optimize when src_handle is already a read-only handle or already has a read-only duplicate */
+	_starpu_spin_lock(&src_handle->header_lock);
+	if (src_handle->readonly) {
+		src_handle->aliases++;
+		_starpu_spin_unlock(&src_handle->header_lock);
+		*dst_handle = src_handle;
+		return 0;
+	}
+	_starpu_spin_unlock(&src_handle->header_lock);
+
+	/* TODO: optimize when src_handle already has a read-only duplicate */
 	starpu_data_register_same(dst_handle, src_handle);
-	return _starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
+	_starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
+	(*dst_handle)->readonly = 1;
+	return 0;
 }

+ 2 - 0
tests/helper/starpu_data_dup_ro.c

@@ -47,6 +47,8 @@ int main(int argc, char **argv)
 	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1, NULL, NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 
+	STARPU_ASSERT(var4_handle == var2_handle);
+
 	starpu_data_acquire(var2_handle, STARPU_R);
 	var2 = starpu_data_get_local_ptr(var2_handle);
 	ret = EXIT_SUCCESS;