Browse Source

Optimize calling starpu_data_dup_ro on a read-only data

Samuel Thibault 4 years ago
parent
commit
f0fbd7cdbc

+ 2 - 1
include/starpu_helper.h

@@ -189,7 +189,8 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 /**
 /**
    Create a copy of \p src_handle, and return a new handle in \p dst_handle,
    Create a copy of \p src_handle, and return a new handle in \p dst_handle,
    which is to be used only for read accesses. This allows StarPU to optimize it
    which is to be used only for read accesses. This allows StarPU to optimize it
-   by not actually copying the data whenever possible.
+   by not actually copying the data whenever possible (e.g. it may possibly
+   simply return src_handle itself).
    The parameter \p asynchronous indicates whether the function should block
    The parameter \p asynchronous indicates whether the function should block
    or not. In the case of an asynchronous call, it is possible to synchronize
    or not. In the case of an asynchronous call, it is possible to synchronize
    with the termination of this operation either by the means of implicit
    with the termination of this operation either by the means of implicit

+ 2 - 0
src/core/dependencies/data_concurrency.c

@@ -142,6 +142,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
 	}
 	}
 
 
+	STARPU_ASSERT_MSG(!(handle->readonly && (mode & STARPU_W)), "Read-only handles can not be written to");
+
 	/* If we have a request that is not used for the reduction, and that a
 	/* If we have a request that is not used for the reduction, and that a
 	 * reduction is pending, we put it at the end of normal list, and we
 	 * reduction is pending, we put it at the end of normal list, and we
 	 * use the reduction_req_list instead */
 	 * use the reduction_req_list instead */

+ 6 - 0
src/datawizard/coherency.h

@@ -191,6 +191,10 @@ struct _starpu_data_state
 	/** what is the default write-through mask for that data ? */
 	/** what is the default write-through mask for that data ? */
 	uint32_t wt_mask;
 	uint32_t wt_mask;
 
 
+	/** for a readonly handle, the number of times that we have returned again the
+	    same handle and thus the number of times we have to ignore unregistration requests */
+	unsigned aliases;
+
 	/** in some case, the application may explicitly tell StarPU that a
 	/** in some case, the application may explicitly tell StarPU that a
  	 * piece of data is not likely to be used soon again */
  	 * piece of data is not likely to be used soon again */
 	unsigned is_not_important:1;
 	unsigned is_not_important:1;
@@ -199,6 +203,8 @@ struct _starpu_data_state
 	unsigned sequential_consistency:1;
 	unsigned sequential_consistency:1;
 	/** Is the data initialized, or a task is already submitted to initialize it */
 	/** Is the data initialized, or a task is already submitted to initialize it */
 	unsigned initialized:1;
 	unsigned initialized:1;
+	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
+	unsigned readonly:1;
 	/** Can the data be pushed to the disk? */
 	/** Can the data be pushed to the disk? */
 	unsigned ooc:1;
 	unsigned ooc:1;
 
 

+ 3 - 0
src/datawizard/filters.c

@@ -250,10 +250,13 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		child->home_node = initial_handle->home_node;
 		child->home_node = initial_handle->home_node;
 		child->wt_mask = initial_handle->wt_mask;
 		child->wt_mask = initial_handle->wt_mask;
 
 
+		child->aliases = initial_handle->aliases;
+
 		child->is_not_important = initial_handle->is_not_important;
 		child->is_not_important = initial_handle->is_not_important;
 
 
 		child->sequential_consistency = initial_handle->sequential_consistency;
 		child->sequential_consistency = initial_handle->sequential_consistency;
 		child->initialized = initial_handle->initialized;
 		child->initialized = initial_handle->initialized;
+		child->readonly = initial_handle->readonly;
 		child->ooc = initial_handle->ooc;
 		child->ooc = initial_handle->ooc;
 
 
 		/* The methods used for reduction are propagated to the
 		/* The methods used for reduction are propagated to the

+ 22 - 0
src/datawizard/interfaces/data_interface.c

@@ -275,11 +275,14 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
 
 	handle->wt_mask = wt_mask;
 	handle->wt_mask = wt_mask;
 
 
+	//handle->aliases = 0;
+
 	//handle->is_not_important = 0;
 	//handle->is_not_important = 0;
 
 
 	handle->sequential_consistency =
 	handle->sequential_consistency =
 		starpu_data_get_default_sequential_consistency_flag();
 		starpu_data_get_default_sequential_consistency_flag();
 	handle->initialized = home_node != -1;
 	handle->initialized = home_node != -1;
+	//handle->readonly = 0;
 	handle->ooc = 1;
 	handle->ooc = 1;
 
 
 	/* By default, there are no methods available to perform a reduction */
 	/* By default, there are no methods available to perform a reduction */
@@ -453,9 +456,11 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 
 
 	//handle->home_node
 	//handle->home_node
 	//handle->wt_mask
 	//handle->wt_mask
+	//handle->aliases = 0;
 	//handle->is_not_important
 	//handle->is_not_important
 	//handle->sequential_consistency
 	//handle->sequential_consistency
 	//handle->initialized
 	//handle->initialized
+	//handle->readonly
 	//handle->ooc
 	//handle->ooc
 	//handle->lazy_unregister = 0;
 	//handle->lazy_unregister = 0;
 	//handle->partition_automatic_disabled = 0;
 	//handle->partition_automatic_disabled = 0;
@@ -788,6 +793,15 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 	/* TODO: also check that it has the latest coherency */
 	/* TODO: also check that it has the latest coherency */
 	STARPU_ASSERT(!(nowait && handle->busy_count != 0));
 	STARPU_ASSERT(!(nowait && handle->busy_count != 0));
 
 
+	_starpu_spin_lock(&handle->header_lock);
+	if (handle->aliases)
+	{
+		handle->aliases--;
+		_starpu_spin_unlock(&handle->header_lock);
+		return;
+	}
+        _starpu_spin_unlock(&handle->header_lock);
+
 	int sequential_consistency = handle->sequential_consistency;
 	int sequential_consistency = handle->sequential_consistency;
 	if (sequential_consistency && !nowait)
 	if (sequential_consistency && !nowait)
 	{
 	{
@@ -1027,6 +1041,14 @@ void starpu_data_unregister_submit(starpu_data_handle_t handle)
 {
 {
 	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle);
 	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle);
 	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
 	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
+	_starpu_spin_lock(&handle->header_lock);
+	if (handle->aliases)
+	{
+		handle->aliases--;
+		_starpu_spin_unlock(&handle->header_lock);
+		return;
+	}
+        _starpu_spin_unlock(&handle->header_lock);
 
 
 	/* Wait for all task dependencies on this handle before putting it for free */
 	/* Wait for all task dependencies on this handle before putting it for free */
 	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);
 	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);

+ 13 - 2
src/util/starpu_data_cpy.c

@@ -179,7 +179,18 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle,
 int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle,
 			int asynchronous, void (*callback_func)(void*), void *callback_arg)
 			int asynchronous, void (*callback_func)(void*), void *callback_arg)
 {
 {
-	/* TODO: optimize when src_handle is already a read-only handle or already has a read-only duplicate */
+	_starpu_spin_lock(&src_handle->header_lock);
+	if (src_handle->readonly) {
+		src_handle->aliases++;
+		_starpu_spin_unlock(&src_handle->header_lock);
+		*dst_handle = src_handle;
+		return 0;
+	}
+	_starpu_spin_unlock(&src_handle->header_lock);
+
+	/* TODO: optimize when src_handle already has a read-only duplicate */
 	starpu_data_register_same(dst_handle, src_handle);
 	starpu_data_register_same(dst_handle, src_handle);
-	return _starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
+	_starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
+	(*dst_handle)->readonly = 1;
+	return 0;
 }
 }

+ 2 - 0
tests/helper/starpu_data_dup_ro.c

@@ -47,6 +47,8 @@ int main(int argc, char **argv)
 	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1, NULL, NULL);
 	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1, NULL, NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 
 
+	STARPU_ASSERT(var4_handle == var2_handle);
+
 	starpu_data_acquire(var2_handle, STARPU_R);
 	starpu_data_acquire(var2_handle, STARPU_R);
 	var2 = starpu_data_get_local_ptr(var2_handle);
 	var2 = starpu_data_get_local_ptr(var2_handle);
 	ret = EXIT_SUCCESS;
 	ret = EXIT_SUCCESS;