浏览代码

Reduce the memory footprint of the "requested" bitfield

Samuel Thibault 11 年之前
父节点
当前提交
4a9e46d095

+ 15 - 8
src/datawizard/coherency.c

@@ -150,7 +150,7 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
 
 	/* the data is present now */
 	/* the data is present now */
 	unsigned requesting_node = requesting_replicate->memory_node;
 	unsigned requesting_node = requesting_replicate->memory_node;
-	requesting_replicate->requested[requesting_node] = 0;
+	requesting_replicate->requested &= ~(1UL << requesting_node);
 
 
 	if (mode & STARPU_W)
 	if (mode & STARPU_W)
 	{
 	{
@@ -655,18 +655,25 @@ void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
 }
 }
 
 
-static void _starpu_set_data_requested_flag_if_needed(struct _starpu_data_replicate *replicate)
+static void _starpu_set_data_requested_flag_if_needed(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate)
 {
 {
-// XXX : this is just a hint, so we don't take the lock ...
-//	_starpu_spin_lock(&handle->header_lock);
+	unsigned local_node = _starpu_memory_node_get_local_key();
+	int cpt = 0;
+	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
+	{
+		cpt++;
+		_starpu_datawizard_progress(local_node, 1);
+	}
+	if (cpt == STARPU_SPIN_MAXTRY)
+		_starpu_spin_lock(&handle->header_lock);
 
 
 	if (replicate->state == STARPU_INVALID)
 	if (replicate->state == STARPU_INVALID)
 	{
 	{
 		unsigned dst_node = replicate->memory_node;
 		unsigned dst_node = replicate->memory_node;
-		replicate->requested[dst_node] = 1;
+		replicate->requested |= 1UL << dst_node;
 	}
 	}
 
 
-//	_starpu_spin_unlock(&handle->header_lock);
+	_starpu_spin_unlock(&handle->header_lock);
 }
 }
 
 
 int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
@@ -685,7 +692,7 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 		struct _starpu_data_replicate *replicate = &handle->per_node[node];
 		struct _starpu_data_replicate *replicate = &handle->per_node[node];
 		prefetch_data_on_node(handle, replicate, mode);
 		prefetch_data_on_node(handle, replicate, mode);
 
 
-		_starpu_set_data_requested_flag_if_needed(replicate);
+		_starpu_set_data_requested_flag_if_needed(handle, replicate);
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -876,7 +883,7 @@ unsigned _starpu_is_data_present_or_requested(starpu_data_handle_t handle, unsig
 
 
 		for (i = 0; i < nnodes; i++)
 		for (i = 0; i < nnodes; i++)
 		{
 		{
-			if (handle->per_node[node].requested[i] || handle->per_node[node].request[i])
+			if ((handle->per_node[node].requested & (1UL << i)) || handle->per_node[node].request[i])
 				ret = 1;
 				ret = 1;
 		}
 		}
 
 

+ 1 - 1
src/datawizard/coherency.h

@@ -78,7 +78,7 @@ LIST_TYPE(_starpu_data_replicate,
 	   flag when it assigns a task to a queue, policies which do not
 	   flag when it assigns a task to a queue, policies which do not
 	   use this hint can simply ignore it.
 	   use this hint can simply ignore it.
 	 */
 	 */
-	uint8_t requested[STARPU_MAXNODES];
+	uint32_t requested;
 	struct _starpu_data_request *request[STARPU_MAXNODES];
 	struct _starpu_data_request *request[STARPU_MAXNODES];
 )
 )
 
 

+ 1 - 1
src/datawizard/filters.c

@@ -241,10 +241,10 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 			child_replicate->automatically_allocated = 0;
 			child_replicate->automatically_allocated = 0;
 			child_replicate->refcnt = 0;
 			child_replicate->refcnt = 0;
 			child_replicate->memory_node = starpu_worker_get_memory_node(worker);
 			child_replicate->memory_node = starpu_worker_get_memory_node(worker);
+			child_replicate->requested = 0;
 
 
 			for (node = 0; node < STARPU_MAXNODES; node++)
 			for (node = 0; node < STARPU_MAXNODES; node++)
 			{
 			{
-				child_replicate->requested[node] = 0;
 				child_replicate->request[node] = NULL;
 				child_replicate->request[node] = NULL;
 			}
 			}
 
 

+ 1 - 1
src/datawizard/interfaces/data_interface.c

@@ -269,10 +269,10 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 		replicate->state = STARPU_INVALID;
 		replicate->state = STARPU_INVALID;
 		replicate->refcnt = 0;
 		replicate->refcnt = 0;
 		replicate->handle = handle;
 		replicate->handle = handle;
+		replicate->requested = 0;
 
 
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 		{
-			replicate->requested[node] = 0;
 			replicate->request[node] = NULL;
 			replicate->request[node] = NULL;
 		}
 		}
 
 

+ 2 - 4
src/datawizard/user_interactions.c

@@ -519,9 +519,7 @@ void starpu_data_set_default_sequential_consistency_flag(unsigned flag)
 /* Query the status of the handle on the specified memory node. */
 /* Query the status of the handle on the specified memory node. */
 void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested)
 void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested)
 {
 {
-#ifdef STARPU_DEVEL
-#warning FIXME
-#endif
+// XXX : this is just a hint, so we don't take the lock ...
 //	_starpu_spin_lock(&handle->header_lock);
 //	_starpu_spin_lock(&handle->header_lock);
 
 
 	if (is_allocated)
 	if (is_allocated)
@@ -537,7 +535,7 @@ void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int
 		unsigned node;
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 		{
-			if (handle->per_node[memory_node].requested[node])
+			if (handle->per_node[memory_node].requested & (1UL << node))
 			{
 			{
 				requested = 1;
 				requested = 1;
 				break;
 				break;