hace 14 años · bfe3303c4b
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -81,28 +81,31 @@ void starpu_wake_all_blocked_workers(void)
 
				 static unsigned communication_cnt = 0;
			
 
				 #endif
			
 
				 
			
 
				-static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, struct starpu_data_request_s *req __attribute__((unused)))
			
 
				+static int copy_data_1_to_1_generic(starpu_data_handle handle, struct starpu_data_replicate_s *src_replicate, struct starpu_data_replicate_s *dst_replicate, struct starpu_data_request_s *req __attribute__((unused)))
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				 
			
 
				+	unsigned src_node = src_replicate->memory_node;
			
 
				+	unsigned dst_node = dst_replicate->memory_node;
			
 
				+
			
 
				 	starpu_node_kind src_kind = _starpu_get_node_kind(src_node);
			
 
				 	starpu_node_kind dst_kind = _starpu_get_node_kind(dst_node);
			
 
				 
			
 
				-	STARPU_ASSERT(handle->per_node[src_node].refcnt);
			
 
				-	STARPU_ASSERT(handle->per_node[dst_node].refcnt);
			
 
				+	STARPU_ASSERT(src_replicate->refcnt);
			
 
				+	STARPU_ASSERT(dst_replicate->refcnt);
			
 
				 
			
 
				-	STARPU_ASSERT(handle->per_node[src_node].allocated);
			
 
				-	STARPU_ASSERT(handle->per_node[dst_node].allocated);
			
 
				+	STARPU_ASSERT(src_replicate->allocated);
			
 
				+	STARPU_ASSERT(dst_replicate->allocated);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cudaError_t cures;
			
 
				 	cudaStream_t *stream;
			
 
				 #endif
			
 
				 
			
 
				-	void *src_interface = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	void *dst_interface = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	void *src_interface = src_replicate->interface;
			
 
				+	void *dst_interface = dst_replicate->interface;
			
 
				 
			
 
				 	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
			
 
				 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
			
@@ -197,25 +200,32 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_data_handle handle, uint32_t src_node, 
			
 
				-		uint32_t dst_node, unsigned donotread, struct starpu_data_request_s *req, unsigned may_alloc)
			
 
				+int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_data_handle handle,
			
 
				+						struct starpu_data_replicate_s *src_replicate,
			
 
				+						struct starpu_data_replicate_s *dst_replicate,
			
 
				+						unsigned donotread,
			
 
				+						struct starpu_data_request_s *req,
			
 
				+						unsigned may_alloc)
			
 
				 {
			
 
				 	if (!donotread)
			
 
				 	{
			
 
				-		STARPU_ASSERT(handle->per_node[src_node].allocated);
			
 
				-		STARPU_ASSERT(handle->per_node[src_node].refcnt);
			
 
				+		STARPU_ASSERT(src_replicate->allocated);
			
 
				+		STARPU_ASSERT(src_replicate->refcnt);
			
 
				 	}
			
 
				 
			
 
				 	int ret_alloc, ret_copy;
			
 
				 	unsigned __attribute__((unused)) com_id = 0;
			
 
				 
			
 
				+	unsigned src_node = src_replicate->memory_node;
			
 
				+	unsigned dst_node = dst_replicate->memory_node;
			
 
				+
			
 
				 	/* first make sure the destination has an allocated buffer */
			
 
				 	ret_alloc = _starpu_allocate_memory_on_node(handle, dst_node, may_alloc);
			
 
				 	if (ret_alloc)
			
 
				 		goto nomem;
			
 
				 
			
 
				-	STARPU_ASSERT(handle->per_node[dst_node].allocated);
			
 
				-	STARPU_ASSERT(handle->per_node[dst_node].refcnt);
			
 
				+	STARPU_ASSERT(dst_replicate->allocated);
			
 
				+	STARPU_ASSERT(dst_replicate->refcnt);
			
 
				 
			
 
				 	/* if there is no need to actually read the data, 
			
 
				 	 * we do not perform any transfer */
			
@@ -235,7 +245,7 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
				 
			
 
				 		/* for now we set the size to 0 in the FxT trace XXX */
			
 
				 		STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, 0, com_id);
			
 
				-		ret_copy = copy_data_1_to_1_generic(handle, src_node, dst_node, req);
			
 
				+		ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req);
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 		if (ret_copy != -EAGAIN)
			
--- a/src/datawizard/copy_driver.h
+++ b/src/datawizard/copy_driver.h
@@ -33,6 +33,7 @@
 
				 #endif
			
 
				 
			
 
				 struct starpu_data_request_s;
			
 
				+struct starpu_data_replicate_s;
			
 
				 
			
 
				 /* this is a structure that can be queried to see whether an asynchronous
			
 
				  * transfer has terminated or not */
			
@@ -48,9 +49,15 @@ typedef union {
 
				 
			
 
				 void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid);
			
 
				 
			
 
				+/* Make sure dst_replicate has an allocated buffer and, unless donotread is
			
 
				+ * set, copy the data from src_replicate to it; callers must check the result. */
			
 
				 __attribute__((warn_unused_result))
			
 
				-int _starpu_driver_copy_data_1_to_1(starpu_data_handle handle, uint32_t node, 
			
 
				-		uint32_t requesting_node, unsigned donotread, struct starpu_data_request_s *req, unsigned may_allloc);
			
 
				+int _starpu_driver_copy_data_1_to_1(starpu_data_handle handle,
			
 
				+					struct starpu_data_replicate_s *src_replicate,
			
 
				+					struct starpu_data_replicate_s *dst_replicate,
			
 
				+					unsigned donotread,
			
 
				+					struct starpu_data_request_s *req,
			
 
				+					unsigned may_alloc);
			
 
				 
			
 
				 unsigned _starpu_driver_test_request_completion(starpu_async_channel *async_channel, unsigned handling_node);
			
 
				 void _starpu_driver_wait_request_completion(starpu_async_channel *async_channel, unsigned handling_node);
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -299,8 +299,8 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 
				 
			
 
				 	/* perform the transfer */
			
 
				 	/* the header of the data must be locked by the worker that submitted the request */
			
 
				-	r->retval = _starpu_driver_copy_data_1_to_1(handle, r->src_replicate->memory_node,
			
 
				-			r->dst_replicate->memory_node, !(r->mode & STARPU_R), r, may_alloc);
			
 
				+	r->retval = _starpu_driver_copy_data_1_to_1(handle, r->src_replicate,
			
 
				+			r->dst_replicate, !(r->mode & STARPU_R), r, may_alloc);
			
 
				 
			
 
				 	if (r->retval == -ENOMEM)
			
 
				 	{
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -131,29 +131,32 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 
				 
			
 
				 	if (handle->nchildren == 0)
			
 
				 	{
			
 
				+		struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
			
 
				+		struct starpu_data_replicate_s *dst_replicate = &handle->per_node[dst_node];
			
 
				+
			
 
				 		/* this is a leaf */
			
 
				-		switch(handle->per_node[src_node].state) {
			
 
				+		switch(src_replicate->state) {
			
 
				 		case STARPU_OWNER:
			
 
				 			/* the local node has the only copy */
			
 
				 			/* the owner is now the destination_node */
			
 
				-			handle->per_node[src_node].state = STARPU_INVALID;
			
 
				-			handle->per_node[dst_node].state = STARPU_OWNER;
			
 
				+			src_replicate->state = STARPU_INVALID;
			
 
				+			dst_replicate->state = STARPU_OWNER;
			
 
				 
			
 
				 #warning we should use requests during memory reclaim
			
 
				 			/* TODO use request !! */
			
 
				-			handle->per_node[src_node].refcnt++;
			
 
				-			handle->per_node[dst_node].refcnt++;
			
 
				+			src_replicate->refcnt++;
			
 
				+			dst_replicate->refcnt++;
			
 
				 
			
 
				-			ret = _starpu_driver_copy_data_1_to_1(handle, src_node, dst_node, 0, NULL, 1);
			
 
				+			ret = _starpu_driver_copy_data_1_to_1(handle, &handle->per_node[src_node], &handle->per_node[dst_node], 0, NULL, 1);
			
 
				 			STARPU_ASSERT(ret == 0);
			
 
				 
			
 
				-			handle->per_node[src_node].refcnt--;
			
 
				-			handle->per_node[dst_node].refcnt--;
			
 
				+			src_replicate->refcnt--;
			
 
				+			dst_replicate->refcnt--;
			
 
				 
			
 
				 			break;
			
 
				 		case STARPU_SHARED:
			
 
				 			/* some other node may have the copy */
			
 
				-			handle->per_node[src_node].state = STARPU_INVALID;
			
 
				+			src_replicate->state = STARPU_INVALID;
			
 
				 
			
 
				 			/* count the number of copies */
			
 
				 			cnt = 0;