14 years ago · a1d45828d9
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -107,8 +107,11 @@ void _starpu_update_data_state(starpu_data_handle handle,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int worker_supports_direct_access(unsigned node)
			
 
				+static int worker_supports_direct_access(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	if (node == handling_node)
			
 
				+		return 1;
			
 
				+
			
 
				 	int type = _starpu_get_node_kind(node);
			
 
				 	switch (type)
			
 
				 	{
			
@@ -116,7 +119,7 @@ static int worker_supports_direct_access(unsigned node)
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 			/* GPUs not always allow direct remote access: if CUDA4
			
 
				 			 * is enabled, we allow two CUDA devices to communicate. */
			
 
				-			return 1;
			
 
				+			return (type_handling != STARPU_OPENCL_RAM);
			
 
				 #else
			
 
				 			/* Direct GPU-GPU transfers are not allowed in general */
			
 
				 			return 0;
			
@@ -128,14 +131,8 @@ static int worker_supports_direct_access(unsigned node)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int link_supports_direct_transfers(starpu_data_handle handle, unsigned src_node, unsigned dst_node)
			
 
				+static int link_supports_direct_transfers(starpu_data_handle handle, unsigned src_node, unsigned dst_node, unsigned *handling_node)
			
 
				 {
			
 
				-	/* NB: when OpenCL and CUDA support peer transfers, we'll need to apply
			
 
				-	 * a little more checking here! */
			
 
				-
			
 
				-	int valid_link = (worker_supports_direct_access(src_node)
			
 
				-			&& worker_supports_direct_access(dst_node));
			
 
				-
			
 
				 	/* XXX That's a hack until we get cudaMemcpy3DPeerAsync to work !
			
 
				 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
			
 
				 	 * method ! */
			
@@ -143,10 +140,24 @@ static int link_supports_direct_transfers(starpu_data_handle handle, unsigned sr
 
				 	if (src_node != dst_node && _starpu_get_node_kind(src_node) == STARPU_CUDA_RAM && _starpu_get_node_kind(dst_node) == STARPU_CUDA_RAM)
			
 
				 	{
			
 
				 		const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-		return (!!copy_methods->cuda_to_cuda_async && valid_link);
			
 
				+		if (!copy_methods->cuda_to_cuda_async)
			
 
				+			return 0;
			
 
				 	}
			
 
				 #endif
			
 
				-	return valid_link;
			
 
				+
			
 
				+	if (worker_supports_direct_access(src_node, dst_node))
			
 
				+	{
			
 
				+		*handling_node = dst_node;
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	if (worker_supports_direct_access(dst_node, src_node))
			
 
				+	{
			
 
				+		*handling_node = src_node;
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /* Determines the path of a request : each hop is defined by (src,dst) and the
			
@@ -159,9 +170,20 @@ static int determine_request_path(starpu_data_handle handle,
 
				 				unsigned *src_nodes, unsigned *dst_nodes,
			
 
				 				unsigned *handling_nodes)
			
 
				 {
			
 
				-	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node);
			
 
				+	if (!(mode & STARPU_R))
			
 
				+	{
			
 
				+		/* The destination node should only allocate the data, no transfer is required */
			
 
				+		STARPU_ASSERT(max_len >= 1);
			
 
				+		src_nodes[0] = 0; // ignored
			
 
				+		dst_nodes[0] = dst_node;
			
 
				+		handling_nodes[0] = dst_node;
			
 
				+		return 1;
			
 
				+	}
			
 
				 
			
 
				-	if (!link_is_valid && (mode & STARPU_R)) {
			
 
				+	unsigned handling_node;
			
 
				+	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node);
			
 
				+
			
 
				+	if (!link_is_valid) {
			
 
				 		/* We need an intermediate hop to implement data staging
			
 
				 		 * through main memory. */
			
 
				 		STARPU_ASSERT(max_len >= 2);
			
@@ -181,17 +203,8 @@ static int determine_request_path(starpu_data_handle handle,
 
				 		return 2;
			
 
				 	}
			
 
				 	else {
			
 
				-		unsigned handling_node;
			
 
				-		int src_supports_peer;
			
 
				-
			
 
				 		STARPU_ASSERT(max_len >= 1);
			
 
				 		
			
 
				-		/* If we do have to perform a transfer (mode & STARPU_R), but
			
 
				-		 * the source does not support peer access, then we need to
			
 
				-		 * rely on the source to perform the transfer. */
			
 
				-		src_supports_peer = worker_supports_direct_access(src_node);
			
 
				-		handling_node = ((mode & STARPU_R) && !src_supports_peer)?src_node:dst_node;
			
 
				-
			
 
				 		src_nodes[0] = src_node;
			
 
				 		dst_nodes[0] = dst_node;
			
 
				 		handling_nodes[0] = handling_node;