10 years ago · ef61dfaf6a
--- a/doc/doxygen/chapters/api/data_interfaces.doxy
+++ b/doc/doxygen/chapters/api/data_interfaces.doxy
@@ -57,8 +57,9 @@ case of e.g. available particular CUDA or OpenCL support.
 
				 \ingroup API_Data_Interfaces
			
 
				 \var starpu_data_copy_methods::can_copy
			
 
				 If defined, allows the interface to declare whether it supports transferring
			
 
				-from \p src_interface on node \p src_node to \p dst_interface on node \p. If not
			
 
				-defined, it is assumed that the interface supports all transfers.
			
 
				+from \p src_interface on node \p src_node to \p dst_interface on node \p
			
 
				+dst_node, run from node \p handling_node. If not defined, it is assumed that the
			
 
				+interface supports all transfers.
			
 
				 \var starpu_data_copy_methods::ram_to_ram
			
 
				 Define how to copy data from the \p src_interface interface on the \p
			
 
				 src_node CPU node to the \p dst_interface interface on the \p dst_node
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -74,15 +74,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
				 				double time = starpu_transfer_predict(i, destination, size);
			
 
				 				unsigned handling_node;
			
 
				 
			
 
				-				/* Avoid transfers which the interface does not want */
			
 
				-				if (copy_methods->can_copy)
			
 
				-				{
			
 
				-					void *src_interface = handle->per_node[i].data_interface;
			
 
				-					void *dst_interface = handle->per_node[destination].data_interface;
			
 
				-					if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
			
 
				-						continue;
			
 
				-				}
			
 
				-
			
 
				 				/* Avoid indirect transfers */
			
 
				 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
			
 
				 					continue;
			
@@ -115,22 +106,22 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
				 		
			
 
				 		if (src_node_mask & (1<<i))
			
 
				 		{
			
 
				+			int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
			
 
				 			/* Avoid transfers which the interface does not want */
			
 
				-			if (copy_methods->can_copy)
			
 
				+			if (can_copy)
			
 
				 			{
			
 
				 				void *src_interface = handle->per_node[i].data_interface;
			
 
				 				void *dst_interface = handle->per_node[destination].data_interface;
			
 
				 				unsigned handling_node;
			
 
				 
			
 
				-				if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
			
 
				-					continue;
			
 
				-
			
 
				 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
			
 
				 				{
			
 
				 					/* Avoid through RAM if the interface does not want it */
			
 
				 					void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface;
			
 
				-					if (!copy_methods->can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM)
			
 
				-					 || !copy_methods->can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination))
			
 
				+					if ((!can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, i)
			
 
				+					  && !can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, STARPU_MAIN_RAM))
			
 
				+					 || (!can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, STARPU_MAIN_RAM)
			
 
				+					  && !can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, destination)))
			
 
				 						continue;
			
 
				 				}
			
 
				 			}
			
@@ -251,7 +242,9 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 
				 
			
 
				 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node)
			
 
				 {
			
 
				-	(void) handle; // unused
			
 
				+	int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
			
 
				+	void *src_interface = handle->per_node[src_node].data_interface;
			
 
				+	void *dst_interface = handle->per_node[dst_node].data_interface;
			
 
				 
			
 
				 	/* XXX That's a hack until we fix cudaMemcpy3DPeerAsync in the block interface
			
 
				 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
			
@@ -266,13 +259,13 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 
				 #endif
			
 
				 
			
 
				 	/* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect, but GPUDirect probably behave the same) */
			
 
				-	if (worker_supports_direct_access(src_node, dst_node))
			
 
				+	if (worker_supports_direct_access(src_node, dst_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node)))
			
 
				 	{
			
 
				 		*handling_node = dst_node;
			
 
				 		return 1;
			
 
				 	}
			
 
				 
			
 
				-	if (worker_supports_direct_access(dst_node, src_node))
			
 
				+	if (worker_supports_direct_access(dst_node, src_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node)))
			
 
				 	{
			
 
				 		*handling_node = src_node;
			
 
				 		return 1;
			
@@ -319,6 +312,10 @@ static int determine_request_path(starpu_data_handle_t handle,
 
				 
			
 
				 	if (!link_is_valid)
			
 
				 	{
			
 
				+		int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
			
 
				+		void *src_interface = handle->per_node[src_node].data_interface;
			
 
				+		void *dst_interface = handle->per_node[dst_node].data_interface;
			
 
				+
			
 
				 		/* We need an intermediate hop to implement data staging
			
 
				 		 * through main memory. */
			
 
				 		STARPU_ASSERT(max_len >= 2);
			
@@ -326,12 +323,36 @@ static int determine_request_path(starpu_data_handle_t handle,
 
				 		/* GPU -> RAM */
			
 
				 		src_nodes[0] = src_node;
			
 
				 		dst_nodes[0] = STARPU_MAIN_RAM;
			
 
				-		handling_nodes[0] = starpu_node_get_kind(src_node) == STARPU_DISK_RAM ? dst_node : src_node;
			
 
				+
			
 
				+		if (starpu_node_get_kind(src_node) == STARPU_DISK_RAM)
			
 
				+			/* Disks don't have their own driver thread */
			
 
				+			handling_nodes[0] = dst_node;
			
 
				+		else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node))
			
 
				+		{
			
 
				+			handling_nodes[0] = src_node;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, dst_node), "interface %d refuses all kinds of transfers from node %u to node %u\n", handle->ops->interfaceid, src_node, dst_node);
			
 
				+			handling_nodes[0] = dst_node;
			
 
				+		}
			
 
				 
			
 
				 		/* RAM -> GPU */
			
 
				 		src_nodes[1] = STARPU_MAIN_RAM;
			
 
				 		dst_nodes[1] = dst_node;
			
 
				-		handling_nodes[1] = starpu_node_get_kind(dst_node) == STARPU_DISK_RAM ? src_node : dst_node;
			
 
				+
			
 
				+		if (starpu_node_get_kind(dst_node) == STARPU_DISK_RAM)
			
 
				+			/* Disks don't have their own driver thread */
			
 
				+			handling_nodes[1] = src_node;
			
 
				+		else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node))
			
 
				+		{
			
 
				+			handling_nodes[1] = dst_node;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, src_node), "interface %d refuses all kinds of transfers from node %u to node %u\n", handle->ops->interfaceid, src_node, dst_node);
			
 
				+			handling_nodes[1] = src_node;
			
 
				+		}
			
 
				 
			
 
				 		return 2;
			
 
				 	}