Browse Source

Bug fix: properly determine whether it is possible to transfer data between two
memory nodes.

Cédric Augonnet 14 years ago
parent
commit
a1d45828d9
1 changed files with 35 additions and 22 deletions
  1. 35 22
      src/datawizard/coherency.c

+ 35 - 22
src/datawizard/coherency.c

@@ -107,8 +107,11 @@ void _starpu_update_data_state(starpu_data_handle handle,
 	}
 }
 
-static int worker_supports_direct_access(unsigned node)
+static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 {
+	if (node == handling_node)
+		return 1;
+
 	int type = _starpu_get_node_kind(node);
 	switch (type)
 	{
@@ -116,7 +119,7 @@ static int worker_supports_direct_access(unsigned node)
 #ifdef HAVE_CUDA_MEMCPY_PEER
 			/* GPUs not always allow direct remote access: if CUDA4
 			 * is enabled, we allow two CUDA devices to communicate. */
-			return 1;
+			return (type_handling != STARPU_OPENCL_RAM);
 #else
 			/* Direct GPU-GPU transfers are not allowed in general */
 			return 0;
@@ -128,14 +131,8 @@ static int worker_supports_direct_access(unsigned node)
 	}
 }
 
-static int link_supports_direct_transfers(starpu_data_handle handle, unsigned src_node, unsigned dst_node)
+static int link_supports_direct_transfers(starpu_data_handle handle, unsigned src_node, unsigned dst_node, unsigned *handling_node)
 {
-	/* NB: when OpenCL and CUDA support peer transfers, we'll need to apply
-	 * a little more checking here! */
-
-	int valid_link = (worker_supports_direct_access(src_node)
-			&& worker_supports_direct_access(dst_node));
-
 	/* XXX That's a hack until we get cudaMemcpy3DPeerAsync to work !
 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
 	 * method ! */
@@ -143,10 +140,24 @@ static int link_supports_direct_transfers(starpu_data_handle handle, unsigned sr
 	if (src_node != dst_node && _starpu_get_node_kind(src_node) == STARPU_CUDA_RAM && _starpu_get_node_kind(dst_node) == STARPU_CUDA_RAM)
 	{
 		const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
-		return (!!copy_methods->cuda_to_cuda_async && valid_link);
+		if (!copy_methods->cuda_to_cuda_async)
+			return 0;
 	}
 #endif
-	return valid_link;
+
+	if (worker_supports_direct_access(src_node, dst_node))
+	{
+		*handling_node = dst_node;
+		return 1;
+	}
+
+	if (worker_supports_direct_access(dst_node, src_node))
+	{
+		*handling_node = src_node;
+		return 1;
+	}
+
+	return 0;
 }
 
 /* Determines the path of a request : each hop is defined by (src,dst) and the
@@ -159,9 +170,20 @@ static int determine_request_path(starpu_data_handle handle,
 				unsigned *src_nodes, unsigned *dst_nodes,
 				unsigned *handling_nodes)
 {
-	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node);
+	if (!(mode & STARPU_R))
+	{
+		/* The destination node should only allocate the data, no transfer is required */
+		STARPU_ASSERT(max_len >= 1);
+		src_nodes[0] = 0; // ignored
+		dst_nodes[0] = dst_node;
+		handling_nodes[0] = dst_node;
+		return 1;
+	}
 
-	if (!link_is_valid && (mode & STARPU_R)) {
+	unsigned handling_node;
+	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node);
+
+	if (!link_is_valid) {
 		/* We need an intermediate hop to implement data staging
 		 * through main memory. */
 		STARPU_ASSERT(max_len >= 2);
@@ -181,17 +203,8 @@ static int determine_request_path(starpu_data_handle handle,
 		return 2;
 	}
 	else {
-		unsigned handling_node;
-		int src_supports_peer;
-
 		STARPU_ASSERT(max_len >= 1);
 		
-		/* If we do have to perform a transfer (mode & STARPU_R), but
-		 * the source does not support peer access, then we need to
-		 * rely on the source to perform the transfer. */
-		src_supports_peer = worker_supports_direct_access(src_node);
-		handling_node = ((mode & STARPU_R) && !src_supports_peer)?src_node:dst_node;
-
 		src_nodes[0] = src_node;
 		dst_nodes[0] = dst_node;
 		handling_nodes[0] = handling_node;