Explorar o código

Add CUDA to CUDA support to starpu_cuda_copy_async_sync().

Cyril Roelandt %!s(int64=12) %!d(string=hai) anos
pai
achega
31de845f49
Modificáronse 1 ficheiros con 48 adicións e 5 borrados
  1. 48 5
      src/drivers/cuda/driver_cuda.c

+ 48 - 5
src/drivers/cuda/driver_cuda.c

@@ -507,21 +507,64 @@ void starpu_cuda_report_error(const char *func, const char *file, int line, cuda
 	STARPU_ABORT();
 }
 
-int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)
+int
+starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node,
+			    void *dst_ptr, unsigned dst_node,
+			    size_t ssize, cudaStream_t stream,
+			    enum cudaMemcpyKind kind)
 {
+#ifdef HAVE_CUDA_MEMCPY_PEER
+	int peer_copy = 0;
+	int src_dev = -1, dst_dev = -1;
+#endif
 	cudaError_t cures = 0;
 
+	if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node)
+	{
+#ifdef HAVE_CUDA_MEMCPY_PEER
+		peer_copy = 1;
+		src_dev = _starpu_memory_node_to_devid(src_node);
+		dst_dev = _starpu_memory_node_to_devid(dst_node);
+#else
+		STARPU_ABORT();
+#endif
+	}
+
 	if (stream)
 	{
-	     _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-	     cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream);
-	     _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
+		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
+#ifdef HAVE_CUDA_MEMCPY_PEER
+		if (peer_copy)
+		{
+			cures = cudaMemcpyPeerAsync((char *) dst_ptr, dst_dev,
+						    (char *) src_ptr, src_dev,
+						    ssize, stream);
+		}
+		else
+#endif
+		{
+			cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream);
+		}
+		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 	}
+
 	/* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */
 	if (stream == NULL || cures)
 	{
 		/* do it in a synchronous fashion */
-		cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind);
+#ifdef HAVE_CUDA_MEMCPY_PEER
+		if (peer_copy)
+		{
+			cures = cudaMemcpyPeer((char *) dst_ptr, dst_dev,
+					       (char *) src_ptr, src_dev,
+					       ssize);
+		}
+		else
+#endif
+		{
+			cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind);
+		}
+		
 
 		if (STARPU_UNLIKELY(cures))
 			STARPU_CUDA_REPORT_ERROR(cures);