Browse Source

Add STARPU_DISABLE_CUDA_GPU_GPU_DIRECT to disable GPU-Direct

Samuel Thibault 13 years ago
parent
commit
6b446a9ab8
3 changed files with 30 additions and 20 deletions
  1. 4 0
      doc/chapters/configuration.texi
  2. 14 10
      src/core/perfmodel/perfmodel_bus.c
  3. 12 10
      src/drivers/cuda/driver_cuda.c

+ 4 - 0
doc/chapters/configuration.texi

@@ -342,6 +342,10 @@ The AMD implementation of OpenCL is known to
 fail when copying data asynchronously. When using this implementation,
 fail when copying data asynchronously. When using this implementation,
 it is therefore necessary to disable asynchronous data transfers.
 it is therefore necessary to disable asynchronous data transfers.
 
 
+@item @code{STARPU_DISABLE_CUDA_GPU_GPU_DIRECT}
+Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
+instead. This makes it possible to measure the performance effect of GPU-Direct.
+
 @end table
 @end table
 
 
 @node Scheduling
 @node Scheduling

+ 14 - 10
src/core/perfmodel/perfmodel_bus.c

@@ -191,11 +191,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the source */
 	/* Initialize CUDA context on the source */
 	cudaSetDevice(src);
 	cudaSetDevice(src);
 
 
-	cures = cudaDeviceCanAccessPeer(&can, src, dst);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(dst, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+	/* Enable peer access from src to dst unless the user asked to disable
+	 * GPU-Direct by setting STARPU_DISABLE_CUDA_GPU_GPU_DIRECT to a positive
+	 * value. Any non-positive result (presumably including the unset case)
+	 * keeps GPU-Direct on. */
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		cures = cudaDeviceCanAccessPeer(&can, src, dst);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(dst, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+		}
 	}
 	}
 
 
 	/* Allocate a buffer on the device */
 	/* Allocate a buffer on the device */
@@ -207,11 +209,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the destination */
 	/* Initialize CUDA context on the destination */
 	cudaSetDevice(dst);
 	cudaSetDevice(dst);
 
 
-	cures = cudaDeviceCanAccessPeer(&can, dst, src);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(src, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+	/* Enable peer access from dst to src unless the user asked to disable
+	 * GPU-Direct by setting STARPU_DISABLE_CUDA_GPU_GPU_DIRECT to a positive
+	 * value. Any non-positive result (presumably including the unset case)
+	 * keeps GPU-Direct on. */
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		cures = cudaDeviceCanAccessPeer(&can, dst, src);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(src, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+		}
 	}
 	}
 
 
 	/* Allocate a buffer on the device */
 	/* Allocate a buffer on the device */

+ 12 - 10
src/drivers/cuda/driver_cuda.c

@@ -156,16 +156,18 @@ static void init_context(int devid)
 	starpu_cuda_set_device(devid);
 	starpu_cuda_set_device(devid);
 
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
 #ifdef HAVE_CUDA_MEMCPY_PEER
-	int nworkers = starpu_worker_get_count();
-	for (workerid = 0; workerid < nworkers; workerid++) {
-		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-		if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
-			int can;
-			cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
-			if (!cures && can) {
-				cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
-				if (!cures)
-					_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+	/* Enable peer access towards every other CUDA worker's device unless the
+	 * user asked to disable GPU-Direct by setting
+	 * STARPU_DISABLE_CUDA_GPU_GPU_DIRECT to a positive value. Any
+	 * non-positive result (presumably including the unset case) keeps
+	 * GPU-Direct on. */
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		int nworkers = starpu_worker_get_count();
+		for (workerid = 0; workerid < nworkers; workerid++) {
+			struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+			/* Only other CUDA devices are candidates for peer access */
+			if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
+				int can;
+				cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
+				if (!cures && can) {
+					cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
+					if (!cures)
+						_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+				}
 			}
 			}
 		}
 		}
 	}
 	}