Просмотр исходного кода

Add STARPU_DISABLE_CUDA_GPU_GPU_DIRECT to disable GPU-Direct

Samuel Thibault лет назад: 13
Родитель
Сommit
6b446a9ab8
3 измененных файлов с 30 добавлено и 20 удалено
  1. 4 0
      doc/chapters/configuration.texi
  2. 14 10
      src/core/perfmodel/perfmodel_bus.c
  3. 12 10
      src/drivers/cuda/driver_cuda.c

+ 4 - 0
doc/chapters/configuration.texi

@@ -342,6 +342,10 @@ The AMD implementation of OpenCL is known to
 fail when copying data asynchronously. When using this implementation,
 it is therefore necessary to disable asynchronous data transfers.
 
+@item @code{STARPU_DISABLE_CUDA_GPU_GPU_DIRECT}
+Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
+instead. This makes it possible to measure the performance impact of GPU-Direct.
+
 @end table
 
 @node Scheduling

+ 14 - 10
src/core/perfmodel/perfmodel_bus.c

@@ -191,11 +191,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the source */
 	cudaSetDevice(src);
 
-	cures = cudaDeviceCanAccessPeer(&can, src, dst);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(dst, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) { /* peer access is set up unless the variable is positive */
+		cures = cudaDeviceCanAccessPeer(&can, src, dst);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(dst, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+		}
 	}
 
 	/* Allocate a buffer on the device */
@@ -207,11 +209,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the destination */
 	cudaSetDevice(dst);
 
-	cures = cudaDeviceCanAccessPeer(&can, dst, src);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(src, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) { /* peer access is set up unless the variable is positive */
+		cures = cudaDeviceCanAccessPeer(&can, dst, src);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(src, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+		}
 	}
 
 	/* Allocate a buffer on the device */

+ 12 - 10
src/drivers/cuda/driver_cuda.c

@@ -156,16 +156,18 @@ static void init_context(int devid)
 	starpu_cuda_set_device(devid);
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
-	int nworkers = starpu_worker_get_count();
-	for (workerid = 0; workerid < nworkers; workerid++) {
-		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-		if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
-			int can;
-			cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
-			if (!cures && can) {
-				cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
-				if (!cures)
-					_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) { /* peer access is set up unless the variable is positive */
+		int nworkers = starpu_worker_get_count();
+		for (workerid = 0; workerid < nworkers; workerid++) {
+			struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+			if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
+				int can;
+				cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
+				if (!cures && can) {
+					cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
+					if (!cures)
+						_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+				}
 			}
 		}
 	}