浏览代码

Add environment variables to disable only CUDA or only OpenCL asynchronous copies, namely STARPU_DISABLE_CUDA_ASYNCHRONOUS_COPY and STARPU_DISABLE_OPENCL_ASYNCHRONOUS_COPY

Nathalie Furmento 13 年之前
父节点
当前提交
c0c5adb057
共有 4 个文件被更改,包括 71 次插入2 次删除
  1. 24 1
      doc/chapters/basic-api.texi
  2. 7 1
      include/starpu.h
  3. 20 0
      src/core/workers.c
  4. 20 0
      src/datawizard/interfaces/data_interface.c

+ 24 - 1
doc/chapters/basic-api.texi

@@ -143,12 +143,25 @@ This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environ
 
 @item @code{int disable_asynchronous_copy} (default = 0)
 This flag should be set to 1 to disable asynchronous copies between
-CPUs and accelerators. This can also be specified with the
+CPUs and all accelerators. This can also be specified with the
 @code{STARPU_DISABLE_ASYNCHRONOUS_COPY} environment variable.
 The AMD implementation of OpenCL is known to
 fail when copying data asynchronously. When using this implementation,
 it is therefore necessary to disable asynchronous data transfers.
 
+@item @code{int disable_cuda_asynchronous_copy} (default = 0)
+This flag should be set to 1 to disable asynchronous copies between
+CPUs and CUDA accelerators. This can also be specified with the
+@code{STARPU_DISABLE_CUDA_ASYNCHRONOUS_COPY} environment variable.
+
+@item @code{int disable_opencl_asynchronous_copy} (default = 0)
+This flag should be set to 1 to disable asynchronous copies between
+CPUs and OpenCL accelerators. This can also be specified with the
+@code{STARPU_DISABLE_OPENCL_ASYNCHRONOUS_COPY} environment variable.
+The AMD implementation of OpenCL is known to
+fail when copying data asynchronously. When using this implementation,
+it is therefore necessary to disable asynchronous data transfers.
+
 @item @code{int *cuda_opengl_interoperability} (default = NULL)
 This can be set to an array of CUDA device identifiers for which
 @code{cudaGLSetGLDevice} should be called instead of @code{cudaSetDevice}. Its
@@ -190,6 +203,16 @@ Return 1 if asynchronous data transfers between CPU and accelerators
 are disabled.
 @end deftypefun
 
+@deftypefun int starpu_asynchronous_cuda_copy_disabled ()
+Return 1 if asynchronous data transfers between CPU and CUDA accelerators
+are disabled.
+@end deftypefun
+
+@deftypefun int starpu_asynchronous_opencl_copy_disabled ()
+Return 1 if asynchronous data transfers between CPU and OpenCL accelerators
+are disabled.
+@end deftypefun
+
 @node Workers' Properties
 @section Workers' Properties
 

+ 7 - 1
include/starpu.h

@@ -130,9 +130,15 @@ struct starpu_conf
 	/* Create only one combined worker, containing all CPU workers */
 	int single_combined_worker;
 
-        /* indicate if the asynchronous copies should be disabled */
+        /* indicate if all asynchronous copies should be disabled */
 	int disable_asynchronous_copy;
 
+        /* indicate if asynchronous copies to and from CUDA devices should be disabled */
+	int disable_cuda_asynchronous_copy;
+
+        /* indicate if asynchronous copies to and from OpenCL devices should be disabled */
+	int disable_opencl_asynchronous_copy;
+
 	/* Enable CUDA/OpenGL interoperation on these CUDA devices */
 	int *cuda_opengl_interoperability;
 	unsigned n_cuda_opengl_interoperability;

+ 20 - 0
src/core/workers.c

@@ -493,6 +493,14 @@ int starpu_conf_init(struct starpu_conf *conf)
 	if (conf->disable_asynchronous_copy == -1)
 		conf->disable_asynchronous_copy = 0;
 
+	conf->disable_cuda_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_CUDA_ASYNCHRONOUS_COPY");
+	if (conf->disable_cuda_asynchronous_copy == -1)
+		conf->disable_cuda_asynchronous_copy = 0;
+
+	conf->disable_opencl_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_OPENCL_ASYNCHRONOUS_COPY");
+	if (conf->disable_opencl_asynchronous_copy == -1)
+		conf->disable_opencl_asynchronous_copy = 0;
+
 	return 0;
 }
 
@@ -523,6 +531,8 @@ static void _starpu_conf_check_environment(struct starpu_conf *conf)
 	_starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate);
 	_starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_CUDA_ASYNCHRONOUS_COPY", &conf->disable_cuda_asynchronous_copy);
+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_OPENCL_ASYNCHRONOUS_COPY", &conf->disable_opencl_asynchronous_copy);
 }
 
 int starpu_init(struct starpu_conf *user_conf)
@@ -872,6 +882,16 @@ int starpu_asynchronous_copy_disabled()
 	return config.conf->disable_asynchronous_copy;
 }
 
+int starpu_asynchronous_cuda_copy_disabled()
+{
+	return config.conf->disable_cuda_asynchronous_copy; /* flag read from the active starpu_conf; non-zero means asynchronous CUDA copies are disabled */
+}
+
+int starpu_asynchronous_opencl_copy_disabled()
+{
+	return config.conf->disable_opencl_asynchronous_copy; /* flag read from the active starpu_conf; non-zero means asynchronous OpenCL copies are disabled */
+}
+
 /* When analyzing performance, it is useful to see what is the processing unit
  * that actually performed the task. This function returns the id of the
  * processing unit actually executing it, therefore it makes no sense to use it

+ 20 - 0
src/datawizard/interfaces/data_interface.c

@@ -306,6 +306,26 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 #endif
 	}
 
+#ifdef STARPU_USE_CUDA
+	int asynchronous_cuda_copy_disabled = starpu_asynchronous_cuda_copy_disabled();
+	if (STARPU_UNLIKELY(asynchronous_cuda_copy_disabled))
+	{
+		ops->copy_methods->ram_to_cuda_async = NULL;
+		ops->copy_methods->cuda_to_ram_async = NULL;
+		ops->copy_methods->cuda_to_cuda_async = NULL;
+	}
+#endif
+
+#ifdef STARPU_USE_OPENCL
+	int asynchronous_opencl_copy_disabled = starpu_asynchronous_opencl_copy_disabled();
+	if (STARPU_UNLIKELY(asynchronous_opencl_copy_disabled)) /* clear the asynchronous OpenCL copy methods */
+	{
+		ops->copy_methods->ram_to_opencl_async = NULL;
+		ops->copy_methods->opencl_to_ram_async = NULL;
+		ops->copy_methods->opencl_to_opencl_async = NULL;
+	}
+#endif /* STARPU_USE_OPENCL: the closing brace must stay inside the guard so builds without OpenCL keep balanced braces */
+
 	/* fill the interface fields with the appropriate method */
 	STARPU_ASSERT(ops->register_data_handle);
 	ops->register_data_handle(handle, home_node, data_interface);