Browse Source

Implement OpenCL -> OpenCL transfers (within the same device!) for the
"variable" and the "vector" interfaces.

Cédric Augonnet 14 years ago
parent
commit
2358e3e382

+ 25 - 0
src/datawizard/interfaces/variable_interface.c

@@ -36,6 +36,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_i
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
 #endif
 #endif
@@ -53,6 +54,7 @@ static const struct starpu_data_copy_methods variable_copy_data_methods_s = {
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 	.ram_to_opencl = copy_ram_to_opencl,
 	.ram_to_opencl = copy_ram_to_opencl,
 	.opencl_to_ram = copy_opencl_to_ram,
 	.opencl_to_ram = copy_opencl_to_ram,
+	.opencl_to_opencl = copy_opencl_to_opencl,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 #endif
 #endif
@@ -385,6 +387,29 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
 }
 }
 
 
+/*
+ * Copy a "variable" from one OpenCL buffer to another by enqueueing a
+ * device-side buffer copy on the queue returned by
+ * starpu_opencl_get_current_queue().
+ *
+ * src_interface / dst_interface are used as starpu_variable_interface_t and
+ * must describe elements of the same size (asserted below). src_node and
+ * dst_node are only forwarded to the data-copy trace macro. An OpenCL error
+ * is handed to STARPU_OPENCL_REPORT_ERROR. Returns 0.
+ *
+ * NOTE(review): the copy is only enqueued here, no event is waited on in
+ * this function -- confirm that callers rely on queue ordering for completion.
+ */
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
+{
+	cl_int err;
+
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
+
+	/* For OpenCL nodes the interface's ptr field is used as the cl_mem handle. */
+	cl_mem src_ptr = (cl_mem)src_variable->ptr;
+	cl_mem dst_ptr = (cl_mem)dst_variable->ptr;
+
+	cl_command_queue cq;
+	starpu_opencl_get_current_queue(&cq);
+
+	STARPU_ASSERT(src_variable->elemsize == dst_variable->elemsize);
+	/* Whole-variable copy: both buffers are addressed from offset 0. */
+	err = clEnqueueCopyBuffer(cq, src_ptr, dst_ptr, 0, 0, src_variable->elemsize, 0, NULL, NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* No local named elemsize exists in this function: trace the size
+	 * that was actually passed to clEnqueueCopyBuffer. */
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
+
+	return 0;
+}
+
 #endif
 #endif
 
 
 static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
 static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))

+ 26 - 0
src/datawizard/interfaces/vector_interface.c

@@ -35,6 +35,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node __attribute_
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node, void *_event);
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node, void *_event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node, void *_event);
 #endif
 #endif
@@ -52,6 +53,7 @@ static const struct starpu_data_copy_methods vector_copy_data_methods_s = {
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 	.ram_to_opencl = copy_ram_to_opencl,
 	.ram_to_opencl = copy_ram_to_opencl,
 	.opencl_to_ram = copy_opencl_to_ram,
 	.opencl_to_ram = copy_opencl_to_ram,
+	.opencl_to_opencl = copy_opencl_to_opencl,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 #endif
 #endif
@@ -421,6 +423,30 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute
 {
 {
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
 }
 }
+
+/*
+ * Copy a vector from one OpenCL buffer to another by enqueueing a
+ * device-side buffer copy on the queue returned by
+ * starpu_opencl_get_current_queue().
+ *
+ * src_interface / dst_interface are used as starpu_vector_interface_t. The
+ * number of bytes copied is taken from the source (nx * elemsize); each side
+ * contributes its own dev_handle and offset. src_node and dst_node are only
+ * forwarded to the data-copy trace macro. An OpenCL error is handed to
+ * STARPU_OPENCL_REPORT_ERROR. Returns 0.
+ *
+ * NOTE(review): the destination's nx/elemsize are not compared with the
+ * source's here -- confirm callers guarantee both vectors have the same
+ * geometry.
+ */
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)),
+				 void *dst_interface, unsigned dst_node __attribute__((unused)))
+{
+	/* cl_int matches the return type of the OpenCL API call below. */
+	cl_int err;
+
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
+
+	cl_command_queue cq;
+	starpu_opencl_get_current_queue(&cq);
+
+	/* Total payload in bytes, shared by the copy and the trace call. */
+	size_t size = src_vector->nx*src_vector->elemsize;
+
+	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle,
+				  src_vector->offset, dst_vector->offset, size, 0, NULL, NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+
+	return 0;
+}
+
+
 #endif // STARPU_USE_OPENCL
 #endif // STARPU_USE_OPENCL
 
 
 static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)),
 static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)),

+ 0 - 1
src/drivers/opencl/driver_opencl.c

@@ -1,4 +1,3 @@
-
 /*
 /*
  * StarPU
  * StarPU
  * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
  * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)