Browse Source

Factorize opencl_to_opencl transfer functions

Samuel Thibault 12 years ago
parent
commit
6b31fc397f

+ 3 - 1
include/starpu_opencl.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -106,6 +106,8 @@ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buf
 
 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
+cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, cl_mem dst, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
+
 #ifdef __cplusplus
 }
 #endif

+ 13 - 23
src/datawizard/interfaces/variable_interface.c

@@ -40,6 +40,7 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_
 static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
+static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
 #endif
 
 static struct starpu_data_copy_methods variable_copy_data_methods_s =
@@ -64,6 +65,7 @@ static struct starpu_data_copy_methods variable_copy_data_methods_s =
 	.opencl_to_opencl = copy_opencl_to_opencl,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
+	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
 #endif
 };
 
@@ -353,36 +355,24 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
 }
 
-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event)
 {
-	cl_int err;
-
 	struct starpu_variable_interface *src_variable = src_interface;
 	struct starpu_variable_interface *dst_variable = dst_interface;
+	int err,ret; /* err: status returned by the OpenCL helper; ret: result code the helper stores through &ret */
 
-	cl_mem src_ptr = (cl_mem)src_variable->ptr;
-	cl_mem dst_ptr = (cl_mem)dst_variable->ptr;
-
-	cl_command_queue cq;
-	starpu_opencl_get_current_queue(&cq);
-	cl_event event;
-
-	STARPU_ASSERT(src_variable->elemsize == dst_variable->elemsize);
-	err= clEnqueueCopyBuffer(cq, src_ptr, dst_ptr, 0, 0, src_variable->elemsize, 0, NULL, &event);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = clWaitForEvents(1, &event);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = clReleaseEvent(event);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
+	err = starpu_opencl_copy_opencl_to_opencl((cl_mem) src_variable->ptr, src_node, (cl_mem) dst_variable->ptr, dst_node, src_variable->elemsize, 0, event, &ret); /* copies elemsize bytes at offset 0. NOTE(review): the replaced code asserted src_variable->elemsize == dst_variable->elemsize before copying; that check is no longer performed -- confirm it is not needed */
+        if (STARPU_UNLIKELY(err))
+                STARPU_OPENCL_REPORT_ERROR(err);
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
 
-	return 0;
+	return ret; /* NOTE(review): presumably 0 for a finished synchronous copy and -EAGAIN for a pending asynchronous one, as starpu_opencl_copy_opencl_to_ram does -- confirm against the helper */
+}
+
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL); /* synchronous variant: a NULL event makes the OpenCL helper wait for the copy before returning */
 }
 
 #endif

+ 14 - 20
src/datawizard/interfaces/vector_interface.c

@@ -40,6 +40,7 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
+static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
 #endif
 
 static struct starpu_data_copy_methods vector_copy_data_methods_s =
@@ -64,6 +65,7 @@ static struct starpu_data_copy_methods vector_copy_data_methods_s =
 	.opencl_to_opencl = copy_opencl_to_opencl,
         .ram_to_opencl_async = copy_ram_to_opencl_async,
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
+	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
 #endif
 };
 
@@ -405,35 +407,27 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
         return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
 }
 
-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
-				 void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
+                                       void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event)
 {
-        int err;
-
 	struct starpu_vector_interface *src_vector = src_interface;
 	struct starpu_vector_interface *dst_vector = dst_interface;
+        int err, ret; /* err: status returned by the OpenCL helper; ret: result code the helper stores through &ret */
 
-	cl_command_queue cq;
-	starpu_opencl_get_current_queue(&cq);
-
-	size_t size = src_vector->nx*src_vector->elemsize;
-	cl_event event;
-
-	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle, src_vector->offset, dst_vector->offset, size, 0, NULL, &event);
-        if (STARPU_UNLIKELY(err))
-                STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = clWaitForEvents(1, &event);
-        if (STARPU_UNLIKELY(err))
-                STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = clReleaseEvent(event);
+	err = starpu_opencl_copy_opencl_to_opencl((cl_mem)src_vector->dev_handle, src_node, (cl_mem)dst_vector->dev_handle, dst_node, src_vector->nx*src_vector->elemsize,
+					       src_vector->offset, event, &ret); /* both buffers are addressed through dev_handle, as in the replaced clEnqueueCopyBuffer call. NOTE(review): only src_vector->offset is forwarded, the replaced code also passed dst_vector->offset -- confirm both vectors always share the same offset */
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
 
-	return 0;
+	return ret; /* NOTE(review): presumably 0 for a finished synchronous copy and -EAGAIN for a pending asynchronous one, as starpu_opencl_copy_opencl_to_ram does -- confirm against the helper */
+}
+
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
+				 void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL); /* synchronous variant: a NULL event makes the OpenCL helper wait for the copy before returning */
 }
 
 

+ 48 - 4
src/drivers/opencl/driver_opencl.c

@@ -271,8 +271,12 @@ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTR
 		if (event == NULL)
 		{
 			/* We want a synchronous copy, let's synchronise the queue */
-			clWaitForEvents(1, &ev);
-			clReleaseEvent(ev);
+			err = clWaitForEvents(1, &ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
+			err = clReleaseEvent(ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
 		}
 		else
 		{
@@ -303,8 +307,48 @@ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_
 		if (event == NULL)
 		{
 			/* We want a synchronous copy, let's synchronise the queue */
-			clWaitForEvents(1, &ev);
-			clReleaseEvent(ev);
+			err = clWaitForEvents(1, &ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
+			err = clReleaseEvent(ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
+		}
+		else
+		{
+			*event = ev;
+		}
+
+		if (ret)
+		{
+			*ret = (event == NULL) ? 0 : -EAGAIN;
+		}
+	}
+	return err;
+}
+
+cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
+{
+	cl_int err;
+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
+
+	if (event)
+		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
+	cl_event ev;
+	err = clEnqueueCopyBuffer(transfer_queues[worker->devid], src, dst, CL_FALSE, offset, size, 0, NULL, &ev);
+	if (event)
+		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
+	if (STARPU_LIKELY(err == CL_SUCCESS))
+	{
+		if (event == NULL)
+		{
+			/* We want a synchronous copy, let's synchronise the queue */
+			err = clWaitForEvents(1, &ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
+			err = clReleaseEvent(ev);
+			if (STARPU_UNLIKELY(err))
+				STARPU_OPENCL_REPORT_ERROR(err);
 		}
 		else
 		{