Sfoglia il codice sorgente

OpenCL Driver: only keep one function in the public interface to do the copy; there is no need to try a synchronous copy after an asynchronous copy has failed

	If blocking_write is CL_TRUE, the OpenCL implementation copies the data referred to by ptr
	and enqueues the write operation in the command-queue. The memory pointed to by ptr can be
	reused by the application after the clEnqueueWriteBuffer call returns.

We use CL_FALSE to avoid the use of a temporary buffer, and call clFinish to synchronise the
queue if a synchronous copy is required.
Nathalie Furmento 13 anni fa
parent
commit
7eccca1c90

+ 9 - 7
doc/chapters/basic-api.texi

@@ -2271,12 +2271,13 @@ Allocate @var{size} bytes of memory, stored in @var{addr}. @var{flags} must be a
 valid combination of cl_mem_flags values.
 @end deftypefun
 
-@deftypefun cl_int starpu_opencl_copy_ram_to_opencl_async_sync ({void *}@var{ptr}, unsigned @var{src_node}, cl_mem @var{buffer}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
-Copy @var{size} bytes asynchronously from the given @var{ptr} on
+@deftypefun cl_int starpu_opencl_copy_ram_to_opencl ({void *}@var{ptr}, unsigned @var{src_node}, cl_mem @var{buffer}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
+Copy @var{size} bytes from the given @var{ptr} on
 @var{src_node} to the given @var{buffer} on @var{dst_node}.
 @var{offset} is the offset, in bytes, in @var{buffer}.
-@var{event} can be used to wait for this particular copy to complete. It can be
-NULL.
+If @var{event} is NULL, the copy is synchronous, i.e., the queue is
+synchronised before returning. If non-NULL, @var{event} can be used
+after the call to wait for this particular copy to complete.
 This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
 otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous copy
 was successful, or to 0 if event was NULL.
@@ -2291,12 +2292,13 @@ This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL e
 otherwise.
 @end deftypefun
 
-@deftypefun cl_int starpu_opencl_copy_opencl_to_ram_async_sync (cl_mem @var{buffer}, unsigned @var{src_node}, void *@var{ptr}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
+@deftypefun cl_int starpu_opencl_copy_opencl_to_ram (cl_mem @var{buffer}, unsigned @var{src_node}, void *@var{ptr}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
 Copy @var{size} bytes asynchronously from the given @var{buffer} on
 @var{src_node} to the given @var{ptr} on @var{dst_node}.
 @var{offset} is the offset, in bytes, in @var{buffer}.
-@var{event} can be used to wait for this particular copy to complete. It can be
-NULL.
+If @var{event} is NULL, the copy is synchronous, i.e., the queue is
+synchronised before returning. If non-NULL, @var{event} can be used
+after the call to wait for this particular copy to complete.
 This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
 otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous copy
 was successful, or to 0 if event was NULL.

+ 16 - 17
examples/filters/custom_mf/custom_interface.c

@@ -542,14 +542,14 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 				size, CL_MEM_READ_WRITE);
 		assert(ret == CL_SUCCESS);
 	}
-	err = starpu_opencl_copy_ram_to_opencl_async_sync(src_custom->cpu_ptr,
-							  src_node,
-							  dst_custom->cpu_ptr,
-							  dst_node,
-							  size,
-							  0,
-							  NULL,
-							  &ret);
+	err = starpu_opencl_copy_ram_to_opencl(src_custom->cpu_ptr,
+					       src_node,
+					       dst_custom->cpu_ptr,
+					       dst_node,
+					       size,
+					       0,
+					       NULL,
+					       &ret);
 	assert(err == 0);
 	return 0;
 }
@@ -584,15 +584,14 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 		assert(dst_custom->opencl_ptr != NULL);
 	}
 
-	err = starpu_opencl_copy_opencl_to_ram_async_sync(
-			src_custom->opencl_ptr,
-			src_node,
-			dst_custom->opencl_ptr,
-			dst_node,
-			size,
-			0,
-			NULL,
-			&ret);
+	err = starpu_opencl_copy_opencl_to_ram(src_custom->opencl_ptr,
+					       src_node,
+					       dst_custom->opencl_ptr,
+					       dst_node,
+					       size,
+					       0,
+					       NULL,
+					       &ret);
 	assert(err == 0);
 	return 0;
 }

+ 32 - 32
examples/interface/complex_interface.c

@@ -204,25 +204,25 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node,
 
 	cl_int err;
 
-	err = starpu_opencl_copy_ram_to_opencl(
-		src_complex->real,
-		src_node,
-		(cl_mem) dst_complex->real,
-		dst_node,
-		src_complex->nx * sizeof(src_complex->real[0]),
-		0,
-		NULL);
+	err = starpu_opencl_copy_ram_to_opencl(src_complex->real,
+					       src_node,
+					       (cl_mem) dst_complex->real,
+					       dst_node,
+					       src_complex->nx * sizeof(src_complex->real[0]),
+					       0,
+					       NULL,
+					       NULL);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_ram_to_opencl(
-		src_complex->imaginary,
-		src_node,
-		(cl_mem) dst_complex->imaginary,
-		dst_node,
-		src_complex->nx * sizeof(src_complex->imaginary[0]),
-		0,
-		NULL);
+	err = starpu_opencl_copy_ram_to_opencl(src_complex->imaginary,
+					       src_node,
+					       (cl_mem) dst_complex->imaginary,
+					       dst_node,
+					       src_complex->nx * sizeof(src_complex->imaginary[0]),
+					       0,
+					       NULL,
+					       NULL);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -236,25 +236,25 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
 	struct starpu_complex_interface *dst_complex = dst_interface;
 
 	cl_int err;
-	err = starpu_opencl_copy_opencl_to_ram(
-		(cl_mem) src_complex->real,
-		src_node,
-		dst_complex->real,
-		dst_node,
-		src_complex->nx * sizeof(src_complex->real[0]),
-		0,
-		NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem) src_complex->real,
+					       src_node,
+					       dst_complex->real,
+					       dst_node,
+					       src_complex->nx * sizeof(src_complex->real[0]),
+					       0,
+					       NULL,
+					       NULL);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram(
-		(cl_mem) src_complex->imaginary,
-		src_node,
-		dst_complex->imaginary,
-		dst_node,
-		src_complex->nx * sizeof(src_complex->imaginary[0]),
-		0,
-		NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem) src_complex->imaginary,
+					       src_node,
+					       dst_complex->imaginary,
+					       dst_node,
+					       src_complex->nx * sizeof(src_complex->imaginary[0]),
+					       0,
+					       NULL,
+					       NULL);
 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
 		STARPU_OPENCL_REPORT_ERROR(err);
 

+ 2 - 6
include/starpu_opencl.h

@@ -94,13 +94,9 @@ int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
 
 cl_int starpu_opencl_allocate_memory(cl_mem *addr, size_t size, cl_mem_flags flags);
 
-cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event);
+cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
-cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event);
-
-cl_int starpu_opencl_copy_ram_to_opencl_async_sync(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
-
-cl_int starpu_opencl_copy_opencl_to_ram_async_sync(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
+cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
 #ifdef __cplusplus
 }

+ 6 - 6
src/datawizard/interfaces/bcsr_interface.c

@@ -483,15 +483,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->nzval, src_node, (void *)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->nzval, src_node, (void *)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->colind, src_node, (void *)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->colind, src_node, (void *)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->rowptr, src_node, (void *)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->rowptr, src_node, (void *)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -514,15 +514,15 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->nzval, src_node, (cl_mem)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL);
+	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->nzval, src_node, (cl_mem)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->colind, src_node, (cl_mem)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->colind, src_node, (cl_mem)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->rowptr, src_node, (cl_mem)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->rowptr, src_node, (cl_mem)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 

+ 10 - 10
src/datawizard/interfaces/block_interface.c

@@ -621,9 +621,9 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 		/* Is that a single contiguous buffer ? */
 		if (((nx*ny) == src_block->ldz) && (src_block->ldz == dst_block->ldz))
 		{
-                        err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_block->ptr, src_node, (cl_mem)dst_block->dev_handle, dst_node,
-                                                                           src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
-                                                                           dst_block->offset, (cl_event*)_event, &ret);
+                        err = starpu_opencl_copy_ram_to_opencl((void*)src_block->ptr, src_node, (cl_mem)dst_block->dev_handle, dst_node,
+							       src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
+							       dst_block->offset, (cl_event*)_event, &ret);
                         if (STARPU_UNLIKELY(err))
                                 STARPU_OPENCL_REPORT_ERROR(err);
                 }
@@ -644,9 +644,9 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 			{
                                 void *ptr = (void*)src_block->ptr+(layer*src_block->ldz*src_block->elemsize)+(j*src_block->ldy*src_block->elemsize);
                                 err = starpu_opencl_copy_ram_to_opencl(ptr, src_node, (cl_mem)dst_block->dev_handle, dst_node,
-                                                                        src_block->nx*src_block->elemsize,
-                                                                        layer*dst_block->ldz*dst_block->elemsize + j*dst_block->ldy*dst_block->elemsize
-                                                                        + dst_block->offset, NULL);
+								       src_block->nx*src_block->elemsize,
+								       layer*dst_block->ldz*dst_block->elemsize + j*dst_block->ldy*dst_block->elemsize
+								       + dst_block->offset, NULL, NULL);
                                 if (STARPU_UNLIKELY(err))
                                         STARPU_OPENCL_REPORT_ERROR(err);
                         }
@@ -687,9 +687,9 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 		/* Is that a single contiguous buffer ? */
 		if (((src_block->nx*src_block->ny) == src_block->ldz) && (src_block->ldz == dst_block->ldz))
 		{
-                        err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_block->dev_handle, src_node, (void*)dst_block->ptr, dst_node,
-                                                                           src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
-                                                                           src_block->offset, (cl_event*)_event, &ret);
+                        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_block->dev_handle, src_node, (void*)dst_block->ptr, dst_node,
+							       src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
+							       src_block->offset, (cl_event*)_event, &ret);
                         if (STARPU_UNLIKELY(err))
                                 STARPU_OPENCL_REPORT_ERROR(err);
                 }
@@ -713,7 +713,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
                                 err = starpu_opencl_copy_opencl_to_ram((void*)src_block->dev_handle, src_node, ptr, dst_node,
                                                                         src_block->nx*src_block->elemsize,
                                                                         layer*src_block->ldz*src_block->elemsize+j*src_block->ldy*src_block->elemsize+
-                                                                        src_block->offset, NULL);
+								       src_block->offset, NULL, NULL);
 				if (STARPU_UNLIKELY(err))
 					STARPU_OPENCL_REPORT_ERROR(err);
                         }

+ 6 - 6
src/datawizard/interfaces/csr_interface.c

@@ -666,15 +666,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->nzval, src_node, (void *)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
+        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->nzval, src_node, (void *)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->colind, src_node, (void *)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->colind, src_node, (void *)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->rowptr, src_node, (void *)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->rowptr, src_node, (void *)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -694,15 +694,15 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->nzval, src_node, (cl_mem)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
+        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->nzval, src_node, (cl_mem)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->colind, src_node, (cl_mem)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->colind, src_node, (cl_mem)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->rowptr, src_node, (cl_mem)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->rowptr, src_node, (cl_mem)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 

+ 6 - 6
src/datawizard/interfaces/matrix_interface.c

@@ -607,9 +607,9 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
 	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
 
-	err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_matrix->ptr, src_node, (cl_mem)dst_matrix->dev_handle, dst_node,
-                                                           src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
-                                                           dst_matrix->offset, (cl_event*)_event, &ret);
+	err = starpu_opencl_copy_ram_to_opencl((void*)src_matrix->ptr, src_node, (cl_mem)dst_matrix->dev_handle, dst_node,
+					       src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
+					       dst_matrix->offset, (cl_event*)_event, &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -626,9 +626,9 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 
 	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
 
-        err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_matrix->dev_handle, src_node, (void*)dst_matrix->ptr, dst_node,
-                                                           src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
-                                                           src_matrix->offset, (cl_event*)_event, &ret);
+        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_matrix->dev_handle, src_node, (void*)dst_matrix->ptr, dst_node,
+					       src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
+					       src_matrix->offset, (cl_event*)_event, &ret);
 
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);

+ 16 - 16
src/datawizard/interfaces/multiformat_interface.c

@@ -637,14 +637,14 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
 
 
-	err = starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
-							   src_node,
-							   (cl_mem) dst_multiformat->cpu_ptr,
-							   dst_node,
-							   size,
-							   0,
-							   (cl_event *) _event,
-							   &ret);
+	err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
+					       src_node,
+					       (cl_mem) dst_multiformat->cpu_ptr,
+					       dst_node,
+					       size,
+					       0,
+					       (cl_event *) _event,
+					       &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -675,14 +675,14 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 		/* XXX : it is weird that we might have to allocate memory here... */
 		dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
 	}
-	err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
-							   src_node,
-							   dst_multiformat->opencl_ptr,
-							   dst_node,
-							   size,
-                                                           0,
-							   (cl_event *)_event,
-							   &ret);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
+					       src_node,
+					       dst_multiformat->opencl_ptr,
+					       dst_node,
+					       size,
+					       0,
+					       (cl_event *)_event,
+					       &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 

+ 4 - 4
src/datawizard/interfaces/variable_interface.c

@@ -440,8 +440,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 	struct starpu_variable_interface *dst_variable = dst_interface;
         int err,ret;
 
-        err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_variable->ptr, src_node, (cl_mem)dst_variable->ptr, dst_node, src_variable->elemsize,
-                                                           0, (cl_event*)_event, &ret);
+        err = starpu_opencl_copy_ram_to_opencl((void*)src_variable->ptr, src_node, (cl_mem)dst_variable->ptr, dst_node, src_variable->elemsize,
+					       0, (cl_event*)_event, &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -456,8 +456,8 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 	struct starpu_variable_interface *dst_variable = dst_interface;
         int err, ret;
 
-	err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_variable->ptr, src_node, (void*)dst_variable->ptr, dst_node, src_variable->elemsize,
-                                                           0, (cl_event*)_event, &ret);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_variable->ptr, src_node, (void*)dst_variable->ptr, dst_node, src_variable->elemsize,
+					       0, (cl_event*)_event, &ret);
 
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);

+ 5 - 5
src/datawizard/interfaces/vector_interface.c

@@ -483,9 +483,9 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 	struct starpu_vector_interface *dst_vector = dst_interface;
         int err, ret;
 
-	err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_vector->ptr, src_node, (cl_mem)dst_vector->dev_handle, dst_node,
-                                                           src_vector->nx*src_vector->elemsize,
-                                                           dst_vector->offset, (cl_event*)_event, &ret);
+	err = starpu_opencl_copy_ram_to_opencl((void*)src_vector->ptr, src_node, (cl_mem)dst_vector->dev_handle, dst_node,
+					       src_vector->nx*src_vector->elemsize,
+					       dst_vector->offset, (cl_event*)_event, &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -501,8 +501,8 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 	struct starpu_vector_interface *dst_vector = dst_interface;
         int err, ret;
 
-	err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_vector->dev_handle, src_node, (void*)dst_vector->ptr, dst_node, src_vector->nx*src_vector->elemsize,
-                                                           src_vector->offset, (cl_event *)_event, &ret);
+	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_vector->dev_handle, src_node, (void*)dst_vector->ptr, dst_node, src_vector->nx*src_vector->elemsize,
+					       src_vector->offset, (cl_event *)_event, &ret);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 

+ 22 - 79
src/drivers/opencl/driver_opencl.c

@@ -214,111 +214,54 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem, size_t size, cl_mem_flags flag
         return CL_SUCCESS;
 }
 
-cl_int starpu_opencl_copy_ram_to_opencl_async_sync(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
+cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
 {
         cl_int err;
         struct _starpu_worker *worker = _starpu_get_local_worker_key();
-        cl_bool blocking;
-
-        blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
 
         if (event)
                 _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
+        err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, event);
         if (event)
                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
         if (STARPU_LIKELY(err == CL_SUCCESS))
 	{
-                *ret = (event == NULL) ? 0 : -EAGAIN;
-                return CL_SUCCESS;
-        }
-        else
-	{
-                if (event != NULL)
-		{
-                        /* The asynchronous copy has failed, try to copy synchronously */
-                        err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
-                }
-                if (STARPU_LIKELY(err == CL_SUCCESS))
+		if (event == NULL)
 		{
-                        *ret = 0;
-                        return CL_SUCCESS;
-                }
-                else
+			/* We want a synchronous copy, let's synchronise the queue */
+			clFinish(transfer_queues[worker->devid]);
+		}
+		if (ret)
 		{
-                        STARPU_OPENCL_REPORT_ERROR(err);
-                        return err;
-                }
-        }
-}
-
-cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event)
-{
-        cl_int err;
-        struct _starpu_worker *worker = _starpu_get_local_worker_key();
-        cl_bool blocking;
-
-        blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
-        if (event)
-                _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
-        if (event)
-                _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-
-        return CL_SUCCESS;
+			*ret = (event == NULL) ? 0 : -EAGAIN;
+		}
+	}
+	return err;
 }
 
-cl_int starpu_opencl_copy_opencl_to_ram_async_sync(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
+cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
 {
         cl_int err;
         struct _starpu_worker *worker = _starpu_get_local_worker_key();
-        cl_bool blocking;
 
-        blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
         if (event)
                 _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
+        err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, event);
         if (event)
                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
         if (STARPU_LIKELY(err == CL_SUCCESS))
 	{
-                *ret = (event == NULL) ? 0 : -EAGAIN;
-                return CL_SUCCESS;
-        }
-        else
-	{
-                if (event != NULL)
-                        /* The asynchronous copy has failed, try to copy synchronously */
-                        err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
-                if (STARPU_LIKELY(err == CL_SUCCESS))
+		if (event == NULL)
 		{
-                        *ret = 0;
-                        return CL_SUCCESS;
-                }
-                else
+			/* We want a synchronous copy, let's synchronise the queue */
+			clFinish(transfer_queues[worker->devid]);
+		}
+		if (ret)
 		{
-                        STARPU_OPENCL_REPORT_ERROR(err);
-                        return err;
-                }
-        }
-}
-
-cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event)
-{
-        cl_int err;
-        struct _starpu_worker *worker = _starpu_get_local_worker_key();
-        cl_bool blocking;
-
-        blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
-        if (event)
-                _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
-        err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
-        if (event)
-                _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-
-        return CL_SUCCESS;
+			*ret = (event == NULL) ? 0 : -EAGAIN;
+		}
+	}
+	return err;
 }
 
 #if 0