Browse Source

Separate OpenCL source and destination offsets in starpu_opencl_copy_opencl_to_opencl and starpu_opencl_copy_async_sync, and document these.

Samuel Thibault 12 years ago
parent
commit
a9c2260176

+ 28 - 5
doc/chapters/basic-api.texi

@@ -2502,7 +2502,7 @@ Copy @var{ssize} bytes from the pointer @var{src_ptr} on
 The function first tries to copy the data asynchronously (unless
 @var{stream} is @code{NULL}). If the asynchronous copy fails or if
 @var{stream} is @code{NULL}, it copies the data synchronously.
-The function returns @code{-EAGAIN} if the asynchronous copy was
+The function returns @code{-EAGAIN} if the asynchronous launch was
 successful. It returns 0 if the synchronous copy was successful, or
 fails otherwise.
 @end deftypefun
@@ -2740,28 +2740,51 @@ valid combination of cl_mem_flags values.
 
 @deftypefun cl_int starpu_opencl_copy_ram_to_opencl ({void *}@var{ptr}, unsigned @var{src_node}, cl_mem @var{buffer}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
 Copy @var{size} bytes from the given @var{ptr} on
-@var{src_node} to the given @var{buffer} on @var{dst_node}.
+RAM @var{src_node} to the given @var{buffer} on OpenCL @var{dst_node}.
 @var{offset} is the offset, in bytes, in @var{buffer}.
 If @var{event} is NULL, the copy is synchronous, i.e. the queue is
 synchronised before returning. If non NULL, @var{event} can be used
 after the call to wait for this particular copy to complete.
 This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
-otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous copy
+otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
 was successful, or to 0 if event was NULL.
 @end deftypefun
 
 @deftypefun cl_int starpu_opencl_copy_opencl_to_ram (cl_mem @var{buffer}, unsigned @var{src_node}, void *@var{ptr}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
 Copy @var{size} bytes asynchronously from the given @var{buffer} on
-@var{src_node} to the given @var{ptr} on @var{dst_node}.
+OpenCL @var{src_node} to the given @var{ptr} on RAM @var{dst_node}.
 @var{offset} is the offset, in bytes, in @var{buffer}.
 If @var{event} is NULL, the copy is synchronous, i.e. the queue is
 synchronised before returning. If non NULL, @var{event} can be used
 after the call to wait for this particular copy to complete.
 This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
-otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous copy
+otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
 was successful, or to 0 if event was NULL.
 @end deftypefun
 
+@deftypefun cl_int starpu_opencl_copy_opencl_to_opencl (cl_mem @var{src}, unsigned @var{src_node}, size_t @var{src_offset}, cl_mem @var{dst}, unsigned @var{dst_node}, size_t @var{dst_offset}, size_t @var{size}, {cl_event *}@var{event}, {int *}@var{ret})
+Copy @var{size} bytes asynchronously from byte offset @var{src_offset} of
+@var{src} on OpenCL @var{src_node} to byte offset @var{dst_offset} of @var{dst} on
+OpenCL @var{dst_node}.
+If @var{event} is NULL, the copy is synchronous, i.e. the queue is
+synchronised before returning. If non NULL, @var{event} can be used
+after the call to wait for this particular copy to complete.
+This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
+otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
+was successful, or to 0 if event was NULL.
+@end deftypefun
+
+@deftypefun cl_int starpu_opencl_copy_async_sync (uintptr_t @var{src}, unsigned @var{src_node}, size_t @var{src_offset}, uintptr_t @var{dst}, unsigned @var{dst_node}, size_t @var{dst_offset}, size_t @var{size}, {cl_event *}@var{event})
+Copy @var{size} bytes from byte offset @var{src_offset} of
+@var{src} on @var{src_node} to byte offset @var{dst_offset} of @var{dst} on
+@var{dst_node}. If @var{event} is NULL, the copy is synchronous, i.e. the queue is
+synchronised before returning. If non NULL, @var{event} can be used
+after the call to wait for this particular copy to complete.
+The function returns @code{-EAGAIN} if the asynchronous launch was
+successful. It returns 0 if the synchronous copy was successful, or
+fails otherwise.
+@end deftypefun
+
 @node Miscellaneous helpers
 @section Miscellaneous helpers
 

+ 2 - 2
include/starpu_opencl.h

@@ -106,9 +106,9 @@ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buf
 
 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
-cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, cl_mem dst, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
+cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
 
-cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size, size_t offset, cl_event *event);
+cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, size_t src_offset, uintptr_t dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event);
 
 #ifdef __cplusplus
 }

+ 3 - 3
src/datawizard/interfaces/bcsr_interface.c

@@ -378,15 +378,15 @@ static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-	err = starpu_opencl_copy_async_sync(src_bcsr->nzval, src_node, dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL);
+	err = starpu_opencl_copy_async_sync(src_bcsr->nzval, src_node, 0, dst_bcsr->nzval, dst_node, 0, nnz*r*c*elemsize, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->colind, src_node, (uintptr_t)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->colind, src_node, 0, (uintptr_t)dst_bcsr->colind, dst_node, 0, nnz*sizeof(uint32_t), NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->rowptr, src_node, (uintptr_t)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->rowptr, src_node, 0, (uintptr_t)dst_bcsr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 

+ 6 - 3
src/datawizard/interfaces/coo_interface.c

@@ -175,30 +175,33 @@ copy_opencl_common(void *src_interface, unsigned src_node,
 	ret = starpu_opencl_copy_async_sync(
 		(uintptr_t) src_coo->columns,
 		src_node,
+		0,
 		(uintptr_t) dst_coo->columns,
 		dst_node,
-		size,
 		0,
+		size,
 		event);
 
 	/* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
 	ret = starpu_opencl_copy_async_sync(
 		(uintptr_t) src_coo->rows,
 		src_node,
+		0,
 		(uintptr_t) dst_coo->rows,
 		dst_node,
-		size,
 		0,
+		size,
 		event);
 
 	size = src_coo->n_values * src_coo->elemsize;
 	ret = starpu_opencl_copy_async_sync(
 		src_coo->values,
 		src_node,
+		0,
 		(uintptr_t) dst_coo->values,
 		dst_node,
-		size,
 		0,
+		size,
 		event);
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node,

+ 3 - 3
src/datawizard/interfaces/csr_interface.c

@@ -439,15 +439,15 @@ static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-	err = starpu_opencl_copy_async_sync(src_csr->nzval, src_node, dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
+	err = starpu_opencl_copy_async_sync(src_csr->nzval, src_node, 0, dst_csr->nzval, dst_node, 0, nnz*elemsize, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->colind, src_node, (uintptr_t)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->colind, src_node, 0, (uintptr_t)dst_csr->colind, dst_node, 0, nnz*sizeof(uint32_t), NULL);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->rowptr, src_node, (uintptr_t)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->rowptr, src_node, 0, (uintptr_t)dst_csr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 

+ 1 - 1
src/datawizard/interfaces/variable_interface.c

@@ -256,7 +256,7 @@ static int copy_opencl_common(void *src_interface, unsigned src_node,
 	struct starpu_variable_interface *dst_variable = dst_interface;
         int ret;
 
-        ret = starpu_opencl_copy_async_sync(src_variable->ptr, src_node, dst_variable->ptr, dst_node, src_variable->elemsize, 0, event);
+	ret = starpu_opencl_copy_async_sync(src_variable->ptr, src_node, 0, dst_variable->ptr, dst_node, 0, src_variable->elemsize, event);
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
 

+ 3 - 3
src/datawizard/interfaces/vector_interface.c

@@ -304,9 +304,9 @@ static int copy_opencl_common(void *src_interface, unsigned src_node,
 	struct starpu_vector_interface *dst_vector = dst_interface;
 	int ret;
 
-	ret = starpu_opencl_copy_async_sync(src_vector->ptr, src_node, dst_vector->dev_handle, dst_node,
-					       src_vector->nx*src_vector->elemsize,
-					       dst_vector->offset, event);
+	ret = starpu_opencl_copy_async_sync(src_vector->dev_handle, src_node, src_vector->offset,
+					    dst_vector->dev_handle, dst_node, dst_vector->offset,
+					       src_vector->nx*src_vector->elemsize, event);
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
 	return ret;

+ 10 - 10
src/drivers/opencl/driver_opencl.c

@@ -334,7 +334,7 @@ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_
 	return err;
 }
 
-cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
+cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, size_t src_offset, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t dst_offset, size_t size, cl_event *event, int *ret)
 {
 	cl_int err;
 	struct _starpu_worker *worker = _starpu_get_local_worker_key();
@@ -342,7 +342,7 @@ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_
 	if (event)
 		_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 	cl_event ev;
-	err = clEnqueueCopyBuffer(transfer_queues[worker->devid], src, dst, CL_FALSE, offset, size, 0, NULL, &ev);
+	err = clEnqueueCopyBuffer(transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev);
 	if (event)
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 	if (STARPU_LIKELY(err == CL_SUCCESS))
@@ -371,7 +371,7 @@ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_
 }
 
 #ifdef STARPU_USE_OPENCL
-cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size, size_t offset, cl_event *event)
+cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, size_t src_offset, uintptr_t dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event)
 {
 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
 	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
@@ -383,26 +383,26 @@ cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, uintptr_t
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
 		err = starpu_opencl_copy_opencl_to_ram(
 				(cl_mem) src, src_node,
-				(void*) dst + offset, dst_node,
-				size, offset, event, &ret);
+				(void*) dst + dst_offset, dst_node,
+				size, src_offset, event, &ret);
 		if (STARPU_UNLIKELY(err))
 			STARPU_OPENCL_REPORT_ERROR(err);
 		return ret;
 
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
 		err = starpu_opencl_copy_ram_to_opencl(
-				(void*) src + offset, src_node,
+				(void*) src + src_offset, src_node,
 				(cl_mem) dst, dst_node,
-				size, offset, event, &ret);
+				size, dst_offset, event, &ret);
 		if (STARPU_UNLIKELY(err))
 			STARPU_OPENCL_REPORT_ERROR(err);
 		return ret;
 
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
 		err = starpu_opencl_copy_opencl_to_opencl(
-				(cl_mem) src, src_node,
-				(cl_mem) dst, dst_node,
-				size, offset, event, &ret);
+				(cl_mem) src, src_node, src_offset,
+				(cl_mem) dst, dst_node, dst_offset,
+				size, event, &ret);
 		if (STARPU_UNLIKELY(err))
 			STARPU_OPENCL_REPORT_ERROR(err);
 		return ret;