Explorar o código

The functions in the starpu_data_copy_methods structure now consider pairs of
(void *interface, unsigned node) rather than a handle and a (src, node) pair.
This will make it possible to use those transfer methods for data which are not
directly accessible from a (handle, node) pair.
The type of the last argument of the OpenCL async is not 'cl_event *' anymore
because public headers may be compiled from compilers which do not support
OpenCL headers (such as nvcc). As a workaround, we use a 'void *' type instead.

Cédric Augonnet %!s(int64=15) %!d(string=hai) anos
pai
achega
48129da2ff

+ 24 - 26
include/starpu_data_interfaces.h

@@ -19,7 +19,6 @@
 
 #include <starpu.h>
 #include <starpu_data.h>
-#include <starpu_opencl.h>
 
 #ifdef STARPU_USE_GORDON
 /* to get the gordon_strideSize_t data structure from gordon */
@@ -41,44 +40,43 @@ extern "C" {
  * different types of memory nodes */
 struct starpu_data_copy_methods {
 	/* src type is ram */
-	int (*ram_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*ram_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*ram_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*ram_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
+	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*ram_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/* src type is cuda */
-	int (*cuda_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*cuda_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*cuda_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*cuda_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
+	int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*cuda_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/* src type is spu */
-	int (*spu_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*spu_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*spu_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*spu_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
+	int (*spu_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*spu_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*spu_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*spu_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 	/* src type is opencl */
-	int (*opencl_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*opencl_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*opencl_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
-	int (*opencl_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
+	int (*opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*opencl_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 
 #ifdef STARPU_USE_CUDA
 	/* for asynchronous CUDA transfers */
-	int (*ram_to_cuda_async)(starpu_data_handle handle, uint32_t src,
-					uint32_t dst, cudaStream_t *stream);
-	int (*cuda_to_ram_async)(starpu_data_handle handle, uint32_t src,
-					uint32_t dst, cudaStream_t *stream);
-	int (*cuda_to_cuda_async)(starpu_data_handle handle, uint32_t src,
-					uint32_t dst, cudaStream_t *stream);
+	int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+	int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+	int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
 #endif
 
 #ifdef STARPU_USE_OPENCL
 	/* for asynchronous OpenCL transfers */
-        int (*ram_to_opencl_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
-	int (*opencl_to_ram_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
-	int (*opencl_to_opencl_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
+	/* XXX we do not use a cl_event *event type for the last argument
+	 * because nvcc does not like when we have to include OpenCL headers */
+        int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
+	int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
+	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
 #endif
 };
 

+ 14 - 13
src/datawizard/copy_driver.c

@@ -80,8 +80,6 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 {
 	int ret = 0;
 
-	//ret = handle->ops->copy_data_1_to_1(handle, src_node, dst_node);
-
 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
 
 	starpu_node_kind src_kind = _starpu_get_node_kind(src_node);
@@ -94,15 +92,18 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 	STARPU_ASSERT(handle->per_node[dst_node].allocated);
 
 #ifdef STARPU_USE_CUDA
-cudaError_t cures;
-cudaStream_t *stream;
+	cudaError_t cures;
+	cudaStream_t *stream;
 #endif
 
+	void *src_interface = starpu_data_get_interface_on_node(handle, src_node);
+	void *dst_interface = starpu_data_get_interface_on_node(handle, dst_node);
+
 	switch (MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
       case MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
          /* STARPU_CPU_RAM -> STARPU_CPU_RAM */
          STARPU_ASSERT(copy_methods->ram_to_ram);
-         copy_methods->ram_to_ram(handle, src_node, dst_node);
+         copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node);
          break;
 #ifdef STARPU_USE_CUDA
       case MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
@@ -113,14 +114,14 @@ cudaStream_t *stream;
             STARPU_ASSERT(copy_methods->cuda_to_ram);
             if (!req || !copy_methods->cuda_to_ram_async) {
                /* this is not associated to a request so it's synchronous */
-               copy_methods->cuda_to_ram(handle, src_node, dst_node);
+               copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node);
             }
             else {
                cures = cudaEventCreate(&req->async_channel.cuda_event);
                if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
 
                stream = starpu_cuda_get_local_stream();
-               ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
+               ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
 
                cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
                if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
@@ -138,14 +139,14 @@ cudaStream_t *stream;
          STARPU_ASSERT(copy_methods->ram_to_cuda);
          if (!req || !copy_methods->ram_to_cuda_async) {
             /* this is not associated to a request so it's synchronous */
-            copy_methods->ram_to_cuda(handle, src_node, dst_node);
+            copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
          }
          else {
             cures = cudaEventCreate(&req->async_channel.cuda_event);
             if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
 
             stream = starpu_cuda_get_local_stream();
-            ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
+            ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
 
             cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
             if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
@@ -159,10 +160,10 @@ cudaStream_t *stream;
             STARPU_ASSERT(copy_methods->opencl_to_ram);
             if (!req || !copy_methods->opencl_to_ram_async) {
                /* this is not associated to a request so it's synchronous */
-               copy_methods->opencl_to_ram(handle, src_node, dst_node);
+               copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
             }
             else {
-               ret = copy_methods->opencl_to_ram_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
+               ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.opencl_event));
             }
          }
          else {
@@ -176,10 +177,10 @@ cudaStream_t *stream;
          STARPU_ASSERT(copy_methods->ram_to_opencl);
          if (!req || !copy_methods->ram_to_opencl_async) {
             /* this is not associated to a request so it's synchronous */
-            copy_methods->ram_to_opencl(handle, src_node, dst_node);
+            copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
          }
          else {
-            ret = copy_methods->ram_to_opencl_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
+            ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.opencl_event));
          }
          break;
 #endif

+ 20 - 35
src/datawizard/interfaces/bcsr_interface.c

@@ -31,14 +31,14 @@
  * BCSR : blocked CSR, we use blocks of size (r x c)
  */
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 
 static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = {
@@ -413,13 +413,10 @@ static void free_bcsr_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_bcsr_interface_t *src_bcsr;
-	starpu_bcsr_interface_t *dst_bcsr;
-
-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_bcsr_interface_t *src_bcsr = src_interface;
+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
 
 	uint32_t nnz = src_bcsr->nnz;
 	uint32_t nrow = src_bcsr->nrow;
@@ -449,13 +446,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_bcsr_interface_t *src_bcsr;
-	starpu_bcsr_interface_t *dst_bcsr;
-
-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_bcsr_interface_t *src_bcsr = src_interface;
+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
 
 	uint32_t nnz = src_bcsr->nnz;
 	uint32_t nrow = src_bcsr->nrow;
@@ -487,13 +481,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_bcsr_interface_t *src_bcsr;
-	starpu_bcsr_interface_t *dst_bcsr;
-
-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_bcsr_interface_t *src_bcsr = src_interface;
+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
 
 	uint32_t nnz = src_bcsr->nnz;
 	uint32_t nrow = src_bcsr->nrow;
@@ -521,13 +512,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_bcsr_interface_t *src_bcsr;
-	starpu_bcsr_interface_t *dst_bcsr;
-
-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_bcsr_interface_t *src_bcsr = src_interface;
+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
 
 	uint32_t nnz = src_bcsr->nnz;
 	uint32_t nrow = src_bcsr->nrow;
@@ -557,13 +545,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 #endif // STARPU_USE_OPENCL
 
 /* as not all platform easily have a BLAS lib installed ... */
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_bcsr_interface_t *src_bcsr;
-	starpu_bcsr_interface_t *dst_bcsr;
-
-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_bcsr_interface_t *src_bcsr = src_interface;
+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
 
 	uint32_t nnz = src_bcsr->nnz;
 	uint32_t nrow = src_bcsr->nrow;

+ 42 - 61
src/datawizard/interfaces/block_interface.c

@@ -27,18 +27,18 @@
 #include <drivers/opencl/driver_opencl.h>
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
 static const struct starpu_data_copy_methods block_copy_data_methods_s = {
@@ -381,15 +381,12 @@ static void free_block_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
 	cudaError_t cures;
 
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	uint32_t nx = src_block->nx;
 	uint32_t ny = src_block->ny;
@@ -429,13 +426,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	uint32_t nx = src_block->nx;
 	uint32_t ny = src_block->ny;
@@ -549,13 +543,10 @@ no_async_default:
 
 
 
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	uint32_t nx = src_block->nx;
 	uint32_t ny = src_block->ny;
@@ -667,15 +658,12 @@ no_async_default:
 	}
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
 	cudaError_t cures;
 
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	uint32_t nx = src_block->nx;
 	uint32_t ny = src_block->ny;
@@ -717,12 +705,12 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_block->ptr, (cl_mem)dst_block->dev_handle,
                                                 src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
@@ -736,12 +724,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_block->dev_handle, (void*)dst_block->ptr,
                                                   src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
@@ -755,12 +743,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_block->ptr, (cl_mem)dst_block->dev_handle,
                                                 src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
@@ -774,12 +760,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_block->dev_handle, (void*)dst_block->ptr,
                                                   src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
@@ -796,13 +780,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 #endif
 
 /* as not all platform easily have a BLAS lib installed ... */
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_block_interface_t *src_block;
-	starpu_block_interface_t *dst_block;
-
-	src_block = starpu_data_get_interface_on_node(handle, src_node);
-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_block_interface_t *src_block = src_interface;
+	starpu_block_interface_t *dst_block = dst_interface;
 
 	uint32_t nx = dst_block->nx;
 	uint32_t ny = dst_block->ny;

+ 20 - 36
src/datawizard/interfaces/csr_interface.c

@@ -27,14 +27,14 @@
 #include <drivers/opencl/driver_opencl.h>
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 
 static const struct starpu_data_copy_methods csr_copy_data_methods_s = {
@@ -381,13 +381,10 @@ static void free_csr_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_csr_interface_t *src_csr;
-	starpu_csr_interface_t *dst_csr;
-
-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_csr_interface_t *src_csr = src_interface;
+	starpu_csr_interface_t *dst_csr = dst_interface;
 
 	uint32_t nnz = src_csr->nnz;
 	uint32_t nrow = src_csr->nrow;
@@ -414,13 +411,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_csr_interface_t *src_csr;
-	starpu_csr_interface_t *dst_csr;
-
-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_csr_interface_t *src_csr = src_interface;
+	starpu_csr_interface_t *dst_csr = dst_interface;
 
 	uint32_t nnz = src_csr->nnz;
 	uint32_t nrow = src_csr->nrow;
@@ -449,13 +443,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_csr_interface_t *src_csr;
-	starpu_csr_interface_t *dst_csr;
-
-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_csr_interface_t *src_csr = src_interface;
+	starpu_csr_interface_t *dst_csr = dst_interface;
 
 	uint32_t nnz = src_csr->nnz;
 	uint32_t nrow = src_csr->nrow;
@@ -480,13 +471,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_csr_interface_t *src_csr;
-	starpu_csr_interface_t *dst_csr;
-
-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_csr_interface_t *src_csr = src_interface;
+	starpu_csr_interface_t *dst_csr = dst_interface;
 
 	uint32_t nnz = src_csr->nnz;
 	uint32_t nrow = src_csr->nrow;
@@ -513,14 +501,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 #endif // STARPU_USE_OPENCL
 
 /* as not all platform easily have a BLAS lib installed ... */
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-
-	starpu_csr_interface_t *src_csr;
-	starpu_csr_interface_t *dst_csr;
-
-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_csr_interface_t *src_csr = src_interface;
+	starpu_csr_interface_t *dst_csr = dst_interface;
 
 	uint32_t nnz = src_csr->nnz;
 	uint32_t nrow = src_csr->nrow;

+ 42 - 61
src/datawizard/interfaces/matrix_interface.c

@@ -31,18 +31,18 @@
 #include <drivers/opencl/driver_opencl.h>
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
 static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
@@ -356,13 +356,10 @@ static void free_matrix_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	size_t elemsize = src_matrix->elemsize;
 
@@ -378,13 +375,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	size_t elemsize = src_matrix->elemsize;
 
@@ -404,13 +398,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	size_t elemsize = src_matrix->elemsize;
 
@@ -442,13 +433,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 	return EAGAIN;
 }
 
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	size_t elemsize = src_matrix->elemsize;
 
@@ -478,12 +466,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_matrix->ptr, (cl_mem)dst_matrix->dev_handle, src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
                                                 dst_matrix->offset, event);
@@ -496,12 +484,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_matrix->dev_handle, (void*)dst_matrix->ptr,
                                                   src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
@@ -515,12 +503,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_matrix->ptr, (cl_mem)dst_matrix->dev_handle, src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
                                                 dst_matrix->offset, NULL);
@@ -533,12 +519,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_matrix->dev_handle, (void*)dst_matrix->ptr,
                                                   src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
@@ -555,13 +539,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 #endif
 
 /* as not all platform easily have a  lib installed ... */
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_matrix_interface_t *src_matrix;
-	starpu_matrix_interface_t *dst_matrix;
-
-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_matrix_interface_t *src_matrix = src_interface;
+	starpu_matrix_interface_t *dst_matrix = dst_interface;
 
 	unsigned y;
 	uint32_t nx = dst_matrix->nx;

+ 42 - 61
src/datawizard/interfaces/variable_interface.c

@@ -31,18 +31,18 @@
 #include <drivers/opencl/driver_opencl.h>
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
 static const struct starpu_data_copy_methods variable_copy_data_methods_s = {
@@ -270,13 +270,10 @@ static void free_variable_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost);
@@ -290,13 +287,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyHostToDevice);
@@ -310,13 +304,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpyAsync((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost, *stream);
@@ -337,13 +328,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 	return EAGAIN;
 }
 
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	cudaError_t cures;
 	
@@ -369,12 +357,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_variable->ptr, (cl_mem)dst_variable->ptr, src_variable->elemsize,
                                                 0, event);
@@ -387,12 +375,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_variable->ptr, (void*)dst_variable->ptr, src_variable->elemsize,
                                                   0, event);
@@ -405,12 +393,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_variable->ptr, (cl_mem)dst_variable->ptr, src_variable->elemsize,
                                                 0, NULL);
@@ -423,12 +409,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_variable->ptr, (void*)dst_variable->ptr, src_variable->elemsize,
                                                   0, NULL);
@@ -443,13 +427,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_variable_interface_t *src_variable;
-	starpu_variable_interface_t *dst_variable;
-
-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_variable_interface_t *src_variable = src_interface;
+	starpu_variable_interface_t *dst_variable = dst_interface;
 
 	size_t elemsize = dst_variable->elemsize;
 

+ 42 - 61
src/datawizard/interfaces/vector_interface.c

@@ -30,18 +30,18 @@
 #include <drivers/opencl/driver_opencl.h>
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
 #endif
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
 static const struct starpu_data_copy_methods vector_copy_data_methods_s = {
@@ -305,13 +305,10 @@ static void free_vector_buffer_on_node(void *interface, uint32_t node)
 }
 
 #ifdef STARPU_USE_CUDA
-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpy((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyDeviceToHost);
@@ -325,13 +322,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpy((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyHostToDevice);
@@ -345,13 +339,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 	return 0;
 }
 
-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	cudaError_t cures;
 	cures = cudaMemcpyAsync((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyDeviceToHost, *stream);
@@ -372,13 +363,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 	return EAGAIN;
 }
 
-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
 {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	cudaError_t cures;
 	
@@ -403,12 +391,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
 #endif // STARPU_USE_CUDA
 #ifdef STARPU_USE_OPENCL
-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_vector->ptr, (cl_mem)dst_vector->dev_handle, src_vector->nx*src_vector->elemsize,
                                                 dst_vector->offset, event);
@@ -421,12 +409,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
+{
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	cl_event *event = _event;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_vector->dev_handle, (void*)dst_vector->ptr, src_vector->nx*src_vector->elemsize,
                                                   src_vector->offset, event);
@@ -439,12 +427,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 	return EAGAIN;
 }
 
-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	int err = _starpu_opencl_copy_to_opencl((void*)src_vector->ptr, (cl_mem)dst_vector->dev_handle, src_vector->nx*src_vector->elemsize,
                                                 dst_vector->offset, NULL);
@@ -457,12 +443,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 	return 0;
 }
 
-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_vector->dev_handle, (void*)dst_vector->ptr, src_vector->nx*src_vector->elemsize,
                                                   src_vector->offset, NULL);
@@ -477,13 +461,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
 #endif
 
-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	starpu_vector_interface_t *src_vector;
-	starpu_vector_interface_t *dst_vector;
-
-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
+	starpu_vector_interface_t *src_vector = src_interface;
+	starpu_vector_interface_t *dst_vector = dst_interface;
 
 	uint32_t nx = dst_vector->nx;
 	size_t elemsize = dst_vector->elemsize;