15 years ago · 48129da2ff
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -19,7 +19,6 @@
 
				 
			
 
				 #include <starpu.h>
			
 
				 #include <starpu_data.h>
			
 
				-#include <starpu_opencl.h>
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				 /* to get the gordon_strideSize_t data structure from gordon */
			
@@ -41,44 +40,43 @@ extern "C" {
 
				  * different types of memory nodes */
			
 
				 struct starpu_data_copy_methods {
			
 
				 	/* src type is ram */
			
 
				-	int (*ram_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*ram_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*ram_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*ram_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				+	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/* src type is cuda */
			
 
				-	int (*cuda_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*cuda_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*cuda_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*cuda_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				+	int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*cuda_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/* src type is spu */
			
 
				-	int (*spu_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*spu_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*spu_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*spu_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				+	int (*spu_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*spu_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*spu_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*spu_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/* src type is opencl */
			
 
				-	int (*opencl_to_ram)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*opencl_to_cuda)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*opencl_to_opencl)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				-	int (*opencl_to_spu)(starpu_data_handle handle, uint32_t src, uint32_t dst);
			
 
				+	int (*opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*opencl_to_spu)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	/* for asynchronous CUDA transfers */
			
 
				-	int (*ram_to_cuda_async)(starpu_data_handle handle, uint32_t src,
			
 
				-					uint32_t dst, cudaStream_t *stream);
			
 
				-	int (*cuda_to_ram_async)(starpu_data_handle handle, uint32_t src,
			
 
				-					uint32_t dst, cudaStream_t *stream);
			
 
				-	int (*cuda_to_cuda_async)(starpu_data_handle handle, uint32_t src,
			
 
				-					uint32_t dst, cudaStream_t *stream);
			
 
				+	int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+	int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+	int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 	/* for asynchronous OpenCL transfers */
			
 
				-        int (*ram_to_opencl_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
			
 
				-	int (*opencl_to_ram_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
			
 
				-	int (*opencl_to_opencl_async)(starpu_data_handle handle, uint32_t src, uint32_t dst, cl_event *event);
			
 
				+	/* XXX we do not use a cl_event *event type for the last argument
			
 
				+	 * because nvcc does not like when we have to include OpenCL headers */
			
 
				+        int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
			
 
				+	int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
			
 
				+	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event);
			
 
				 #endif
			
 
				 };
			
 
				 
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -80,8 +80,6 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 
				 {
			
 
				 	int ret = 0;
			
 
				 
			
 
				-	//ret = handle->ops->copy_data_1_to_1(handle, src_node, dst_node);
			
 
				-
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				 
			
 
				 	starpu_node_kind src_kind = _starpu_get_node_kind(src_node);
			
@@ -94,15 +92,18 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 
				 	STARPU_ASSERT(handle->per_node[dst_node].allocated);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-cudaError_t cures;
			
 
				-cudaStream_t *stream;
			
 
				+	cudaError_t cures;
			
 
				+	cudaStream_t *stream;
			
 
				 #endif
			
 
				 
			
 
				+	void *src_interface = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	void *dst_interface = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+
			
 
				 	switch (MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
			
 
				       case MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
			
 
				          /* STARPU_CPU_RAM -> STARPU_CPU_RAM */
			
 
				          STARPU_ASSERT(copy_methods->ram_to_ram);
			
 
				-         copy_methods->ram_to_ram(handle, src_node, dst_node);
			
 
				+         copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				          break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				       case MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
			
@@ -113,14 +114,14 @@ cudaStream_t *stream;
 
				             STARPU_ASSERT(copy_methods->cuda_to_ram);
			
 
				             if (!req || !copy_methods->cuda_to_ram_async) {
			
 
				                /* this is not associated to a request so it's synchronous */
			
 
				-               copy_methods->cuda_to_ram(handle, src_node, dst_node);
			
 
				+               copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				             }
			
 
				             else {
			
 
				                cures = cudaEventCreate(&req->async_channel.cuda_event);
			
 
				                if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				                stream = starpu_cuda_get_local_stream();
			
 
				-               ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
			
 
				+               ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
			
 
				 
			
 
				                cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
			
 
				                if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
@@ -138,14 +139,14 @@ cudaStream_t *stream;
 
				          STARPU_ASSERT(copy_methods->ram_to_cuda);
			
 
				          if (!req || !copy_methods->ram_to_cuda_async) {
			
 
				             /* this is not associated to a request so it's synchronous */
			
 
				-            copy_methods->ram_to_cuda(handle, src_node, dst_node);
			
 
				+            copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
			
 
				          }
			
 
				          else {
			
 
				             cures = cudaEventCreate(&req->async_channel.cuda_event);
			
 
				             if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				             stream = starpu_cuda_get_local_stream();
			
 
				-            ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
			
 
				+            ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
			
 
				 
			
 
				             cures = cudaEventRecord(req->async_channel.cuda_event, *stream);
			
 
				             if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
@@ -159,10 +160,10 @@ cudaStream_t *stream;
 
				             STARPU_ASSERT(copy_methods->opencl_to_ram);
			
 
				             if (!req || !copy_methods->opencl_to_ram_async) {
			
 
				                /* this is not associated to a request so it's synchronous */
			
 
				-               copy_methods->opencl_to_ram(handle, src_node, dst_node);
			
 
				+               copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				             }
			
 
				             else {
			
 
				-               ret = copy_methods->opencl_to_ram_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
			
 
				+               ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.opencl_event));
			
 
				             }
			
 
				          }
			
 
				          else {
			
@@ -176,10 +177,10 @@ cudaStream_t *stream;
 
				          STARPU_ASSERT(copy_methods->ram_to_opencl);
			
 
				          if (!req || !copy_methods->ram_to_opencl_async) {
			
 
				             /* this is not associated to a request so it's synchronous */
			
 
				-            copy_methods->ram_to_opencl(handle, src_node, dst_node);
			
 
				+            copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
			
 
				          }
			
 
				          else {
			
 
				-            ret = copy_methods->ram_to_opencl_async(handle, src_node, dst_node, &(req->async_channel.opencl_event));
			
 
				+            ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.opencl_event));
			
 
				          }
			
 
				          break;
			
 
				 #endif
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -31,14 +31,14 @@
 
				  * BCSR : blocked CSR, we use blocks of size (r x c)
			
 
				  */
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = {
			
@@ -413,13 +413,10 @@ static void free_bcsr_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_bcsr_interface_t *src_bcsr;
			
 
				-	starpu_bcsr_interface_t *dst_bcsr;
			
 
				-
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_bcsr_interface_t *src_bcsr = src_interface;
			
 
				+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -449,13 +446,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_bcsr_interface_t *src_bcsr;
			
 
				-	starpu_bcsr_interface_t *dst_bcsr;
			
 
				-
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_bcsr_interface_t *src_bcsr = src_interface;
			
 
				+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -487,13 +481,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 #endif // STARPU_USE_CUDA
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_bcsr_interface_t *src_bcsr;
			
 
				-	starpu_bcsr_interface_t *dst_bcsr;
			
 
				-
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_bcsr_interface_t *src_bcsr = src_interface;
			
 
				+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -521,13 +512,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_bcsr_interface_t *src_bcsr;
			
 
				-	starpu_bcsr_interface_t *dst_bcsr;
			
 
				-
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_bcsr_interface_t *src_bcsr = src_interface;
			
 
				+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -557,13 +545,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 #endif // STARPU_USE_OPENCL
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_bcsr_interface_t *src_bcsr;
			
 
				-	starpu_bcsr_interface_t *dst_bcsr;
			
 
				-
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_bcsr_interface_t *src_bcsr = src_interface;
			
 
				+	starpu_bcsr_interface_t *dst_bcsr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -27,18 +27,18 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods block_copy_data_methods_s = {
			
@@ -381,15 +381,12 @@ static void free_block_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				 	cudaError_t cures;
			
 
				 
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -429,13 +426,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -549,13 +543,10 @@ no_async_default:
 
				 
			
 
				 
			
 
				 
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -667,15 +658,12 @@ no_async_default:
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				 	cudaError_t cures;
			
 
				 
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -717,12 +705,12 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 #endif // STARPU_USE_CUDA
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_block->ptr, (cl_mem)dst_block->dev_handle,
			
 
				                                                 src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
			
@@ -736,12 +724,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_block->dev_handle, (void*)dst_block->ptr,
			
 
				                                                   src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
			
@@ -755,12 +743,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_block->ptr, (cl_mem)dst_block->dev_handle,
			
 
				                                                 src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
			
@@ -774,12 +760,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_block->dev_handle, (void*)dst_block->ptr,
			
 
				                                                   src_block->nx*src_block->ny*src_block->nz*src_block->elemsize,
			
@@ -796,13 +780,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 #endif
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_block_interface_t *src_block;
			
 
				-	starpu_block_interface_t *dst_block;
			
 
				-
			
 
				-	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_block_interface_t *src_block = src_interface;
			
 
				+	starpu_block_interface_t *dst_block = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = dst_block->nx;
			
 
				 	uint32_t ny = dst_block->ny;
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -27,14 +27,14 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods csr_copy_data_methods_s = {
			
@@ -381,13 +381,10 @@ static void free_csr_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_csr_interface_t *src_csr;
			
 
				-	starpu_csr_interface_t *dst_csr;
			
 
				-
			
 
				-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_csr_interface_t *src_csr = src_interface;
			
 
				+	starpu_csr_interface_t *dst_csr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -414,13 +411,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_csr_interface_t *src_csr;
			
 
				-	starpu_csr_interface_t *dst_csr;
			
 
				-
			
 
				-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_csr_interface_t *src_csr = src_interface;
			
 
				+	starpu_csr_interface_t *dst_csr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -449,13 +443,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 #endif // STARPU_USE_CUDA
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_csr_interface_t *src_csr;
			
 
				-	starpu_csr_interface_t *dst_csr;
			
 
				-
			
 
				-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_csr_interface_t *src_csr = src_interface;
			
 
				+	starpu_csr_interface_t *dst_csr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -480,13 +471,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_csr_interface_t *src_csr;
			
 
				-	starpu_csr_interface_t *dst_csr;
			
 
				-
			
 
				-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_csr_interface_t *src_csr = src_interface;
			
 
				+	starpu_csr_interface_t *dst_csr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -513,14 +501,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 #endif // STARPU_USE_OPENCL
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-
			
 
				-	starpu_csr_interface_t *src_csr;
			
 
				-	starpu_csr_interface_t *dst_csr;
			
 
				-
			
 
				-	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_csr_interface_t *src_csr = src_interface;
			
 
				+	starpu_csr_interface_t *dst_csr = dst_interface;
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -31,18 +31,18 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
			
@@ -356,13 +356,10 @@ static void free_matrix_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	size_t elemsize = src_matrix->elemsize;
			
 
				 
			
@@ -378,13 +375,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	size_t elemsize = src_matrix->elemsize;
			
 
				 
			
@@ -404,13 +398,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	size_t elemsize = src_matrix->elemsize;
			
 
				 
			
@@ -442,13 +433,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	size_t elemsize = src_matrix->elemsize;
			
 
				 
			
@@ -478,12 +466,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 #endif // STARPU_USE_CUDA
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_matrix->ptr, (cl_mem)dst_matrix->dev_handle, src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
			
 
				                                                 dst_matrix->offset, event);
			
@@ -496,12 +484,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_matrix->dev_handle, (void*)dst_matrix->ptr,
			
 
				                                                   src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
			
@@ -515,12 +503,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_matrix->ptr, (cl_mem)dst_matrix->dev_handle, src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
			
 
				                                                 dst_matrix->offset, NULL);
			
@@ -533,12 +519,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_matrix->dev_handle, (void*)dst_matrix->ptr,
			
 
				                                                   src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
			
@@ -555,13 +539,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 #endif
			
 
				 
			
 
				 /* as not all platform easily have a  lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_matrix_interface_t *src_matrix;
			
 
				-	starpu_matrix_interface_t *dst_matrix;
			
 
				-
			
 
				-	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_matrix_interface_t *src_matrix = src_interface;
			
 
				+	starpu_matrix_interface_t *dst_matrix = dst_interface;
			
 
				 
			
 
				 	unsigned y;
			
 
				 	uint32_t nx = dst_matrix->nx;
			
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -31,18 +31,18 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods variable_copy_data_methods_s = {
			
@@ -270,13 +270,10 @@ static void free_variable_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost);
			
@@ -290,13 +287,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyHostToDevice);
			
@@ -310,13 +304,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpyAsync((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost, *stream);
			
@@ -337,13 +328,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	
			
@@ -369,12 +357,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 #endif // STARPU_USE_CUDA
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_variable->ptr, (cl_mem)dst_variable->ptr, src_variable->elemsize,
			
 
				                                                 0, event);
			
@@ -387,12 +375,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_variable->ptr, (void*)dst_variable->ptr, src_variable->elemsize,
			
 
				                                                   0, event);
			
@@ -405,12 +393,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_variable->ptr, (cl_mem)dst_variable->ptr, src_variable->elemsize,
			
 
				                                                 0, NULL);
			
@@ -423,12 +409,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_variable->ptr, (void*)dst_variable->ptr, src_variable->elemsize,
			
 
				                                                   0, NULL);
			
@@ -443,13 +427,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_variable_interface_t *src_variable;
			
 
				-	starpu_variable_interface_t *dst_variable;
			
 
				-
			
 
				-	src_variable = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_variable_interface_t *src_variable = src_interface;
			
 
				+	starpu_variable_interface_t *dst_variable = dst_interface;
			
 
				 
			
 
				 	size_t elemsize = dst_variable->elemsize;
			
 
				 
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -30,18 +30,18 @@
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods vector_copy_data_methods_s = {
			
@@ -305,13 +305,10 @@ static void free_vector_buffer_on_node(void *interface, uint32_t node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpy((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyDeviceToHost);
			
@@ -325,13 +322,10 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpy((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyHostToDevice);
			
@@ -345,13 +339,10 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpyAsync((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyDeviceToHost, *stream);
			
@@ -372,13 +363,10 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	
			
@@ -403,12 +391,12 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 
			
 
				 #endif // STARPU_USE_CUDA
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_vector->ptr, (cl_mem)dst_vector->dev_handle, src_vector->nx*src_vector->elemsize,
			
 
				                                                 dst_vector->offset, event);
			
@@ -421,12 +409,12 @@ static int copy_ram_to_opencl_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cl_event *event) {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event)
			
 
				+{
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	cl_event *event = _event;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_vector->dev_handle, (void*)dst_vector->ptr, src_vector->nx*src_vector->elemsize,
			
 
				                                                   src_vector->offset, event);
			
@@ -439,12 +427,10 @@ static int copy_opencl_to_ram_async(starpu_data_handle handle, uint32_t src_node
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_to_opencl((void*)src_vector->ptr, (cl_mem)dst_vector->dev_handle, src_vector->nx*src_vector->elemsize,
			
 
				                                                 dst_vector->offset, NULL);
			
@@ -457,12 +443,10 @@ static int copy_ram_to_opencl(starpu_data_handle handle, uint32_t src_node, uint
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node) {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	int err = _starpu_opencl_copy_from_opencl((cl_mem)src_vector->dev_handle, (void*)dst_vector->ptr, src_vector->nx*src_vector->elemsize,
			
 
				                                                   src_vector->offset, NULL);
			
@@ -477,13 +461,10 @@ static int copy_opencl_to_ram(starpu_data_handle handle, uint32_t src_node, uint
 
				 
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				 {
			
 
				-	starpu_vector_interface_t *src_vector;
			
 
				-	starpu_vector_interface_t *dst_vector;
			
 
				-
			
 
				-	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				+	starpu_vector_interface_t *src_vector = src_interface;
			
 
				+	starpu_vector_interface_t *dst_vector = dst_interface;
			
 
				 
			
 
				 	uint32_t nx = dst_vector->nx;
			
 
				 	size_t elemsize = dst_vector->elemsize;