%!s(int64=12) %!d(string=hai) anos · de6deb09f5
--- a/doc/chapters/advanced-api.texi
+++ b/doc/chapters/advanced-api.texi
@@ -354,51 +354,56 @@ Unpack the data handle from the contiguous buffer at the address @code{ptr} of s
 
																 @end deftp
															
 
																 @deftp {Data Type} {struct starpu_data_copy_methods}
															
 
																-Defines the per-interface methods.
															
 
																+Defines the per-interface methods. If the @code{any_to_any} method is provided,
															
 
																+it will be used by default if no more specific method is provided. It can still
															
 
																+be useful to provide more specific method in case of e.g. available particular
															
 
																+CUDA or OpenCL support.
															
 
																+
															
 
																 @table @asis
															
 
																-@item @code{int @{ram,cuda,opencl@}_to_@{ram,cuda,opencl@}(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																+@item @code{int (*@{ram,cuda,opencl@}_to_@{ram,cuda,opencl@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																 These 12 functions define how to copy data from the @var{src_interface}
															
 
																 interface on the @var{src_node} node to the @var{dst_interface} interface
															
 
																 on the @var{dst_node} node. They return 0 on success.
															
 
																-@item @code{int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
															
 
																-Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (in RAM) to the @var{dst_interface} interface on the
															
 
																-@var{dst_node} node (on a CUDA device), using the given @var{stream}. Return 0
															
 
																-on success.
															
 
																-
															
 
																-@item @code{int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
															
 
																-Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on the
															
 
																-@var{dst_node} node (in RAM), using the given @var{stream}. Return 0
															
 
																-on success.
															
 
																-
															
 
																-@item @code{int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
															
 
																-Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on
															
 
																-the @var{dst_node} node (on another CUDA device), using the given @var{stream}.
															
 
																-Return 0 on success.
															
 
																-
															
 
																-@item @code{int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
															
 
																+@item @code{int (*@{ram,cuda@}_to_@{ram,cuda@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
															
 
																+These 3 functions (@code{ram_to_ram} is not among these) define how to copy
															
 
																+data from the @var{src_interface} interface on the @var{src_node} node to the
															
 
																+@var{dst_interface} interface on the @var{dst_node} node, using the given
															
 
																+@var{stream}. Must return 0 if the transfer was actually completed completely
															
 
																+synchronously, or -EAGAIN if at least some transfers are still ongoing and
															
 
																+should be awaited for by the core.
															
 
																+
															
 
																+@item @code{int (*@{ram,opencl@}_to_@{ram,opencl@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
															
 
																+These 3 functions (@code{ram_to_ram} is not among them) define how to copy
															
 
																+data from the @var{src_interface} interface on the @var{src_node} node to the
															
 
																+@var{dst_interface} interface on the @var{dst_node} node, by recording in
															
 
																+@var{event}, a pointer to a cl_event, the event of the last submitted transfer.
															
 
																+Must return 0 if the transfer was actually completed completely synchronously,
															
 
																+or -EAGAIN if at least some transfers are still ongoing and should be awaited
															
 
																+for by the core.
															
 
																+
															
 
																+@item @code{int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)}
															
 
																 Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (in RAM) to the @var{dst_interface} interface on the
															
 
																-@var{dst_node} node (on an OpenCL device), using @var{event}, a pointer to a
															
 
																-cl_event. Return 0 on success.
															
 
																+@var{src_node} node to the @var{dst_interface} interface on the @var{dst_node}
															
 
																+node. This is meant to be implemented through the @var{starpu_interface_copy}
															
 
																+helper, to which @var{async_data} should be passed as such, and will be used to
															
 
																+manage asynchronicity. This must return -EAGAIN if any of the
															
 
																+@var{starpu_interface_copy} calls has returned -EAGAIN (i.e. at least some
															
 
																+transfer is still ongoing), and return 0 otherwise.
															
 
																-@item @code{int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
															
 
																-Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
															
 
																-on the @var{dst_node} node (in RAM), using the given @var{event}, a pointer to
															
 
																-a cl_event. Return 0 on success.
															
 
																-
															
 
																-@item @code{int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
															
 
																-Define how to copy data from the @var{src_interface} interface on the
															
 
																-@var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
															
 
																-on the @var{dst_node} node (on another OpenCL device), using the given
															
 
																-@var{event}, a pointer to a cl_event. Return 0 on success.
															
 
																 @end table
															
 
																 @end deftp
															
 
																+@deftypefun int starpu_interface_copy(uintptr_t @var{src}, unsigned @var{src_node}, size_t @var{src_offset}, uintptr_t @var{dst}, unsigned @var{dst_node}, size_t @var{dst_offset}, size_t @var{size}, {void *}@var{async_data});
															
 
																+Copy @var{size} bytes from byte offset @var{src_offset} of @var{src} on
															
 
																+@var{src_node} to byte offset @var{dst_offset} of @var{dst} on @var{dst_node}.
															
 
																+This is to be used in the @var{any_to_any} copy method, which is provided with
															
 
																+the @var{async_data} to be pased to @var{starpu_interface_copy}. this returns
															
 
																+-EAGAIN if the transfer is still ongoing, or 0 if the transfer is already
															
 
																+completed.
															
 
																+@end deftypefun
															
 
																+
															
 
																+
															
 
																 @deftypefun uint32_t starpu_crc32_be_n ({void *}@var{input}, size_t @var{n}, uint32_t @var{inputcrc})
															
 
																 Compute the CRC of a byte buffer seeded by the inputcrc "current
															
 
																 state". The return value should be considered as the new "current
															
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -73,8 +73,12 @@ struct starpu_data_copy_methods
 
																 	int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																 #endif
															
 
																+
															
 
																+	int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 };
															
 
																+int starpu_interface_copy(uintptr_t src, unsigned src_node, size_t src_offset, uintptr_t dst, unsigned dst_node, size_t dst_offset, size_t size, void *async_data);
															
 
																+
															
 
																 enum starpu_data_interface_id
															
 
																 {
															
 
																 	STARPU_MATRIX_INTERFACE_ID=0,
															
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -134,8 +134,10 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 	{
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
															
 
																 		/* STARPU_CPU_RAM -> STARPU_CPU_RAM */
															
 
																-		STARPU_ASSERT(copy_methods->ram_to_ram);
															
 
																-		copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																+		if (copy_methods->ram_to_ram)
															
 
																+			copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																+		else
															
 
																+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																 		break;
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
															
@@ -143,10 +145,10 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 #if !defined(HAVE_CUDA_MEMCPY_PEER)
															
 
																 		STARPU_ASSERT(_starpu_memory_node_get_local_key() == src_node);
															
 
																 #endif
															
 
																-		STARPU_ASSERT(copy_methods->cuda_to_ram);
															
 
																-		if (!req || !copy_methods->cuda_to_ram_async)
															
 
																+		if (!req || !(copy_methods->cuda_to_ram_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																 			/* this is not associated to a request so it's synchronous */
															
 
																+			STARPU_ASSERT(copy_methods->cuda_to_ram);
															
 
																 			copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																 		}
															
 
																 		else
															
@@ -156,7 +158,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
															
 
																 			stream = starpu_cuda_get_local_out_transfer_stream();
															
 
																-			ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			if (copy_methods->cuda_to_ram_async)
															
 
																+				ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 			cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
															
 
																 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
															
@@ -168,10 +176,10 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 #if !defined(HAVE_CUDA_MEMCPY_PEER)
															
 
																 		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node);
															
 
																 #endif
															
 
																-		STARPU_ASSERT(copy_methods->ram_to_cuda);
															
 
																-		if (!req || !copy_methods->ram_to_cuda_async)
															
 
																+		if (!req || !(copy_methods->ram_to_cuda_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																 			/* this is not associated to a request so it's synchronous */
															
 
																+			STARPU_ASSERT(copy_methods->ram_to_cuda);
															
 
																 			copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
															
 
																 		}
															
 
																 		else
															
@@ -182,7 +190,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 				STARPU_CUDA_REPORT_ERROR(cures);
															
 
																 			stream = starpu_cuda_get_local_in_transfer_stream();
															
 
																-			ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			if (copy_methods->ram_to_cuda_async)
															
 
																+				ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 			cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
															
 
																 			if (STARPU_UNLIKELY(cures != cudaSuccess))
															
@@ -191,8 +205,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 		break;
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
															
 
																 		/* CUDA - CUDA transfer */
															
 
																-		STARPU_ASSERT(copy_methods->cuda_to_cuda || copy_methods->cuda_to_cuda_async);
															
 
																-		if (!req || !copy_methods->cuda_to_cuda_async)
															
 
																+		if (!req || !(copy_methods->cuda_to_cuda_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																 			STARPU_ASSERT(copy_methods->cuda_to_cuda);
															
 
																 			/* this is not associated to a request so it's synchronous */
															
@@ -205,7 +218,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
															
 
																 			stream = starpu_cuda_get_local_peer_transfer_stream();
															
 
																-			ret = copy_methods->cuda_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			if (copy_methods->cuda_to_cuda_async)
															
 
																+				ret = copy_methods->cuda_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 			cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
															
 
																 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
															
@@ -215,54 +234,65 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
															
 
																 		/* OpenCL -> RAM */
															
 
																-		if (_starpu_memory_node_get_local_key() == src_node)
															
 
																+		STARPU_ASSERT(_starpu_memory_node_get_local_key() == src_node);
															
 
																+		if (!req || !(copy_methods->opencl_to_ram_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																 			STARPU_ASSERT(copy_methods->opencl_to_ram);
															
 
																-			if (!req || !copy_methods->opencl_to_ram_async)
															
 
																-			{
															
 
																-				/* this is not associated to a request so it's synchronous */
															
 
																-				copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																-			}
															
 
																-			else
															
 
																-			{
															
 
																-				req->async_channel.type = STARPU_OPENCL_RAM;
															
 
																-				ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																-			}
															
 
																+			/* this is not associated to a request so it's synchronous */
															
 
																+			copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																-			/* we should not have a blocking call ! */
															
 
																-			STARPU_ABORT();
															
 
																+			req->async_channel.type = STARPU_OPENCL_RAM;
															
 
																+			if (copy_methods->opencl_to_ram_async)
															
 
																+				ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 		}
															
 
																 		break;
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
															
 
																 		/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
															
 
																 		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node);
															
 
																-		STARPU_ASSERT(copy_methods->ram_to_opencl);
															
 
																-		if (!req || !copy_methods->ram_to_opencl_async)
															
 
																+		if (!req || !(copy_methods->ram_to_opencl_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																+			STARPU_ASSERT(copy_methods->ram_to_opencl);
															
 
																 			/* this is not associated to a request so it's synchronous */
															
 
																 			copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																 			req->async_channel.type = STARPU_OPENCL_RAM;
															
 
																-			ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																+			if (copy_methods->ram_to_opencl_async)
															
 
																+				ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 		}
															
 
																 		break;
															
 
																 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
															
 
																 		/* STARPU_OPENCL_RAM -> STARPU_OPENCL_RAM */
															
 
																 		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node || _starpu_memory_node_get_local_key() == src_node);
															
 
																-		STARPU_ASSERT(copy_methods->opencl_to_opencl);
															
 
																-		if (!req || !copy_methods->opencl_to_opencl_async)
															
 
																+		if (!req || !(copy_methods->opencl_to_opencl_async || copy_methods->any_to_any))
															
 
																 		{
															
 
																+			STARPU_ASSERT(copy_methods->opencl_to_opencl);
															
 
																 			/* this is not associated to a request so it's synchronous */
															
 
																 			copy_methods->opencl_to_opencl(src_interface, src_node, dst_interface, dst_node);
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																 			req->async_channel.type = STARPU_OPENCL_RAM;
															
 
																-			ret = copy_methods->opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																+			if (copy_methods->opencl_to_opencl_async)
															
 
																+				ret = copy_methods->opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																 		}
															
 
																 		break;
															
 
																 #endif
															
@@ -331,6 +361,64 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
																 	return 0;
															
 
																 }
															
 
																+/* This can be used by interfaces to easily transfer a piece of data without
															
 
																+ * caring about the particular CUDA/OpenCL methods.  */
															
 
																+
															
 
																+int starpu_interface_copy(uintptr_t src, unsigned src_node, size_t src_offset, uintptr_t dst, unsigned dst_node, size_t dst_offset, size_t size, void *async_data)
															
 
																+{
															
 
																+	struct _starpu_async_channel *async_channel = async_data;
															
 
																+	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
															
 
																+	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
															
 
																+
															
 
																+	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
															
 
																+	{
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
															
 
																+		memcpy((void *) dst + dst_offset, (void *) src + src_offset, size);
															
 
																+		return 0;
															
 
																+
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
															
 
																+		return starpu_cuda_copy_async_sync(
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size,
															
 
																+				async_channel?starpu_cuda_get_local_out_transfer_stream():NULL,
															
 
																+				cudaMemcpyDeviceToHost);
															
 
																+
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
															
 
																+		return starpu_cuda_copy_async_sync(
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size,
															
 
																+				async_channel?starpu_cuda_get_local_in_transfer_stream():NULL,
															
 
																+				cudaMemcpyHostToDevice);
															
 
																+
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
															
 
																+		return starpu_cuda_copy_async_sync(
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size,
															
 
																+				async_channel?starpu_cuda_get_local_peer_transfer_stream():NULL,
															
 
																+				cudaMemcpyDeviceToDevice);
															
 
																+
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
															
 
																+		return starpu_opencl_copy_async_sync(
															
 
																+				src, src_node, src_offset,
															
 
																+				dst, dst_node, dst_offset,
															
 
																+				size,
															
 
																+				&async_channel->event.opencl_event);
															
 
																+#endif
															
 
																+	default:
															
 
																+		STARPU_ABORT();
															
 
																+		return -1;
															
 
																+	}
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																 void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_channel)
															
 
																 {
															
 
																 #ifdef STARPU_SIMGRID
															
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -31,31 +31,11 @@
 
																  * BCSR : blocked CSR, we use blocks of size (r x c)
															
 
																  */
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-#endif
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
															
 
																 {
															
 
																-	.ram_to_ram = copy_ram_to_ram,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda = copy_ram_to_cuda,
															
 
																-	.cuda_to_ram = copy_cuda_to_ram,
															
 
																-	.cuda_to_cuda = copy_cuda_to_cuda,
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl = copy_ram_to_opencl,
															
 
																-	.opencl_to_ram = copy_opencl_to_ram,
															
 
																-	.opencl_to_opencl = copy_opencl_to_opencl,
															
 
																-#endif
															
 
																+	.any_to_any = copy_any_to_any,
															
 
																 };
															
 
																 static void register_bcsr_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -315,105 +295,7 @@ static void free_bcsr_buffer_on_node(void *data_interface, unsigned node)
 
																 	starpu_free_buffer_on_node(node, (uintptr_t) bcsr_interface->rowptr, (nrow+1)*sizeof(uint32_t));
															
 
																 }
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind)
															
 
																-{
															
 
																-	struct starpu_bcsr_interface *src_bcsr = src_interface;
															
 
																-	struct starpu_bcsr_interface *dst_bcsr = dst_interface;
															
 
																-
															
 
																-	uint32_t nnz = src_bcsr->nnz;
															
 
																-	uint32_t nrow = src_bcsr->nrow;
															
 
																-	size_t elemsize = src_bcsr->elemsize;
															
 
																-
															
 
																-	uint32_t r = src_bcsr->r;
															
 
																-	uint32_t c = src_bcsr->c;
															
 
																-
															
 
																-	cudaError_t cures;
															
 
																-
															
 
																-	cures = cudaMemcpy((char *)dst_bcsr->nzval, (char *)src_bcsr->nzval, nnz*r*c*elemsize, kind);
															
 
																-	if (STARPU_UNLIKELY(cures))
															
 
																-		STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-
															
 
																-	cures = cudaMemcpy((char *)dst_bcsr->colind, (char *)src_bcsr->colind, nnz*sizeof(uint32_t), kind);
															
 
																-	if (STARPU_UNLIKELY(cures))
															
 
																-		STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-
															
 
																-	cures = cudaMemcpy((char *)dst_bcsr->rowptr, (char *)src_bcsr->rowptr, (nrow+1)*sizeof(uint32_t), kind);
															
 
																-	if (STARPU_UNLIKELY(cures))
															
 
																-		STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*r*c*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-#endif // STARPU_USE_CUDA
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	struct starpu_bcsr_interface *src_bcsr = src_interface;
															
 
																-	struct starpu_bcsr_interface *dst_bcsr = dst_interface;
															
 
																-
															
 
																-	uint32_t nnz = src_bcsr->nnz;
															
 
																-	uint32_t nrow = src_bcsr->nrow;
															
 
																-	size_t elemsize = src_bcsr->elemsize;
															
 
																-
															
 
																-	uint32_t r = src_bcsr->r;
															
 
																-	uint32_t c = src_bcsr->c;
															
 
																-
															
 
																-        int err;
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync(src_bcsr->nzval, src_node, 0, dst_bcsr->nzval, dst_node, 0, nnz*r*c*elemsize, NULL);
															
 
																-	if (STARPU_UNLIKELY(err))
															
 
																-		STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->colind, src_node, 0, (uintptr_t)dst_bcsr->colind, dst_node, 0, nnz*sizeof(uint32_t), NULL);
															
 
																-	if (STARPU_UNLIKELY(err))
															
 
																-		STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->rowptr, src_node, 0, (uintptr_t)dst_bcsr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), NULL);
															
 
																-	if (STARPU_UNLIKELY(err))
															
 
																-		STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*r*c*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-#endif // STARPU_USE_OPENCL
															
 
																-
															
 
																-/* as not all platform easily have a BLAS lib installed ... */
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
															
 
																 {
															
 
																 	struct starpu_bcsr_interface *src_bcsr = (struct starpu_bcsr_interface *) src_interface;
															
 
																 	struct starpu_bcsr_interface *dst_bcsr = (struct starpu_bcsr_interface *) dst_interface;
															
@@ -425,13 +307,18 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 
																 	uint32_t r = src_bcsr->r;
															
 
																 	uint32_t c = src_bcsr->c;
															
 
																-	memcpy((void *)dst_bcsr->nzval, (void *)src_bcsr->nzval, nnz*elemsize*r*c);
															
 
																+	int ret = 0;
															
 
																+
															
 
																+	if (starpu_interface_copy(src_bcsr->nzval, src_node, 0, dst_bcsr->nzval, dst_node, 0, nnz*elemsize*r*c, async_data))
															
 
																+		ret = -EAGAIN;
															
 
																-	memcpy((void *)dst_bcsr->colind, (void *)src_bcsr->colind, nnz*sizeof(uint32_t));
															
 
																+	if (starpu_interface_copy((uintptr_t)src_bcsr->colind, src_node, 0, (uintptr_t)dst_bcsr->colind, dst_node, 0, nnz*sizeof(uint32_t), async_data))
															
 
																+		ret = -EAGAIN;
															
 
																-	memcpy((void *)dst_bcsr->rowptr, (void *)src_bcsr->rowptr, (nrow+1)*sizeof(uint32_t));
															
 
																+	if (starpu_interface_copy((uintptr_t)src_bcsr->rowptr, src_node, 0, (uintptr_t)dst_bcsr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), async_data))
															
 
																+		ret = -EAGAIN;
															
 
																 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-	return 0;
															
 
																+	return ret;
															
 
																 }
															
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -420,7 +420,7 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-		/* Default case: we transfer all lines one by one: ny*nz transfers */
															
 
																+		/* Default case: we transfer all blocks one by one: nz 2D transfers */
															
 
																 		unsigned layer;
															
 
																 		for (layer = 0; layer < src_block->nz; layer++)
															
 
																 		{
															
--- a/src/datawizard/interfaces/coo_interface.c
+++ b/src/datawizard/interfaces/coo_interface.c
@@ -19,190 +19,36 @@
 
																 #include <datawizard/memalloc.h>
															
 
																 static int
															
 
																-copy_ram_to_ram(void *src_interface, STARPU_ATTRIBUTE_UNUSED unsigned src_node,
															
 
																-		void *dst_interface, STARPU_ATTRIBUTE_UNUSED unsigned dst_node)
															
 
																+copy_any_to_any(void *src_interface, unsigned src_node,
															
 
																+		void *dst_interface, unsigned dst_node, void *async_data)
															
 
																 {
															
 
																 	size_t size = 0;
															
 
																 	struct starpu_coo_interface *src_coo, *dst_coo;
															
 
																-
															
 
																-	src_coo = (struct starpu_coo_interface *) src_interface;
															
 
																-	dst_coo = (struct starpu_coo_interface *) dst_interface;
															
 
																-
															
 
																-	size = src_coo->n_values * sizeof(src_coo->columns[0]);
															
 
																-	memcpy((void *) dst_coo->columns, (void *) src_coo->columns, size);
															
 
																-
															
 
																-	/* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
															
 
																-	memcpy((void *) dst_coo->rows, (void *) src_coo->rows, size);
															
 
																-
															
 
																-	size = src_coo->n_values * src_coo->elemsize;
															
 
																-	memcpy((void *) dst_coo->values, (void *) src_coo->values, size);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node,
															
 
																-		src_coo->n_values *
															
 
																-		(2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
 
																-
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int
															
 
																-copy_cuda_async_sync(void *src_interface, unsigned src_node,
															
 
																-		     void *dst_interface, unsigned dst_node,
															
 
																-		     cudaStream_t stream, enum cudaMemcpyKind kind)
															
 
																-{
															
 
																-	int ret;
															
 
																-	size_t size = 0;
															
 
																-	struct starpu_coo_interface *src_coo, *dst_coo;
															
 
																-
															
 
																-	src_coo = (struct starpu_coo_interface *) src_interface;
															
 
																-	dst_coo = (struct starpu_coo_interface *) dst_interface;
															
 
																-
															
 
																-	size = src_coo->n_values * sizeof(src_coo->columns[0]);
															
 
																-	ret = starpu_cuda_copy_async_sync(
															
 
																-		(void *) src_coo->columns,
															
 
																-		src_node,
															
 
																-		(void *) dst_coo->columns,
															
 
																-		dst_node,
															
 
																-		size,
															
 
																-		stream,
															
 
																-		kind);
															
 
																-	if (ret == 0)
															
 
																-		stream = NULL;
															
 
																-
															
 
																-	/* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
															
 
																-	ret = starpu_cuda_copy_async_sync(
															
 
																-		(void *) src_coo->rows,
															
 
																-		src_node,
															
 
																-		(void *) dst_coo->rows,
															
 
																-		dst_node,
															
 
																-		size,
															
 
																-		stream,
															
 
																-		kind);
															
 
																-	if (ret == 0)
															
 
																-		stream = NULL;
															
 
																-
															
 
																-	size = src_coo->n_values * src_coo->elemsize;
															
 
																-	ret = starpu_cuda_copy_async_sync(
															
 
																-		(void *) src_coo->values,
															
 
																-		src_node,
															
 
																-		(void *) dst_coo->values,
															
 
																-		dst_node,
															
 
																-		size,
															
 
																-		stream,
															
 
																-		kind);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node,
															
 
																-		src_coo->n_values *
															
 
																-		(2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
															
 
																-	return ret;
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_ram_to_cuda(void *src_interface, unsigned src_node,
															
 
																-		 void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    NULL, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_cuda_to_ram(void *src_interface, unsigned src_node,
															
 
																-		 void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    NULL, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
															
 
																-		       void *dst_interface, unsigned dst_node,
															
 
																-		       cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    stream, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
															
 
																-		       void *dst_interface, unsigned dst_node,
															
 
																-		       cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    stream, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_cuda_to_cuda(void *src_interface, unsigned src_node,
															
 
																-		  void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    NULL, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-
															
 
																-#ifdef NO_STRIDE
															
 
																-static int
															
 
																-copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
															
 
																-			void *dst_interface, unsigned dst_node,
															
 
																-			cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node,
															
 
																-				    dst_interface, dst_node,
															
 
																-				    stream, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-#endif /* !NO_STRIDE */
															
 
																-#endif /* !STARPU_USE_CUDA */
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int
															
 
																-copy_opencl_common(void *src_interface, unsigned src_node,
															
 
																-			 void *dst_interface, unsigned dst_node,
															
 
																-			 cl_event *event)
															
 
																-{
															
 
																 	int ret = 0;
															
 
																-	size_t size = 0;
															
 
																-	struct starpu_coo_interface *src_coo, *dst_coo;
															
 
																 	src_coo = (struct starpu_coo_interface *) src_interface;
															
 
																 	dst_coo = (struct starpu_coo_interface *) dst_interface;
															
 
																-
															
 
																 	size = src_coo->n_values * sizeof(src_coo->columns[0]);
															
 
																-	ret = starpu_opencl_copy_async_sync(
															
 
																-		(uintptr_t) src_coo->columns,
															
 
																-		src_node,
															
 
																-		0,
															
 
																-		(uintptr_t) dst_coo->columns,
															
 
																-		dst_node,
															
 
																-		0,
															
 
																-		size,
															
 
																-		NULL);
															
 
																+	if (starpu_interface_copy(
															
 
																+		(uintptr_t) src_coo->columns, src_node, 0,
															
 
																+		(uintptr_t) dst_coo->columns, dst_node, 0,
															
 
																+		size, async_data))
															
 
																+		ret = -EAGAIN;
															
 
																 	/* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
															
 
																-	ret = starpu_opencl_copy_async_sync(
															
 
																-		(uintptr_t) src_coo->rows,
															
 
																-		src_node,
															
 
																-		0,
															
 
																-		(uintptr_t) dst_coo->rows,
															
 
																-		dst_node,
															
 
																-		0,
															
 
																-		size,
															
 
																-		NULL);
															
 
																+	if (starpu_interface_copy(
															
 
																+		(uintptr_t) src_coo->rows, src_node, 0,
															
 
																+		(uintptr_t) dst_coo->rows, dst_node, 0,
															
 
																+		size, async_data))
															
 
																+		ret = -EAGAIN;
															
 
																 	size = src_coo->n_values * src_coo->elemsize;
															
 
																-	ret = starpu_opencl_copy_async_sync(
															
 
																-		src_coo->values,
															
 
																-		src_node,
															
 
																-		0,
															
 
																-		(uintptr_t) dst_coo->values,
															
 
																-		dst_node,
															
 
																-		0,
															
 
																-		size,
															
 
																-		event);
															
 
																+	if (starpu_interface_copy(
															
 
																+		src_coo->values, src_node, 0,
															
 
																+		dst_coo->values, dst_node, 0,
															
 
																+		size, async_data))
															
 
																+		ret = -EAGAIN;
															
 
																 	_STARPU_TRACE_DATA_COPY(src_node, dst_node,
															
 
																 		src_coo->n_values *
															
@@ -211,83 +57,9 @@ copy_opencl_common(void *src_interface, unsigned src_node,
 
																 	return ret;
															
 
																 }
															
 
																-static int
															
 
																-copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-			 void *dst_interface, unsigned dst_node,
															
 
																-			 cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
															
 
																-			 void *dst_interface, unsigned dst_node,
															
 
																-			 cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_opencl_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-			 void *dst_interface, unsigned dst_node,
															
 
																-			 cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int
															
 
																-copy_ram_to_opencl(void *src_interface, unsigned src_node,
															
 
																-		   void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_ram_to_opencl_async(src_interface, src_node,
															
 
																-					dst_interface, dst_node,
															
 
																-					NULL);
															
 
																-}
															
 
																-static int
															
 
																-copy_opencl_to_ram(void *src_interface, unsigned src_node,
															
 
																-		   void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_to_ram_async(src_interface, src_node,
															
 
																-					dst_interface, dst_node,
															
 
																-					NULL);
															
 
																-}
															
 
																-static int
															
 
																-copy_opencl_to_opencl(void *src_interface, unsigned src_node,
															
 
																-		   void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_to_opencl_async(src_interface, src_node,
															
 
																-					dst_interface, dst_node,
															
 
																-					NULL);
															
 
																-}
															
 
																-#endif /* !STARPU_USE_OPENCL */
															
 
																-
															
 
																 static struct starpu_data_copy_methods coo_copy_data_methods =
															
 
																 {
															
 
																-	.ram_to_ram          = copy_ram_to_ram,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda         = copy_ram_to_cuda,
															
 
																-	.cuda_to_ram         = copy_cuda_to_ram,
															
 
																-	.ram_to_cuda_async   = copy_ram_to_cuda_async,
															
 
																-	.cuda_to_ram_async   = copy_cuda_to_ram_async,
															
 
																-	.cuda_to_cuda        = copy_cuda_to_cuda,
															
 
																-#ifdef NO_STRIDE
															
 
																-	.cuda_to_cuda_async  = copy_cuda_to_cuda_async,
															
 
																-#endif
															
 
																-#else
															
 
																-#ifdef STARPU_SIMGRID
															
 
																-#ifdef NO_STRIDE
															
 
																-	/* Enable GPU-GPU transfers in simgrid */
															
 
																-	.cuda_to_cuda_async = 1,
															
 
																-#endif
															
 
																-#endif
															
 
																-#endif /* !STARPU_USE_CUDA */
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl       = copy_ram_to_opencl,
															
 
																-	.opencl_to_ram       = copy_opencl_to_ram,
															
 
																-	.opencl_to_opencl    = copy_opencl_to_opencl,
															
 
																-	.ram_to_opencl_async = copy_ram_to_opencl_async,
															
 
																-	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
															
 
																-#endif /* !STARPU_USE_OPENCL */
															
 
																+	.any_to_any          = copy_any_to_any,
															
 
																 };
															
 
																 static void
															
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -28,42 +28,11 @@
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-#endif
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 static struct starpu_data_copy_methods csr_copy_data_methods_s =
															
 
																 {
															
 
																-	.ram_to_ram = copy_ram_to_ram,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda = copy_ram_to_cuda,
															
 
																-	.cuda_to_ram = copy_cuda_to_ram,
															
 
																-	.cuda_to_cuda = copy_cuda_to_cuda,
															
 
																-	.ram_to_cuda_async = copy_ram_to_cuda_async,
															
 
																-	.cuda_to_ram_async = copy_cuda_to_ram_async,
															
 
																-	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
															
 
																-#else
															
 
																-#ifdef STARPU_SIMGRID
															
 
																-	/* Enable GPU-GPU transfers in simgrid */
															
 
																-	.cuda_to_cuda_async = 1,
															
 
																-#endif
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl = copy_ram_to_opencl,
															
 
																-	.opencl_to_ram = copy_opencl_to_ram,
															
 
																-	.opencl_to_opencl = copy_opencl_to_opencl,
															
 
																-#endif
															
 
																+	.any_to_any = copy_any_to_any,
															
 
																 };
															
 
																 static void register_csr_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -293,188 +262,8 @@ static void free_csr_buffer_on_node(void *data_interface, unsigned node)
 
																 	starpu_free_buffer_on_node(node, (uintptr_t) csr_interface->rowptr, (nrow+1)*sizeof(uint32_t));
															
 
																 }
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_cuda_async_sync(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind, cudaStream_t stream)
															
 
																-{
															
 
																-	struct starpu_csr_interface *src_csr = src_interface;
															
 
																-	struct starpu_csr_interface *dst_csr = dst_interface;
															
 
																-
															
 
																-	uint32_t nnz = src_csr->nnz;
															
 
																-	uint32_t nrow = src_csr->nrow;
															
 
																-	size_t elemsize = src_csr->elemsize;
															
 
																-
															
 
																-	cudaStream_t sstream = stream;
															
 
																-	int ret;
															
 
																-
															
 
																-	ret = starpu_cuda_copy_async_sync((void *)src_csr->nzval, src_node, (void *)dst_csr->nzval, dst_node, nnz*elemsize, sstream, kind);
															
 
																-	if (ret == 0) sstream = NULL;
															
 
																-
															
 
																-	ret = starpu_cuda_copy_async_sync((void *)src_csr->colind, src_node, (void *)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), sstream, kind);
															
 
																-	if (ret == 0) sstream = NULL;
															
 
																-
															
 
																-	ret = starpu_cuda_copy_async_sync((void *)src_csr->rowptr, src_node, (void *)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), sstream, kind);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-	return ret;
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-				void *dst_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																-	struct starpu_csr_interface *src_csr = src_interface;
															
 
																-	struct starpu_csr_interface *dst_csr = dst_interface;
															
 
																-
															
 
																-	uint32_t nnz = src_csr->nnz;
															
 
																-	uint32_t nrow = src_csr->nrow;
															
 
																-	size_t elemsize = src_csr->elemsize;
															
 
																-
															
 
																-	cudaError_t cures;
															
 
																-
															
 
																-	int src_dev = _starpu_memory_node_get_devid(src_node);
															
 
																-	int dst_dev = _starpu_memory_node_get_devid(dst_node);
															
 
																-
															
 
																-	int synchronous_fallback = 0;
															
 
																-
															
 
																-	_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
															
 
																-	cures = cudaMemcpyPeerAsync((char *)dst_csr->nzval, dst_dev, (char *)src_csr->nzval, src_dev, nnz*elemsize, stream);
															
 
																-	if (cures)
															
 
																-	{
															
 
																-		synchronous_fallback = 1;
															
 
																-		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
															
 
																-		cures = cudaMemcpyPeer((char *)dst_csr->nzval, dst_dev, (char *)src_csr->nzval, src_dev, nnz*elemsize);
															
 
																-		if (STARPU_UNLIKELY(cures))
															
 
																-			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-	}
															
 
																-
															
 
																-	if (!synchronous_fallback)
															
 
																-	{
															
 
																-		cures = cudaMemcpyPeerAsync((char *)dst_csr->colind, dst_dev, (char *)src_csr->colind, src_dev, nnz*sizeof(uint32_t), stream);
															
 
																-	}
															
 
																-
															
 
																-	if (synchronous_fallback || cures != cudaSuccess)
															
 
																-	{
															
 
																-		synchronous_fallback = 1;
															
 
																-		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
															
 
																-		cures = cudaMemcpyPeer((char *)dst_csr->colind, dst_dev, (char *)src_csr->colind, src_dev, nnz*sizeof(uint32_t));
															
 
																-		if (STARPU_UNLIKELY(cures))
															
 
																-			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-	}
															
 
																-
															
 
																-	if (!synchronous_fallback)
															
 
																-	{
															
 
																-		cures = cudaMemcpyPeerAsync((char *)dst_csr->rowptr, dst_dev, (char *)src_csr->rowptr, src_dev, (nrow+1)*sizeof(uint32_t), stream);
															
 
																-	}
															
 
																-
															
 
																-	if (synchronous_fallback || cures != cudaSuccess)
															
 
																-	{
															
 
																-		synchronous_fallback = 1;
															
 
																-		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
															
 
																-		cures = cudaMemcpyPeer((char *)dst_csr->rowptr, dst_dev, (char *)src_csr->rowptr, src_dev, (nrow+1)*sizeof(uint32_t));
															
 
																-		if (STARPU_UNLIKELY(cures))
															
 
																-			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-	}
															
 
																-
															
 
																-	if (synchronous_fallback)
															
 
																-	{
															
 
																-		_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-		return 0;
															
 
																-	}
															
 
																-	else
															
 
																-	{
															
 
																-		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
															
 
																-		return -EAGAIN;
															
 
																-	}
															
 
																-#else
															
 
																-	/* Illegal without Peer tranfers */
															
 
																-	STARPU_ABORT();
															
 
																-	return 0;
															
 
																-#endif
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, stream);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, stream);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	if (src_node == dst_node)
															
 
																-		return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, stream);
															
 
																-	else
															
 
																-		return copy_cuda_peer_async(src_interface, src_node, dst_interface, dst_node, stream);
															
 
																-}
															
 
																-
															
 
																-#endif // STARPU_USE_CUDA
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	struct starpu_csr_interface *src_csr = src_interface;
															
 
																-	struct starpu_csr_interface *dst_csr = dst_interface;
															
 
																-
															
 
																-	uint32_t nnz = src_csr->nnz;
															
 
																-	uint32_t nrow = src_csr->nrow;
															
 
																-	size_t elemsize = src_csr->elemsize;
															
 
																-
															
 
																-        int err;
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync(src_csr->nzval, src_node, 0, dst_csr->nzval, dst_node, 0, nnz*elemsize, NULL);
															
 
																-	if (STARPU_UNLIKELY(err))
															
 
																-                STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->colind, src_node, 0, (uintptr_t)dst_csr->colind, dst_node, 0, nnz*sizeof(uint32_t), NULL);
															
 
																-        if (STARPU_UNLIKELY(err))
															
 
																-                STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->rowptr, src_node, 0, (uintptr_t)dst_csr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), NULL);
															
 
																-	if (STARPU_UNLIKELY(err))
															
 
																-                STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
															
 
																-}
															
 
																-
															
 
																-#endif // STARPU_USE_OPENCL
															
 
																-
															
 
																 /* as not all platform easily have a BLAS lib installed ... */
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
															
 
																 {
															
 
																 	struct starpu_csr_interface *src_csr = (struct starpu_csr_interface *) src_interface;
															
 
																 	struct starpu_csr_interface *dst_csr = (struct starpu_csr_interface *) dst_interface;
															
@@ -482,14 +271,18 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 
																 	uint32_t nnz = src_csr->nnz;
															
 
																 	uint32_t nrow = src_csr->nrow;
															
 
																 	size_t elemsize = src_csr->elemsize;
															
 
																+	int ret = 0;
															
 
																-	memcpy((void *)dst_csr->nzval, (void *)src_csr->nzval, nnz*elemsize);
															
 
																+	if (starpu_interface_copy(src_csr->nzval, src_node, 0, dst_csr->nzval, dst_node, 0, nnz*elemsize, async_data))
															
 
																+		ret = -EAGAIN;
															
 
																-	memcpy((void *)dst_csr->colind, (void *)src_csr->colind, nnz*sizeof(uint32_t));
															
 
																+	if (starpu_interface_copy((uintptr_t)src_csr->colind, src_node, 0, (uintptr_t)dst_csr->colind, dst_node, 0, nnz*sizeof(uint32_t), async_data))
															
 
																+		ret = -EAGAIN;
															
 
																-	memcpy((void *)dst_csr->rowptr, (void *)src_csr->rowptr, (nrow+1)*sizeof(uint32_t));
															
 
																+	if (starpu_interface_copy((uintptr_t)src_csr->rowptr, src_node, 0, (uintptr_t)dst_csr->rowptr, dst_node, 0, (nrow+1)*sizeof(uint32_t), async_data))
															
 
																+		ret = -EAGAIN;
															
 
																 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
															
 
																-	return 0;
															
 
																+	return ret;
															
 
																 }
															
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -25,48 +25,11 @@
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																-static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																-#endif
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 static struct starpu_data_copy_methods variable_copy_data_methods_s =
															
 
																 {
															
 
																-	.ram_to_ram = copy_ram_to_ram,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda = copy_ram_to_cuda,
															
 
																-	.cuda_to_ram = copy_cuda_to_ram,
															
 
																-	.cuda_to_cuda = copy_cuda_to_cuda,
															
 
																-	.ram_to_cuda_async = copy_ram_to_cuda_async,
															
 
																-	.cuda_to_ram_async = copy_cuda_to_ram_async,
															
 
																-	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
															
 
																-#else
															
 
																-#ifdef STARPU_SIMGRID
															
 
																-	/* Enable GPU-GPU transfers in simgrid */
															
 
																-	.cuda_to_cuda_async = 1,
															
 
																-#endif
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl = copy_ram_to_opencl,
															
 
																-	.opencl_to_ram = copy_opencl_to_ram,
															
 
																-	.opencl_to_opencl = copy_opencl_to_opencl,
															
 
																-        .ram_to_opencl_async = copy_ram_to_opencl_async,
															
 
																-	.opencl_to_ram_async = copy_opencl_to_ram_async,
															
 
																-	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
															
 
																-#endif
															
 
																+	.any_to_any = copy_any_to_any,
															
 
																 };
															
 
																 static void register_variable_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -204,101 +167,7 @@ static void free_variable_buffer_on_node(void *data_interface, unsigned node)
 
																 	starpu_free_buffer_on_node(node, variable_interface->ptr, variable_interface->elemsize);
															
 
																 }
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_cuda_async_sync(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
															
 
																-{
															
 
																-	struct starpu_variable_interface *src_variable = src_interface;
															
 
																-	struct starpu_variable_interface *dst_variable = dst_interface;
															
 
																-	int ret;
															
 
																-
															
 
																-	ret = starpu_cuda_copy_async_sync((void *)src_variable->ptr, src_node, (void *)dst_variable->ptr, dst_node, src_variable->elemsize, stream, kind);
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
															
 
																-	return ret;
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-
															
 
																-#endif // STARPU_USE_CUDA
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_opencl_common(void *src_interface, unsigned src_node,
															
 
																-				void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	struct starpu_variable_interface *src_variable = src_interface;
															
 
																-	struct starpu_variable_interface *dst_variable = dst_interface;
															
 
																-        int ret;
															
 
																-
															
 
																-	ret = starpu_opencl_copy_async_sync(src_variable->ptr, src_node, 0, dst_variable->ptr, dst_node, 0, src_variable->elemsize, event);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
															
 
																-
															
 
																-	return ret;
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-					void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
															
 
																-					void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-					void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-        return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-        return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-#endif
															
 
																-
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
															
 
																 {
															
 
																 	struct starpu_variable_interface *src_variable = (struct starpu_variable_interface *) src_interface;
															
 
																 	struct starpu_variable_interface *dst_variable = (struct starpu_variable_interface *) dst_interface;
															
@@ -307,10 +176,11 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 
																 	uintptr_t ptr_src = src_variable->ptr;
															
 
																 	uintptr_t ptr_dst = dst_variable->ptr;
															
 
																+	int ret;
															
 
																-	memcpy((void *)ptr_dst, (void *)ptr_src, elemsize);
															
 
																+	ret = starpu_interface_copy(ptr_src, src_node, 0, ptr_dst, dst_node, 0, elemsize, async_data);
															
 
																 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, elemsize);
															
 
																-	return 0;
															
 
																+	return ret;
															
 
																 }
															
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -25,48 +25,11 @@
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
 
																-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																-static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																-#endif
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 static struct starpu_data_copy_methods vector_copy_data_methods_s =
															
 
																 {
															
 
																-	.ram_to_ram = copy_ram_to_ram,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda = copy_ram_to_cuda,
															
 
																-	.cuda_to_ram = copy_cuda_to_ram,
															
 
																-	.ram_to_cuda_async = copy_ram_to_cuda_async,
															
 
																-	.cuda_to_ram_async = copy_cuda_to_ram_async,
															
 
																-	.cuda_to_cuda = copy_cuda_to_cuda,
															
 
																-	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
															
 
																-#else
															
 
																-#ifdef STARPU_SIMGRID
															
 
																-	/* Enable GPU-GPU transfers in simgrid */
															
 
																-	.cuda_to_cuda_async = 1,
															
 
																-#endif
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl = copy_ram_to_opencl,
															
 
																-	.opencl_to_ram = copy_opencl_to_ram,
															
 
																-	.opencl_to_opencl = copy_opencl_to_opencl,
															
 
																-        .ram_to_opencl_async = copy_ram_to_opencl_async,
															
 
																-	.opencl_to_ram_async = copy_opencl_to_ram_async,
															
 
																-	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
															
 
																-#endif
															
 
																+	.any_to_any = copy_any_to_any,
															
 
																 };
															
 
																 static void register_vector_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -252,120 +215,18 @@ static void free_vector_buffer_on_node(void *data_interface, unsigned node)
 
																 	starpu_free_buffer_on_node(node, vector_interface->ptr, nx*elemsize);
															
 
																 }
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int copy_cuda_async_sync(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
															
 
																+static int copy_any_to_any(void *src_interface, unsigned src_node,
															
 
																+                           void *dst_interface, unsigned dst_node, void *async_data)
															
 
																 {
															
 
																 	struct starpu_vector_interface *src_vector = src_interface;
															
 
																 	struct starpu_vector_interface *dst_vector = dst_interface;
															
 
																 	int ret;
															
 
																-	ret = starpu_cuda_copy_async_sync((void *)src_vector->ptr, src_node, (void *)dst_vector->ptr, dst_node, src_vector->nx*src_vector->elemsize, stream, kind);
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
															
 
																-	return ret;
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, NULL, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
															
 
																-}
															
 
																-
															
 
																-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
															
 
																-{
															
 
																-	return copy_cuda_async_sync(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
															
 
																-}
															
 
																-
															
 
																-#endif // STARPU_USE_CUDA
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int copy_opencl_common(void *src_interface, unsigned src_node,
															
 
																-                                    void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	struct starpu_vector_interface *src_vector = src_interface;
															
 
																-	struct starpu_vector_interface *dst_vector = dst_interface;
															
 
																-	int ret;
															
 
																-
															
 
																-	ret = starpu_opencl_copy_async_sync(src_vector->dev_handle, src_node, src_vector->offset,
															
 
																-					    dst_vector->dev_handle, dst_node, dst_vector->offset,
															
 
																-					       src_vector->nx*src_vector->elemsize, event);
															
 
																+	ret = starpu_interface_copy(src_vector->dev_handle, src_node, src_vector->offset,
															
 
																+				    dst_vector->dev_handle, dst_node, dst_vector->offset,
															
 
																+				    src_vector->nx*src_vector->elemsize, async_data);
															
 
																 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
															
 
																 	return ret;
															
 
																 }
															
 
																-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-                                    void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
															
 
																-                                    void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node,
															
 
																-                                       void *dst_interface, unsigned dst_node, cl_event *event)
															
 
																-{
															
 
																-	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
															
 
																-}
															
 
																-
															
 
																-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-                              void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-        return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-				void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-        return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
															
 
																-				 void *dst_interface, unsigned dst_node)
															
 
																-{
															
 
																-	return copy_opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																-}
															
 
																-
															
 
																-
															
 
																-#endif // STARPU_USE_OPENCL
															
 
																-
															
 
																-static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-					void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	struct starpu_vector_interface *src_vector = (struct starpu_vector_interface *) src_interface;
															
 
																-	struct starpu_vector_interface *dst_vector = (struct starpu_vector_interface *) dst_interface;
															
 
																-
															
 
																-	uint32_t nx = dst_vector->nx;
															
 
																-	size_t elemsize = dst_vector->elemsize;
															
 
																-
															
 
																-	uintptr_t ptr_src = src_vector->ptr;
															
 
																-	uintptr_t ptr_dst = dst_vector->ptr;
															
 
																-
															
 
																-	memcpy((void *)ptr_dst, (void *)ptr_src, nx*elemsize);
															
 
																-
															
 
																-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*elemsize);
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
--- a/src/datawizard/interfaces/void_interface.c
+++ b/src/datawizard/interfaces/void_interface.c
@@ -25,36 +25,11 @@
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																-static int dummy_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int dummy_cuda_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int dummy_opencl_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																-#endif
															
 
																+static int dummy_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 static struct starpu_data_copy_methods void_copy_data_methods_s =
															
 
																 {
															
 
																-	.ram_to_ram = dummy_copy,
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	.ram_to_cuda = dummy_copy,
															
 
																-	.cuda_to_ram = dummy_copy,
															
 
																-	.cuda_to_cuda = dummy_copy,
															
 
																-	.ram_to_cuda_async = dummy_cuda_copy_async,
															
 
																-	.cuda_to_ram_async = dummy_cuda_copy_async,
															
 
																-	.cuda_to_cuda_async = dummy_cuda_copy_async,
															
 
																-#else
															
 
																-#ifdef STARPU_SIMGRID
															
 
																-	/* Enable GPU-GPU transfers in simgrid */
															
 
																-	.cuda_to_cuda_async = 1,
															
 
																-#endif
															
 
																-#endif
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-	.ram_to_opencl = dummy_copy,
															
 
																-	.opencl_to_ram = dummy_copy,
															
 
																-        .ram_to_opencl_async = dummy_opencl_copy_async,
															
 
																-	.opencl_to_ram_async = dummy_opencl_copy_async,
															
 
																-#endif
															
 
																+	.any_to_any = dummy_copy,
															
 
																 };
															
 
																 static void register_void_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -139,25 +114,3 @@ static int dummy_copy(void *src_interface STARPU_ATTRIBUTE_UNUSED,
 
																 {
															
 
																 	return 0;
															
 
																 }
															
 
																-
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-static int dummy_cuda_copy_async(void *src_interface STARPU_ATTRIBUTE_UNUSED,
															
 
																-				unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-				void *dst_interface STARPU_ATTRIBUTE_UNUSED,
															
 
																-				unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-				cudaStream_t stream __attribute__ ((unused)))
															
 
																-{
															
 
																-	return 0;
															
 
																-}
															
 
																-#endif // STARPU_USE_CUDA
															
 
																-
															
 
																-#ifdef STARPU_USE_OPENCL
															
 
																-static int dummy_opencl_copy_async(void *src_interface STARPU_ATTRIBUTE_UNUSED,
															
 
																-					unsigned src_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-					void *dst_interface STARPU_ATTRIBUTE_UNUSED,
															
 
																-					unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
															
 
																-					cl_event *event STARPU_ATTRIBUTE_UNUSED)
															
 
																-{
															
 
																-	return 0;
															
 
																-}
															
 
																-#endif // STARPU_USE_OPENCL
															
--- a/tests/datawizard/interfaces/test_interfaces.c
+++ b/tests/datawizard/interfaces/test_interfaces.c
@@ -550,7 +550,10 @@ ram_to_ram(void)
 
																 	/* We do not care about the nodes */
															
 
																 	src_interface = starpu_data_get_interface_on_node(src, 0);
															
 
																 	dst_interface = starpu_data_get_interface_on_node(dst, 0);
															
 
																-	src->ops->copy_methods->ram_to_ram(src_interface, 0, dst_interface, 0);
															
 
																+	if (src->ops->copy_methods->ram_to_ram)
															
 
																+		src->ops->copy_methods->ram_to_ram(src_interface, 0, dst_interface, 0);
															
 
																+	else
															
 
																+		src->ops->copy_methods->any_to_any(src_interface, 0, dst_interface, 0, NULL);
															
 
																 	err = create_task(&task, STARPU_CPU_WORKER, -1);
															
 
																 	if (err != 0)