6 роки тому · d65545f9b3
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -300,13 +300,14 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
 
				 {
			
 
				 	struct _starpu_async_channel *async_channel = async_data;
			
 
				 	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
			
 
				+	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
			
 
				 
			
 
				-	if (_node_ops[src_kind].copy_interface)
			
 
				+	if (_node_ops[src_kind].copy_interface_to[dst_kind])
			
 
				 	{
			
 
				-		return _node_ops[src_kind].copy_interface(src, src_offset, src_node,
			
 
				-							  dst, dst_offset, dst_node,
			
 
				-							  size,
			
 
				-							  async_channel);
			
 
				+		return _node_ops[src_kind].copy_interface_to[dst_kind](src, src_offset, src_node,
			
 
				+								       dst, dst_offset, dst_node,
			
 
				+								       size,
			
 
				+								       async_channel);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/src/datawizard/node_ops.c
+++ b/src/datawizard/node_ops.c
@@ -20,10 +20,11 @@
 
				 #include <datawizard/node_ops.h>
			
 
				 #include <drivers/cpu/driver_cpu.h>
			
 
				 #include <drivers/cuda/driver_cuda.h>
			
 
				-#include <drivers/mpi/driver_mpi_sink.h>
			
 
				-#include <drivers/mpi/driver_mpi_source.h>
			
 
				+#include <drivers/opencl/driver_opencl.h>
			
 
				 #include <drivers/mpi/driver_mpi_common.h>
			
 
				+#include <drivers/mpi/driver_mpi_source.h>
			
 
				 #include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				 #include <drivers/disk/driver_disk.h>
			
 
				 
			
 
				 struct _starpu_node_ops _node_ops[STARPU_MPI_MS_RAM+1];
			
@@ -34,14 +35,8 @@ void _starpu_node_ops_init()
 
				 
			
 
				 	// CPU
			
 
				 	// CPU_RAM does not define wait_event operation
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_CUDA_RAM] = _starpu_cpu_copy_data_to_cuda;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_OPENCL_RAM] = _starpu_cpu_copy_data_to_opencl;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_DISK_RAM] = _starpu_cpu_copy_data_to_disk;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_MPI_MS_RAM] = _starpu_cpu_copy_data_to_mpi_ms;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_SCC_RAM] = _starpu_cpu_copy_data_to_scc;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_MIC_RAM] = _starpu_cpu_copy_data_to_mic;
			
 
				-	_node_ops[STARPU_CPU_RAM].copy_interface = _starpu_cpu_copy_interface;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_cpu_copy_interface;
			
 
				 	_node_ops[STARPU_CPU_RAM].direct_access_supported = _starpu_cpu_direct_access_supported;
			
 
				 	_node_ops[STARPU_CPU_RAM].malloc_on_node = _starpu_cpu_malloc_on_node;
			
 
				 	_node_ops[STARPU_CPU_RAM].free_on_node = _starpu_cpu_free_on_node;
			
@@ -49,9 +44,12 @@ void _starpu_node_ops_init()
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	_node_ops[STARPU_CUDA_RAM].wait_request_completion = _starpu_cuda_wait_request_completion;
			
 
				 	_node_ops[STARPU_CUDA_RAM].test_request_completion = _starpu_cuda_test_request_completion;
			
 
				-	_node_ops[STARPU_CUDA_RAM].copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_to_cuda;
			
 
				-	_node_ops[STARPU_CUDA_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_CUDA_RAM].copy_interface = _starpu_cuda_copy_interface;
			
 
				+	_node_ops[STARPU_CUDA_RAM].copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda;
			
 
				+	_node_ops[STARPU_CUDA_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda;
			
 
				+	_node_ops[STARPU_CUDA_RAM].copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda;
			
 
				+	_node_ops[STARPU_CUDA_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cpu_to_cuda;
			
 
				 	_node_ops[STARPU_CUDA_RAM].direct_access_supported = _starpu_cuda_direct_access_supported;
			
 
				 	_node_ops[STARPU_CUDA_RAM].malloc_on_node = _starpu_cuda_malloc_on_node;
			
 
				 	_node_ops[STARPU_CUDA_RAM].free_on_node = _starpu_cuda_free_on_node;
			
@@ -60,9 +58,12 @@ void _starpu_node_ops_init()
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 	_node_ops[STARPU_OPENCL_RAM].wait_request_completion = _starpu_opencl_wait_request_completion;
			
 
				 	_node_ops[STARPU_OPENCL_RAM].test_request_completion = _starpu_opencl_test_request_completion;
			
 
				-	_node_ops[STARPU_OPENCL_RAM].copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_to_opencl;
			
 
				-	_node_ops[STARPU_OPENCL_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_OPENCL_RAM].copy_interface = _starpu_opencl_copy_interface;
			
 
				+	_node_ops[STARPU_OPENCL_RAM].copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl;
			
 
				+	_node_ops[STARPU_OPENCL_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_cpu_to_opencl;
			
 
				+	_node_ops[STARPU_OPENCL_RAM].copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl;
			
 
				+	_node_ops[STARPU_OPENCL_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_cpu_to_opencl;
			
 
				 	_node_ops[STARPU_OPENCL_RAM].direct_access_supported = _starpu_opencl_direct_access_supported;
			
 
				 	_node_ops[STARPU_OPENCL_RAM].malloc_on_node = _starpu_opencl_malloc_on_node;
			
 
				 	_node_ops[STARPU_OPENCL_RAM].free_on_node = _starpu_opencl_free_on_node;
			
@@ -71,9 +72,11 @@ void _starpu_node_ops_init()
 
				 #ifdef STARPU_USE_MIC
			
 
				 	_node_ops[STARPU_MIC_RAM].wait_request_completion = _starpu_mic_wait_request_completion;
			
 
				 	_node_ops[STARPU_MIC_RAM].test_request_completion = _starpu_mic_test_request_completion;
			
 
				-	_node_ops[STARPU_MIC_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_mic_copy_data_to_cpu;
			
 
				 	/* TODO: MIC -> MIC */
			
 
				-	_node_ops[STARPU_MIC_RAM].copy_interface = _starpu_mic_copy_interface;
			
 
				+	_node_ops[STARPU_MIC_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_mic_copy_data_from_mic_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_MIC_RAM] = _starpu_mic_copy_data_from_cpu_to_mic;
			
 
				+	_node_ops[STARPU_MIC_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_mic_copy_interface_from_mic_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_MIC_RAM] = _starpu_mic_copy_interface_from_cpu_to_mic;
			
 
				 	_node_ops[STARPU_MIC_RAM].direct_access_supported = _starpu_mic_direct_access_supported;
			
 
				 	_node_ops[STARPU_MIC_RAM].malloc_on_node = _starpu_mic_malloc_on_node;
			
 
				 	_node_ops[STARPU_MIC_RAM].free_on_node = _starpu_mic_free_on_node;
			
@@ -82,9 +85,12 @@ void _starpu_node_ops_init()
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				 	_node_ops[STARPU_MPI_MS_RAM].wait_request_completion = _starpu_mpi_common_wait_request_completion;
			
 
				 	_node_ops[STARPU_MPI_MS_RAM].test_request_completion = _starpu_mpi_common_test_event;
			
 
				-	_node_ops[STARPU_MPI_MS_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_mpi_common_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_MPI_MS_RAM].copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_common_copy_data_to_mpi;
			
 
				-	_node_ops[STARPU_MPI_MS_RAM].copy_interface = _starpu_mpi_copy_interface;
			
 
				+	_node_ops[STARPU_MPI_MS_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_mpi_copy_data_from_mpi_to_cpu;
			
 
				+	_node_ops[STARPU_MPI_MS_RAM].copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_data_from_mpi_to_mpi;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_data_from_cpu_to_mpi;
			
 
				+	_node_ops[STARPU_MPI_MS_RAM].copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_interface_from_mpi_to_mpi;
			
 
				+	_node_ops[STARPU_MPI_MS_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_mpi_copy_interface_from_mpi_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_mpi_copy_interface_from_cpu_to_mpi;
			
 
				 	_node_ops[STARPU_MPI_MS_RAM].direct_access_supported = _starpu_mpi_direct_access_supported;
			
 
				 	_node_ops[STARPU_MPI_MS_RAM].malloc_on_node = _starpu_mpi_malloc_on_node;
			
 
				 	_node_ops[STARPU_MPI_MS_RAM].free_on_node = _starpu_mpi_free_on_node;
			
@@ -92,17 +98,23 @@ void _starpu_node_ops_init()
 
				 
			
 
				 	_node_ops[STARPU_DISK_RAM].wait_request_completion = _starpu_disk_wait_request_completion;
			
 
				 	_node_ops[STARPU_DISK_RAM].test_request_completion = _starpu_disk_test_request_completion;
			
 
				-	_node_ops[STARPU_DISK_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_DISK_RAM].copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_to_disk;
			
 
				-	_node_ops[STARPU_DISK_RAM].copy_interface = _starpu_disk_copy_interface;
			
 
				+	_node_ops[STARPU_DISK_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu;
			
 
				+	_node_ops[STARPU_DISK_RAM].copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_cpu_to_disk;
			
 
				+	_node_ops[STARPU_DISK_RAM].copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk;
			
 
				+	_node_ops[STARPU_DISK_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk;
			
 
				 	_node_ops[STARPU_DISK_RAM].direct_access_supported = _starpu_disk_direct_access_supported;
			
 
				 	_node_ops[STARPU_DISK_RAM].malloc_on_node = _starpu_disk_malloc_on_node;
			
 
				 	_node_ops[STARPU_DISK_RAM].free_on_node = _starpu_disk_free_on_node;
			
 
				 
			
 
				 #ifdef STARPU_USE_SCC
			
 
				-	_node_ops[STARPU_SCC_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_scc_common_copy_data_to_cpu;
			
 
				-	_node_ops[STARPU_SCC_RAM].copy_data_to[STARPU_SCC_RAM] = _starpu_scc_common_copy_data_to_scc;
			
 
				-	_node_ops[STARPU_SCC_RAM].copy_interface = _starpu_scc_copy_interface;
			
 
				+	_node_ops[STARPU_SCC_RAM].copy_data_to[STARPU_CPU_RAM] = _starpu_scc_copy_data_from_scc_to_cpu;
			
 
				+	_node_ops[STARPU_SCC_RAM].copy_data_to[STARPU_SCC_RAM] = _starpu_scc_copy_data_from_scc_to_scc;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_data_to[STARPU_SCC_RAM] = _starpu_scc_copy_data_from_cpu_to_scc;
			
 
				+	_node_ops[STARPU_SCC_RAM].copy_interface_to[STARPU_SCC_RAM] = _starpu_scc_copy_interface_from_scc_to_scc;
			
 
				+	_node_ops[STARPU_SCC_RAM].copy_interface_to[STARPU_CPU_RAM] = _starpu_scc_copy_interface_from_scc_to_cpu;
			
 
				+	_node_ops[STARPU_CPU_RAM].copy_interface_to[STARPU_SCC_RAM] = _starpu_scc_copy_interface_from_cpu_to_scc;
			
 
				 	_node_ops[STARPU_SCC_RAM].direct_access_supported = _starpu_scc_direct_access_supported;
			
 
				 	_node_ops[STARPU_SCC_RAM].malloc_on_node = _starpu_scc_malloc_on_node;
			
 
				 	_node_ops[STARPU_SCC_RAM].free_on_node = _starpu_scc_free_on_node;
			
--- a/src/datawizard/node_ops.h
+++ b/src/datawizard/node_ops.h
@@ -34,7 +34,7 @@ typedef int (*copy_interface_t)(uintptr_t src_ptr, size_t src_offset, unsigned s
 
				 struct _starpu_node_ops
			
 
				 {
			
 
				 	copy_data_func_t copy_data_to[STARPU_MPI_MS_RAM+1];
			
 
				-	copy_interface_t copy_interface;
			
 
				+	copy_interface_t copy_interface_to[STARPU_MPI_MS_RAM+1];
			
 
				 	void (*wait_request_completion)(struct _starpu_async_channel *async_channel);
			
 
				 	unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel);
			
 
				 	int (*direct_access_supported)(unsigned node, unsigned handling_node);
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -429,173 +429,7 @@ int _starpu_cpu_driver_run(struct _starpu_worker *worker)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int _starpu_cpu_copy_data_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM);
			
 
				-
			
 
				-	int ret = 1;
			
 
				-
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
			
 
				-	STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->ram_to_opencl_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		STARPU_ASSERT(copy_methods->ram_to_opencl || copy_methods->any_to_any);
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		if (copy_methods->ram_to_opencl)
			
 
				-			copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_OPENCL_RAM;
			
 
				-		if (copy_methods->ram_to_opencl_async)
			
 
				-			ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-	}
			
 
				-#endif
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_cpu_copy_data_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM);
			
 
				-
			
 
				-	int ret = 1;
			
 
				-
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	cudaError_t cures;
			
 
				-	cudaStream_t stream;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-
			
 
				-	/* STARPU_CPU_RAM -> CUBLAS_RAM */
			
 
				-	/* only the proper CUBLAS thread can initiate this ! */
			
 
				-#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
			
 
				-	STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
			
 
				-#endif
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() ||
			
 
				-	    !(copy_methods->ram_to_cuda_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		STARPU_ASSERT(copy_methods->ram_to_cuda || copy_methods->any_to_any);
			
 
				-		if (copy_methods->ram_to_cuda)
			
 
				-			copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_CUDA_RAM;
			
 
				-		cures = cudaEventCreateWithFlags(&req->async_channel.event.cuda_event, cudaEventDisableTiming);
			
 
				-		if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				-			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				-
			
 
				-		stream = starpu_cuda_get_in_transfer_stream(dst_node);
			
 
				-		if (copy_methods->ram_to_cuda_async)
			
 
				-			ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-
			
 
				-		cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
			
 
				-		if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				-			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				-	}
			
 
				-#endif
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_cpu_copy_data_to_mic(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MIC_RAM);
			
 
				-
			
 
				-	int ret = 1;
			
 
				-
			
 
				-#ifdef STARPU_USE_MIC
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	/* RAM -> MIC */
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mic_copy_disabled() || !(copy_methods->ram_to_mic_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		STARPU_ASSERT(copy_methods->ram_to_mic || copy_methods->any_to_any);
			
 
				-		if (copy_methods->ram_to_mic)
			
 
				-			copy_methods->ram_to_mic(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_MIC_RAM;
			
 
				-		if (copy_methods->ram_to_mic_async)
			
 
				-			ret = copy_methods->ram_to_mic_async(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-		_starpu_mic_init_event(&(req->async_channel.event.mic_event), dst_node);
			
 
				-	}
			
 
				-#endif
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_cpu_copy_data_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-
			
 
				-	if (req && !starpu_asynchronous_copy_disabled())
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_DISK_RAM;
			
 
				-		req->async_channel.event.disk_event.requests = NULL;
			
 
				-		req->async_channel.event.disk_event.ptr = NULL;
			
 
				-		req->async_channel.event.disk_event.handle = NULL;
			
 
				-	}
			
 
				-	if(copy_methods->any_to_any)
			
 
				-		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
			
 
				-	else
			
 
				-	{
			
 
				-		void *obj = starpu_data_handle_to_pointer(handle, dst_node);
			
 
				-		void * ptr = NULL;
			
 
				-		starpu_ssize_t size = 0;
			
 
				-		handle->ops->pack_data(handle, src_node, &ptr, &size);
			
 
				-		ret = _starpu_disk_full_write(src_node, dst_node, obj, ptr, size, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
			
 
				-		if (ret == 0)
			
 
				-		{
			
 
				-			/* write is already finished, ptr was allocated in pack_data */
			
 
				-			_starpu_free_flags_on_node(src_node, ptr, size, 0);
			
 
				-		}
			
 
				-		else if (ret == -EAGAIN)
			
 
				-		{
			
 
				-			STARPU_ASSERT(req);
			
 
				-			req->async_channel.event.disk_event.ptr = ptr;
			
 
				-			req->async_channel.event.disk_event.node = src_node;
			
 
				-			req->async_channel.event.disk_event.size = size;
			
 
				-		}
			
 
				-		STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
			
 
				-	}
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_cpu_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_cpu_copy_data(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
@@ -610,135 +444,22 @@ int _starpu_cpu_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interfac
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_cpu_copy_data_to_mpi_ms(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->ram_to_mpi_ms_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		STARPU_ASSERT(copy_methods->ram_to_mpi_ms || copy_methods->any_to_any);
			
 
				-		if (copy_methods->ram_to_mpi_ms)
			
 
				-			copy_methods->ram_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				-		if(copy_methods->ram_to_mpi_ms_async)
			
 
				-			ret = copy_methods->ram_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-	}
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_cpu_copy_data_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	if (copy_methods->scc_src_to_sink)
			
 
				-		copy_methods->scc_src_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				-	else
			
 
				-		copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				 int _starpu_cpu_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CPU_RAM);
			
 
				-
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				-	{
			
 
				-		memcpy((void *) (dst + dst_offset), (void *) (src + src_offset), size);
			
 
				-		return 0;
			
 
				-	}
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	else if (dst_kind == STARPU_CUDA_RAM)
			
 
				-	{
			
 
				-		return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				-						   (void*) (dst + dst_offset), dst_node,
			
 
				-						   size,
			
 
				-						   async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL,
			
 
				-						   cudaMemcpyHostToDevice);
			
 
				-	}
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				-	else if (dst_kind == STARPU_OPENCL_RAM)
			
 
				-	{
			
 
				-		return starpu_opencl_copy_async_sync(src, src_offset, src_node,
			
 
				-						     dst, dst_offset, dst_node,
			
 
				-						     size,
			
 
				-						     &async_channel->event.opencl_event);
			
 
				-
			
 
				-	}
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_MIC
			
 
				-	else if (dst_kind == STARPU_MIC_RAM)
			
 
				-	{
			
 
				-		if (async_channel)
			
 
				-			return _starpu_mic_copy_ram_to_mic_async((void*) (src + src_offset), src_node,
			
 
				-								 (void*) (dst + dst_offset), dst_node,
			
 
				-								 size);
			
 
				-		else
			
 
				-			return _starpu_mic_copy_ram_to_mic((void*) (src + src_offset), src_node,
			
 
				-							   (void*) (dst + dst_offset), dst_node,
			
 
				-							   size);
			
 
				+	(void) async_channel;
			
 
				 
			
 
				-	}
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_SCC
			
 
				-	else if (dst_kind == STARPU_MIC_RAM)
			
 
				-	{
			
 
				-		return _starpu_scc_copy_src_to_sink((void*) (src + src_offset), src_node,
			
 
				-						    (void*) (dst + dst_offset), dst_node,
			
 
				-						    size);
			
 
				-	}
			
 
				-#endif
			
 
				-#ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				-	else if (dst_kind == STARPU_MPI_MS_RAM)
			
 
				-	{
			
 
				-                if (async_channel)
			
 
				-                        return _starpu_mpi_copy_ram_to_mpi_async((void*) (src + src_offset), src_node,
			
 
				-								 (void*) (dst + dst_offset), dst_node,
			
 
				-								 size, async_channel);
			
 
				-                else
			
 
				-                        return _starpu_mpi_copy_ram_to_mpi_sync((void*) (src + src_offset), src_node,
			
 
				-								(void*) (dst + dst_offset), dst_node,
			
 
				-								size);
			
 
				-	}
			
 
				-#endif
			
 
				-	else if (dst_kind == STARPU_DISK_RAM)
			
 
				-	{
			
 
				-		return _starpu_disk_copy_src_to_disk((void*) (src + src_offset), src_node,
			
 
				-						     (void*) dst, dst_offset, dst_node,
			
 
				-						     size, async_channel);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				-	}
			
 
				+	memcpy((void *) (dst + dst_offset), (void *) (src + src_offset), size);
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 int _starpu_cpu_direct_access_supported(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	(void) node;
			
 
				+	(void) handling_node;
			
 
				 	return 1;
			
 
				 }
			
 
				 
			
--- a/src/drivers/cpu/driver_cpu.h
+++ b/src/drivers/cpu/driver_cpu.h
@@ -26,34 +26,10 @@
 
				 extern struct _starpu_driver_ops _starpu_driver_cpu_ops;
			
 
				 void *_starpu_cpu_worker(void *);
			
 
				 
			
 
				-int _starpu_cpu_copy_data_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				  void *dst_interface, unsigned dst_node,
			
 
				-				  struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				    void *dst_interface, unsigned dst_node,
			
 
				-				    struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_mic(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				 void *dst_interface, unsigned dst_node,
			
 
				-				 struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				  void *dst_interface, unsigned dst_node,
			
 
				-				  struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				 void *dst_interface, unsigned dst_node,
			
 
				-				 struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_mpi_ms(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				    void *dst_interface, unsigned dst_node,
			
 
				-				    struct _starpu_data_request *req);
			
 
				-int _starpu_cpu_copy_data_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
			
 
				-				 void *dst_interface, unsigned dst_node,
			
 
				-				 struct _starpu_data_request *req);
			
 
				-
			
 
				-int _starpu_cpu_copy_interface(uintptr_t src_ptr, size_t src_offset, unsigned src_node,
			
 
				-			       uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node,
			
 
				-			       size_t ssize, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_cpu_copy_data(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_cpu_copy_interface(uintptr_t src_ptr, size_t src_offset, unsigned src_node, uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel);
			
 
				 
			
 
				 int _starpu_cpu_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				-
			
 
				 uintptr_t _starpu_cpu_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_cpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
 
				 
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -1214,6 +1214,7 @@ int _starpu_cuda_driver_deinit_from_worker(struct _starpu_worker *worker)
 
				 	return _starpu_cuda_driver_deinit(worker->set);
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				 unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel)
			
 
				 {
			
 
				 	cudaEvent_t event;
			
@@ -1248,14 +1249,13 @@ void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_ch
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 
			
 
				-int _starpu_cuda_copy_data_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_cuda_copy_data_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				 	STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM);
			
 
				 
			
 
				 	int ret = 1;
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				 	cudaError_t cures;
			
 
				 	cudaStream_t stream;
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
@@ -1287,18 +1287,16 @@ int _starpu_cuda_copy_data_to_cuda(starpu_data_handle_t handle, void *src_interf
 
				 		cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
			
 
				 		if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				-#endif
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_cuda_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_cuda_copy_data_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				 	STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				 
			
 
				 	int ret = 1;
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				 	cudaError_t cures;
			
 
				 	cudaStream_t stream;
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
@@ -1334,38 +1332,98 @@ int _starpu_cuda_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interfa
 
				 		cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
			
 
				 		if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				-#endif
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_cuda_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_cuda_copy_data_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_CUDA_RAM);
			
 
				-
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				-	{
			
 
				-		return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				-						   (void*) (dst + dst_offset), dst_node,
			
 
				-						   size,
			
 
				-						   async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL,
			
 
				-						   cudaMemcpyDeviceToHost);
			
 
				-	}
			
 
				-	else if (dst_kind == STARPU_CUDA_RAM)
			
 
				+	int ret = 1;
			
 
				+	cudaError_t cures;
			
 
				+	cudaStream_t stream;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+
			
 
				+	/* STARPU_CPU_RAM -> CUBLAS_RAM */
			
 
				+	/* only the proper CUBLAS thread can initiate this ! */
			
 
				+#if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
			
 
				+	STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
			
 
				+#endif
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() ||
			
 
				+	    !(copy_methods->ram_to_cuda_async || copy_methods->any_to_any))
			
 
				 	{
			
 
				-		return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				-						   (void*) (dst + dst_offset), dst_node,
			
 
				-						   size,
			
 
				-						   async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL,
			
 
				-						   cudaMemcpyDeviceToDevice);
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		STARPU_ASSERT(copy_methods->ram_to_cuda || copy_methods->any_to_any);
			
 
				+		if (copy_methods->ram_to_cuda)
			
 
				+			copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				+		req->async_channel.type = STARPU_CUDA_RAM;
			
 
				+		cures = cudaEventCreateWithFlags(&req->async_channel.event.cuda_event, cudaEventDisableTiming);
			
 
				+		if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				+
			
 
				+		stream = starpu_cuda_get_in_transfer_stream(dst_node);
			
 
				+		if (copy_methods->ram_to_cuda_async)
			
 
				+			ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				+
			
 
				+		cures = cudaEventRecord(req->async_channel.event.cuda_event, stream);
			
 
				+		if (STARPU_UNLIKELY(cures != cudaSuccess))
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_cuda_copy_interface_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				+					   (void*) (dst + dst_offset), dst_node,
			
 
				+					   size,
			
 
				+					   async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL,
			
 
				+					   cudaMemcpyDeviceToHost);
			
 
				+}
			
 
				+
			
 
				+int _starpu_cuda_copy_interface_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM);
			
 
				+
			
 
				+	return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				+					   (void*) (dst + dst_offset), dst_node,
			
 
				+					   size,
			
 
				+					   async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL,
			
 
				+					   cudaMemcpyDeviceToDevice);
			
 
				+}
			
 
				+
			
 
				+int _starpu_cuda_copy_interface_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM);
			
 
				+
			
 
				+	return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node,
			
 
				+					   (void*) (dst + dst_offset), dst_node,
			
 
				+					   size,
			
 
				+					   async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL,
			
 
				+					   cudaMemcpyHostToDevice);
			
 
				 }
			
 
				 
			
 
				 int _starpu_cuda_direct_access_supported(unsigned node, unsigned handling_node)
			
@@ -1373,6 +1431,7 @@ int _starpu_cuda_direct_access_supported(unsigned node, unsigned handling_node)
 
				 	/* GPUs not always allow direct remote access: if CUDA4
			
 
				 	 * is enabled, we allow two CUDA devices to communicate. */
			
 
				 #ifdef STARPU_SIMGRID
			
 
				+	(void) node;
			
 
				 	if (starpu_node_get_kind(handling_node) == STARPU_CUDA_RAM)
			
 
				 	{
			
 
				 		msg_host_t host = _starpu_simgrid_get_memnode_host(handling_node);
			
@@ -1382,10 +1441,13 @@ int _starpu_cuda_direct_access_supported(unsigned node, unsigned handling_node)
 
				 	else
			
 
				 		return 0;
			
 
				 #elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
			
 
				+	(void) node;
			
 
				 	enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
			
 
				 	return kind == STARPU_CUDA_RAM;
			
 
				 #else /* STARPU_HAVE_CUDA_MEMCPY_PEER */
			
 
				 	/* Direct GPU-GPU transfers are not allowed in general */
			
 
				+	(void) node;
			
 
				+	(void) handling_node;
			
 
				 	return 0;
			
 
				 #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */
			
 
				 }
			
@@ -1393,6 +1455,7 @@ int _starpu_cuda_direct_access_supported(unsigned node, unsigned handling_node)
 
				 uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				 	uintptr_t addr = 0;
			
 
				+	(void) flags;
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 
			
@@ -1445,6 +1508,9 @@ uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags)
 
				 
			
 
				 void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				+	(void) size;
			
 
				+	(void) flags;
			
 
				+
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			
@@ -1481,6 +1547,7 @@ void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, i
 
				 #endif /* STARPU_SIMGRID */
			
 
				 #endif
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				 struct _starpu_driver_ops _starpu_driver_cuda_ops =
			
 
				 {
			
--- a/src/drivers/cuda/driver_cuda.h
+++ b/src/drivers/cuda/driver_cuda.h
@@ -55,9 +55,15 @@ cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned ds
 
				 
			
 
				 unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel);
			
 
				 void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_cuda_copy_data_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_cuda_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_cuda_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				+int _starpu_cuda_copy_data_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_cuda_copy_data_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_cuda_copy_data_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_cuda_copy_interface_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_cuda_copy_interface_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_cuda_copy_interface_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_cuda_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
--- a/src/drivers/disk/driver_disk.c
+++ b/src/drivers/disk/driver_disk.c
@@ -81,7 +81,7 @@ void _starpu_disk_wait_request_completion(struct _starpu_async_channel *async_ch
 
				 	}
			
 
				 }
			
 
				 
			
 
				-int _starpu_disk_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_disk_copy_data_from_disk_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
@@ -124,7 +124,7 @@ int _starpu_disk_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interfa
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_disk_copy_data_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_disk_copy_data_from_disk_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
@@ -144,30 +144,81 @@ int _starpu_disk_copy_data_to_disk(starpu_data_handle_t handle, void *src_interf
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_disk_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_disk_copy_data_from_cpu_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_DISK_RAM);
			
 
				-
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				-	{
			
 
				-		return _starpu_disk_copy_disk_to_src((void*) src, src_offset, src_node,
			
 
				-						     (void*) (dst + dst_offset), dst_node,
			
 
				-						     size, async_channel);
			
 
				-	}
			
 
				-	else if (dst_kind == STARPU_DISK_RAM)
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+
			
 
				+	if (req && !starpu_asynchronous_copy_disabled())
			
 
				 	{
			
 
				-		return _starpu_disk_copy_disk_to_disk((void*) src, src_offset, src_node,
			
 
				-						      (void*) dst, dst_offset, dst_node,
			
 
				-						      size, async_channel);
			
 
				+		req->async_channel.type = STARPU_DISK_RAM;
			
 
				+		req->async_channel.event.disk_event.requests = NULL;
			
 
				+		req->async_channel.event.disk_event.ptr = NULL;
			
 
				+		req->async_channel.event.disk_event.handle = NULL;
			
 
				 	}
			
 
				+
			
 
				+	if(copy_methods->any_to_any)
			
 
				+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
			
 
				 	else
			
 
				 	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				+		void *obj = starpu_data_handle_to_pointer(handle, dst_node);
			
 
				+		void * ptr = NULL;
			
 
				+		starpu_ssize_t size = 0;
			
 
				+		handle->ops->pack_data(handle, src_node, &ptr, &size);
			
 
				+		ret = _starpu_disk_full_write(src_node, dst_node, obj, ptr, size, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
			
 
				+		if (ret == 0)
			
 
				+		{
			
 
				+			/* write is already finished, ptr was allocated in pack_data */
			
 
				+			_starpu_free_flags_on_node(src_node, ptr, size, 0);
			
 
				+		}
			
 
				+		else if (ret == -EAGAIN)
			
 
				+		{
			
 
				+			STARPU_ASSERT(req);
			
 
				+			req->async_channel.event.disk_event.ptr = ptr;
			
 
				+			req->async_channel.event.disk_event.node = src_node;
			
 
				+			req->async_channel.event.disk_event.size = size;
			
 
				+		}
			
 
				+		STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
			
 
				 	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_disk_copy_interface_from_disk_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	return _starpu_disk_copy_disk_to_src((void*) src, src_offset, src_node,
			
 
				+					     (void*) (dst + dst_offset), dst_node,
			
 
				+					     size, async_channel);
			
 
				+}
			
 
				+
			
 
				+int _starpu_disk_copy_interface_from_disk_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_DISK_RAM);
			
 
				+
			
 
				+	return _starpu_disk_copy_disk_to_disk((void*) src, src_offset, src_node,
			
 
				+					      (void*) dst, dst_offset, dst_node,
			
 
				+					      size, async_channel);
			
 
				+}
			
 
				+
			
 
				+int _starpu_disk_copy_interface_from_cpu_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM);
			
 
				+
			
 
				+	return _starpu_disk_copy_src_to_disk((void*) (src + src_offset), src_node,
			
 
				+					     (void*) dst, dst_offset, dst_node,
			
 
				+					     size, async_channel);
			
 
				 }
			
 
				 
			
 
				 int _starpu_disk_direct_access_supported(unsigned node, unsigned handling_node)
			
@@ -186,6 +237,7 @@ int _starpu_disk_direct_access_supported(unsigned node, unsigned handling_node)
 
				 
			
 
				 uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	uintptr_t addr = 0;
			
 
				 	addr = (uintptr_t) _starpu_disk_alloc(dst_node, size);
			
 
				 	return addr;
			
@@ -193,5 +245,6 @@ uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags)
 
				 
			
 
				 void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	_starpu_disk_free(dst_node, (void *) addr , size);
			
 
				 }
			
--- a/src/drivers/disk/driver_disk.h
+++ b/src/drivers/disk/driver_disk.h
@@ -28,9 +28,15 @@ int _starpu_disk_copy_disk_to_disk(void * src, size_t src_offset, unsigned src_n
 
				 
			
 
				 unsigned _starpu_disk_test_request_completion(struct _starpu_async_channel *async_channel);
			
 
				 void _starpu_disk_wait_request_completion(struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_disk_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_disk_copy_data_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_disk_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				+int _starpu_disk_copy_data_from_disk_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_disk_copy_data_from_disk_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_disk_copy_data_from_cpu_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_disk_copy_interface_from_disk_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_disk_copy_interface_from_disk_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_disk_copy_interface_from_cpu_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_disk_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -563,15 +563,13 @@ void *_starpu_mic_src_worker(void *arg)
 
				 
			
 
				 }
			
 
				 
			
 
				-int _starpu_mic_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_mic_copy_data_from_mic_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				 	STARPU_ASSERT(src_kind == STARPU_MIC_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				 
			
 
				-	int ret = 1;
			
 
				-
			
 
				-#ifdef STARPU_USE_MIC
			
 
				+	int ret = 0;
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				 	/* MIC -> RAM */
			
 
				 	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mic_copy_disabled() || !(copy_methods->mic_to_ram_async || copy_methods->any_to_any))
			
@@ -579,9 +577,9 @@ int _starpu_mic_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interfac
 
				 		/* this is not associated to a request so it's synchronous */
			
 
				 		STARPU_ASSERT(copy_methods->mic_to_ram || copy_methods->any_to_any);
			
 
				 		if (copy_methods->mic_to_ram)
			
 
				-			copy_methods->mic_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				+			ret = copy_methods->mic_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				 		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
@@ -595,43 +593,86 @@ int _starpu_mic_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interfac
 
				 		}
			
 
				 		_starpu_mic_init_event(&(req->async_channel.event.mic_event), src_node);
			
 
				 	}
			
 
				-#endif
			
 
				-	return 1;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_mic_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_mic_copy_data_from_cpu_to_mic(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_MIC_RAM);
			
 
				-
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MIC_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	/* RAM -> MIC */
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mic_copy_disabled() || !(copy_methods->ram_to_mic_async || copy_methods->any_to_any))
			
 
				 	{
			
 
				-		if (async_channel)
			
 
				-			return _starpu_mic_copy_mic_to_ram_async((void*) (src + src_offset), src_node,
			
 
				-								 (void*) (dst + dst_offset), dst_node,
			
 
				-								 size);
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		STARPU_ASSERT(copy_methods->ram_to_mic || copy_methods->any_to_any);
			
 
				+		if (copy_methods->ram_to_mic)
			
 
				+			ret = copy_methods->ram_to_mic(src_interface, src_node, dst_interface, dst_node);
			
 
				 		else
			
 
				-			return _starpu_mic_copy_mic_to_ram((void*) (src + src_offset), src_node,
			
 
				-							   (void*) (dst + dst_offset), dst_node,
			
 
				-							   size);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				+		req->async_channel.type = STARPU_MIC_RAM;
			
 
				+		if (copy_methods->ram_to_mic_async)
			
 
				+			ret = copy_methods->ram_to_mic_async(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				+		_starpu_mic_init_event(&(req->async_channel.event.mic_event), dst_node);
			
 
				 	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_mic_copy_interface_from_mic_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_MIC_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	if (async_channel)
			
 
				+		return _starpu_mic_copy_mic_to_ram_async((void*) (src + src_offset), src_node,
			
 
				+							 (void*) (dst + dst_offset), dst_node,
			
 
				+							 size);
			
 
				+	else
			
 
				+		return _starpu_mic_copy_mic_to_ram((void*) (src + src_offset), src_node,
			
 
				+						   (void*) (dst + dst_offset), dst_node,
			
 
				+						   size);
			
 
				+}
			
 
				+
			
 
				+int _starpu_mic_copy_interface_from_cpu_to_mic(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MIC_RAM);
			
 
				+
			
 
				+	if (async_channel)
			
 
				+		return _starpu_mic_copy_ram_to_mic_async((void*) (src + src_offset), src_node,
			
 
				+							 (void*) (dst + dst_offset), dst_node,
			
 
				+							 size);
			
 
				+	else
			
 
				+		return _starpu_mic_copy_ram_to_mic((void*) (src + src_offset), src_node,
			
 
				+						   (void*) (dst + dst_offset), dst_node,
			
 
				+						   size);
			
 
				 }
			
 
				 
			
 
				 int _starpu_mic_direct_access_supported(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	(void) node;
			
 
				+	(void) handling_node;
			
 
				 	/* TODO: We don't handle direct MIC-MIC transfers yet */
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				 uintptr_t _starpu_mic_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	uintptr_t addr = 0;
			
 
				 	if (_starpu_mic_allocate_memory((void **)(&addr), size, dst_node))
			
 
				 		addr = 0;
			
@@ -640,5 +681,6 @@ uintptr_t _starpu_mic_malloc_on_node(unsigned dst_node, size_t size, int flags)
 
				 
			
 
				 void _starpu_mic_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	_starpu_mic_free_memory((void*) addr, size, dst_node);
			
 
				 }
			
--- a/src/drivers/mic/driver_mic_source.h
+++ b/src/drivers/mic/driver_mic_source.h
@@ -81,8 +81,13 @@ void *_starpu_mic_src_worker(void *arg);
 
				 
			
 
				 unsigned _starpu_mic_test_request_completion(struct _starpu_async_channel *async_channel);
			
 
				 void _starpu_mic_wait_request_completion(struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_mic_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_mic_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				+int _starpu_mic_copy_data_from_mic_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_mic_copy_data_from_cpu_to_mic(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_mic_copy_interface_from_mic_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mic_copy_interface_from_cpu_to_mic(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_mic_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_mic_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_mic_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
--- a/src/drivers/mpi/driver_mpi_common.c
+++ b/src/drivers/mpi/driver_mpi_common.c
@@ -551,65 +551,3 @@ void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXM
 
				         free(buf);
			
 
				 }
			
 
				 
			
 
				-int _starpu_mpi_common_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->mpi_ms_to_ram_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		STARPU_ASSERT(copy_methods->mpi_ms_to_ram || copy_methods->any_to_any);
			
 
				-		if (copy_methods->mpi_ms_to_ram)
			
 
				-			copy_methods->mpi_ms_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				-		if(copy_methods->mpi_ms_to_ram_async)
			
 
				-			ret = copy_methods->mpi_ms_to_ram_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-	}
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_mpi_common_copy_data_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-
			
 
				-	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->mpi_ms_to_mpi_ms_async || copy_methods->any_to_any))
			
 
				-	{
			
 
				-		/* this is not associated to a request so it's synchronous */
			
 
				-		STARPU_ASSERT(copy_methods->mpi_ms_to_mpi_ms || copy_methods->any_to_any);
			
 
				-		if (copy_methods->mpi_ms_to_mpi_ms)
			
 
				-			copy_methods->mpi_ms_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
			
 
				-		else
			
 
				-			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				-		if(copy_methods->mpi_ms_to_mpi_ms_async)
			
 
				-			ret = copy_methods->mpi_ms_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		else
			
 
				-		{
			
 
				-			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				-			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				-		}
			
 
				-	}
			
 
				-	return ret;
			
 
				-}
			
--- a/src/drivers/mpi/driver_mpi_common.h
+++ b/src/drivers/mpi/driver_mpi_common.h
@@ -54,9 +54,6 @@ void _starpu_mpi_common_barrier(void);
 
				 
			
 
				 void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]);
			
 
				 
			
 
				-int _starpu_mpi_common_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_mpi_common_copy_data_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-
			
 
				 #endif  /* STARPU_USE_MPI_MASTER_SLAVE */
			
 
				 
			
 
				 #endif	/* __DRIVER_MPI_COMMON_H__ */
			
--- a/src/drivers/mpi/driver_mpi_source.c
+++ b/src/drivers/mpi/driver_mpi_source.c
@@ -381,50 +381,159 @@ void *_starpu_mpi_src_worker(void *arg)
 
				         return NULL;
			
 
				 }
			
 
				 
			
 
				-int _starpu_mpi_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->mpi_ms_to_ram_async || copy_methods->any_to_any))
			
 
				+	{
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		STARPU_ASSERT(copy_methods->mpi_ms_to_ram || copy_methods->any_to_any);
			
 
				+		if (copy_methods->mpi_ms_to_ram)
			
 
				+			copy_methods->mpi_ms_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				+		if(copy_methods->mpi_ms_to_ram_async)
			
 
				+			ret = copy_methods->mpi_ms_to_ram_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				 
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->mpi_ms_to_mpi_ms_async || copy_methods->any_to_any))
			
 
				+	{
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		STARPU_ASSERT(copy_methods->mpi_ms_to_mpi_ms || copy_methods->any_to_any);
			
 
				+		if (copy_methods->mpi_ms_to_mpi_ms)
			
 
				+			copy_methods->mpi_ms_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+	}
			
 
				+	else
			
 
				 	{
			
 
				-                if (async_channel)
			
 
				-                        return _starpu_mpi_copy_mpi_to_ram_async((void*) (src + src_offset), src_node,
			
 
				-								 (void*) (dst + dst_offset), dst_node,
			
 
				-								 size, async_channel);
			
 
				-                else
			
 
				-                        return _starpu_mpi_copy_mpi_to_ram_sync((void*) (src + src_offset), src_node,
			
 
				-								(void*) (dst + dst_offset), dst_node,
			
 
				-								size);
			
 
				+		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				+		if(copy_methods->mpi_ms_to_mpi_ms_async)
			
 
				+			ret = copy_methods->mpi_ms_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				 	}
			
 
				-	else if (dst_kind == STARPU_MPI_MS_RAM)
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_mpi_copy_data_from_cpu_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				+
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() || !(copy_methods->ram_to_mpi_ms_async || copy_methods->any_to_any))
			
 
				 	{
			
 
				-                if (async_channel)
			
 
				-                        return _starpu_mpi_copy_sink_to_sink_async((void*) (src + src_offset), src_node,
			
 
				-								   (void*) (dst + dst_offset), dst_node,
			
 
				-								   size, async_channel);
			
 
				-                else
			
 
				-                        return _starpu_mpi_copy_sink_to_sink_sync((void*) (src + src_offset), src_node,
			
 
				-								  (void*) (dst + dst_offset), dst_node,
			
 
				-								  size);
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		STARPU_ASSERT(copy_methods->ram_to_mpi_ms || copy_methods->any_to_any);
			
 
				+		if (copy_methods->ram_to_mpi_ms)
			
 
				+			copy_methods->ram_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				+		req->async_channel.type = STARPU_MPI_MS_RAM;
			
 
				+		if(copy_methods->ram_to_mpi_ms_async)
			
 
				+			ret = copy_methods->ram_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				 	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	if (async_channel)
			
 
				+		return _starpu_mpi_copy_mpi_to_ram_async((void*) (src + src_offset), src_node,
			
 
				+							 (void*) (dst + dst_offset), dst_node,
			
 
				+							 size, async_channel);
			
 
				+	else
			
 
				+		return _starpu_mpi_copy_mpi_to_ram_sync((void*) (src + src_offset), src_node,
			
 
				+							(void*) (dst + dst_offset), dst_node,
			
 
				+							size);
			
 
				+}
			
 
				+
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_MPI_MS_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				+
			
 
				+	if (async_channel)
			
 
				+		return _starpu_mpi_copy_sink_to_sink_async((void*) (src + src_offset), src_node,
			
 
				+							   (void*) (dst + dst_offset), dst_node,
			
 
				+							   size, async_channel);
			
 
				+	else
			
 
				+		return _starpu_mpi_copy_sink_to_sink_sync((void*) (src + src_offset), src_node,
			
 
				+							  (void*) (dst + dst_offset), dst_node,
			
 
				+							  size);
			
 
				+}
			
 
				+
			
 
				+int _starpu_mpi_copy_interface_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MPI_MS_RAM);
			
 
				+
			
 
				+	if (async_channel)
			
 
				+		return _starpu_mpi_copy_ram_to_mpi_async((void*) (src + src_offset), src_node,
			
 
				+							 (void*) (dst + dst_offset), dst_node,
			
 
				+							 size, async_channel);
			
 
				+	else
			
 
				+		return _starpu_mpi_copy_ram_to_mpi_sync((void*) (src + src_offset), src_node,
			
 
				+							(void*) (dst + dst_offset), dst_node,
			
 
				+							size);
			
 
				 }
			
 
				 
			
 
				 int _starpu_mpi_direct_access_supported(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	(void) node;
			
 
				 	enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
			
 
				 	return kind == STARPU_MPI_MS_RAM;
			
 
				 }
			
 
				 
			
 
				 uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	uintptr_t addr = 0;
			
 
				 	if (_starpu_mpi_src_allocate_memory((void **)(&addr), size, dst_node))
			
 
				 		addr = 0;
			
@@ -433,5 +542,7 @@ uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags)
 
				 
			
 
				 void _starpu_mpi_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				+	(void) size;
			
 
				 	_starpu_mpi_source_free_memory((void*) addr, dst_node);
			
 
				 }
			
--- a/src/drivers/mpi/driver_mpi_source.h
+++ b/src/drivers/mpi/driver_mpi_source.h
@@ -47,7 +47,15 @@ int _starpu_mpi_copy_sink_to_sink_sync(void *src, unsigned src_node, void *dst,
 
				 int _starpu_mpi_copy_mpi_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, void * event);
			
 
				 int _starpu_mpi_copy_ram_to_mpi_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size, void * event);
			
 
				 int _starpu_mpi_copy_sink_to_sink_async(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size, void * event);
			
 
				-int _starpu_mpi_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_mpi_copy_data_from_cpu_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mpi_copy_interface_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_mpi_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_mpi_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -1145,16 +1145,13 @@ struct _starpu_driver_ops _starpu_driver_opencl_ops =
 
				 	.deinit = _starpu_opencl_driver_deinit
			
 
				 };
			
 
				 
			
 
				-#endif /* STARPU_USE_OPENCL */
			
 
				-
			
 
				-int _starpu_opencl_copy_data_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_opencl_copy_data_from_opencl_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				 	STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM);
			
 
				 
			
 
				 	int ret = 1;
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				 	/* STARPU_OPENCL_RAM -> STARPU_OPENCL_RAM */
			
 
				 	STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node || starpu_worker_get_local_memory_node() == src_node);
			
@@ -1178,18 +1175,16 @@ int _starpu_opencl_copy_data_to_opencl(starpu_data_handle_t handle, void *src_in
 
				 			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				 		}
			
 
				 	}
			
 
				-#endif
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_opencl_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+int _starpu_opencl_copy_data_from_opencl_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				 	STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				 
			
 
				 	int ret = 1;
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				 	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				 	/* OpenCL -> RAM */
			
 
				 	STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node);
			
@@ -1213,40 +1208,89 @@ int _starpu_opencl_copy_data_to_cpu(starpu_data_handle_t handle, void *src_inter
 
				 			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				 		}
			
 
				 	}
			
 
				-#endif
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int _starpu_opencl_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_opencl_copy_data_from_cpu_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM);
			
 
				-
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_OPENCL_RAM || dst_kind == STARPU_CPU_RAM)
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
			
 
				+	STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
			
 
				+	if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->ram_to_opencl_async || copy_methods->any_to_any))
			
 
				 	{
			
 
				-		return starpu_opencl_copy_async_sync(src, src_offset, src_node,
			
 
				-						     dst, dst_offset, dst_node,
			
 
				-						     size,
			
 
				-						     &async_channel->event.opencl_event);
			
 
				+		STARPU_ASSERT(copy_methods->ram_to_opencl || copy_methods->any_to_any);
			
 
				+		/* this is not associated to a request so it's synchronous */
			
 
				+		if (copy_methods->ram_to_opencl)
			
 
				+			copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				+		req->async_channel.type = STARPU_OPENCL_RAM;
			
 
				+		if (copy_methods->ram_to_opencl_async)
			
 
				+			ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
			
 
				+		else
			
 
				+		{
			
 
				+			STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+		}
			
 
				 	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_opencl_copy_interface_from_opencl_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM);
			
 
				+
			
 
				+	return starpu_opencl_copy_async_sync(src, src_offset, src_node,
			
 
				+					     dst, dst_offset, dst_node,
			
 
				+					     size,
			
 
				+					     &async_channel->event.opencl_event);
			
 
				+}
			
 
				+
			
 
				+int _starpu_opencl_copy_interface_from_opencl_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	return starpu_opencl_copy_async_sync(src, src_offset, src_node,
			
 
				+					     dst, dst_offset, dst_node,
			
 
				+					     size,
			
 
				+					     &async_channel->event.opencl_event);
			
 
				+}
			
 
				+
			
 
				+int _starpu_opencl_copy_interface_from_cpu_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM);
			
 
				+
			
 
				+	return starpu_opencl_copy_async_sync(src, src_offset, src_node,
			
 
				+					     dst, dst_offset, dst_node,
			
 
				+					     size,
			
 
				+					     &async_channel->event.opencl_event);
			
 
				 }
			
 
				 
			
 
				 int _starpu_opencl_direct_access_supported(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	(void)node;
			
 
				+	(void)handling_node;
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				 uintptr_t _starpu_opencl_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				+	(void)flags;
			
 
				 	uintptr_t addr = 0;
			
 
				-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	static uintptr_t last[STARPU_MAXNODES];
			
 
				 	/* Sleep for the allocation */
			
@@ -1273,14 +1317,14 @@ uintptr_t _starpu_opencl_malloc_on_node(unsigned dst_node, size_t size, int flag
 
				 		addr = (uintptr_t)ptr;
			
 
				 	}
			
 
				 #endif
			
 
				-
			
 
				-#endif
			
 
				 	return addr;
			
 
				 }
			
 
				 
			
 
				 void _starpu_opencl_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				+	(void)flags;
			
 
				+	(void)size;
			
 
				+	(void)dst_node;
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			
 
				 	/* Sleep for the free */
			
@@ -1293,5 +1337,6 @@ void _starpu_opencl_free_on_node(unsigned dst_node, uintptr_t addr, size_t size,
 
				 	if (STARPU_UNLIKELY(err != CL_SUCCESS))
			
 
				 		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 #endif
			
 
				-#endif
			
 
				 }
			
 
				+
			
 
				+#endif /* STARPU_USE_OPENCL */
			
--- a/src/drivers/opencl/driver_opencl.h
+++ b/src/drivers/opencl/driver_opencl.h
@@ -72,9 +72,15 @@ cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node, cl_m
 
				 
			
 
				 unsigned _starpu_opencl_test_request_completion(struct _starpu_async_channel *async_channel);
			
 
				 void _starpu_opencl_wait_request_completion(struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_opencl_copy_data_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_opencl_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_opencl_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				+int _starpu_opencl_copy_data_from_opencl_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_opencl_copy_data_from_opencl_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_opencl_copy_data_from_cpu_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_opencl_copy_interface_from_opencl_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_opencl_copy_interface_from_opencl_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_opencl_copy_interface_from_cpu_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_opencl_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_opencl_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_opencl_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
			
--- a/src/drivers/scc/driver_scc_common.c
+++ b/src/drivers/scc/driver_scc_common.c
@@ -191,33 +191,3 @@ int _starpu_scc_common_recv_is_ready(const struct _starpu_mp_node *mp_node)
 
				   ************/
			
 
				   STARPU_ASSERT(0);
			
 
				 }
			
 
				-
			
 
				-int _starpu_scc_common_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	if (copy_methods->scc_sink_to_src)
			
 
				-		copy_methods->scc_sink_to_src(src_interface, src_node, dst_interface, dst_node);
			
 
				-	else
			
 
				-		copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-int _starpu_scc_common_copy_data_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				-{
			
 
				-	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				-
			
 
				-	int ret = 0;
			
 
				-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				-	if (copy_methods->scc_sink_to_sink)
			
 
				-		copy_methods->scc_sink_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				-	else
			
 
				-		copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				-	return ret;
			
 
				-}
			
--- a/src/drivers/scc/driver_scc_common.h
+++ b/src/drivers/scc/driver_scc_common.h
@@ -49,9 +49,6 @@ void _starpu_scc_common_report_rcce_error(const char *func, const char *file, co
 
				 
			
 
				 int _starpu_scc_common_recv_is_ready(const struct _starpu_mp_node *mp_node);
			
 
				 
			
 
				-int _starpu_scc_common_copy_data_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_scc_common_copy_data_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-
			
 
				 #endif /* STARPU_USE_SCC */
			
 
				 
			
 
				 
			
--- a/src/drivers/scc/driver_scc_source.c
+++ b/src/drivers/scc/driver_scc_source.c
@@ -326,40 +326,97 @@ void *_starpu_scc_src_worker(void *arg)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-int _starpu_scc_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+int _starpu_scc_copy_data_from_scc_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				 {
			
 
				+	(void) req;
			
 
				 	int src_kind = starpu_node_get_kind(src_node);
			
 
				-	STARPU_ASSERT(src_kind == STARPU_SCC_RAM);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				 
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	if (copy_methods->scc_sink_to_src)
			
 
				+		ret = copy_methods->scc_sink_to_src(src_interface, src_node, dst_interface, dst_node);
			
 
				+	else
			
 
				+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_copy_data_from_scc_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+{
			
 
				+	(void) req;
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				 	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				 
			
 
				-	if (dst_kind == STARPU_CPU_RAM)
			
 
				-	{
			
 
				-		return _starpu_scc_copy_sink_to_src((void*) (src + src_offset), src_node,
			
 
				-						    (void*) (dst + dst_offset), dst_node,
			
 
				-						    size);
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	if (copy_methods->scc_sink_to_sink)
			
 
				+		ret = copy_methods->scc_sink_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				+	else
			
 
				+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+	return ret;
			
 
				+}
			
 
				 
			
 
				-	}
			
 
				-	else if (dst_kind == STARPU_SCC_RAM)
			
 
				-	{
			
 
				-		return _starpu_scc_copy_sink_to_sink((void*) (src + src_offset), src_node,
			
 
				-						     (void*) (dst + dst_offset), dst_node,
			
 
				-						     size);
			
 
				-	}
			
 
				+int _starpu_scc_copy_data_from_cpu_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
			
 
				+{
			
 
				+	(void) req;
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				+
			
 
				+	int ret = 0;
			
 
				+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
			
 
				+	if (copy_methods->scc_src_to_sink)
			
 
				+		ret = copy_methods->scc_src_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				 	else
			
 
				-	{
			
 
				-		STARPU_ABORT();
			
 
				-		return -1;
			
 
				-	}
			
 
				+		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_copy_interface_from_scc_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_CPU_RAM);
			
 
				+
			
 
				+	return _starpu_scc_copy_sink_to_src((void*) (src + src_offset), src_node,
			
 
				+					    (void*) (dst + dst_offset), dst_node,
			
 
				+					    size);
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_copy_interface_from_scc_to_scc(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_SCC_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				+
			
 
				+	return _starpu_scc_copy_sink_to_sink((void*) (src + src_offset), src_node,
			
 
				+					     (void*) (dst + dst_offset), dst_node,
			
 
				+					     size);
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_copy_interface_from_cpu_to_scc(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel)
			
 
				+{
			
 
				+	int src_kind = starpu_node_get_kind(src_node);
			
 
				+	int dst_kind = starpu_node_get_kind(dst_node);
			
 
				+	STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_SCC_RAM);
			
 
				+
			
 
				+	return _starpu_scc_copy_src_to_sink((void*) (src + src_offset), src_node,
			
 
				+					    (void*) (dst + dst_offset), dst_node,
			
 
				+					    size);
			
 
				 }
			
 
				 
			
 
				 int _starpu_scc_direct_access_supported(unsigned node, unsigned handling_node)
			
 
				 {
			
 
				+	(void) node;
			
 
				+	(void) handling_node;
			
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				 uintptr_t _starpu_scc_malloc_on_node(unsigned dst_node, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	uintptr_t addr = 0;
			
 
				 	if (_starpu_scc_allocate_memory((void **)(&addr), size, dst_node))
			
 
				 		addr = 0;
			
@@ -368,5 +425,6 @@ uintptr_t _starpu_scc_malloc_on_node(unsigned dst_node, size_t size, int flags)
 
				 
			
 
				 void _starpu_scc_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags)
			
 
				 {
			
 
				+	(void) flags;
			
 
				 	_starpu_scc_free_memory((void *) addr, dst_node);
			
 
				 }
			
--- a/src/drivers/scc/driver_scc_source.h
+++ b/src/drivers/scc/driver_scc_source.h
@@ -52,7 +52,14 @@ int _starpu_scc_copy_sink_to_sink(void *src, unsigned src_node, void *dst, unsig
 
				 
			
 
				 void *_starpu_scc_src_worker(void *arg);
			
 
				 
			
 
				-int _starpu_scc_copy_interface(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_scc_copy_data_from_scc_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_scc_copy_data_from_scc_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_scc_copy_data_from_cpu_to_scc(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+
			
 
				+int _starpu_scc_copy_interface_from_scc_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_scc_copy_interface_from_scc_to_scc(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_scc_copy_interface_from_cpu_to_scc(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+
			
 
				 int _starpu_scc_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_scc_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
 
				 void _starpu_scc_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);