|
@@ -1,6 +1,6 @@
|
|
|
/* StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * Copyright (C) 2010-2013 Université de Bordeaux 1
|
|
|
+ * Copyright (C) 2010-2014 Université de Bordeaux 1
|
|
|
* Copyright (C) 2010, 2011, 2013 Centre National de la Recherche Scientifique
|
|
|
*
|
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
@@ -163,7 +163,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
|
|
|
cures = cudaEventCreate(&req->async_channel.event.cuda_event);
|
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- stream = starpu_cuda_get_local_out_transfer_stream();
|
|
|
+ stream = starpu_cuda_get_out_transfer_stream(src_node);
|
|
|
if (copy_methods->cuda_to_ram_async)
|
|
|
ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
|
else
|
|
@@ -199,7 +199,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
|
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- stream = starpu_cuda_get_local_in_transfer_stream();
|
|
|
+ stream = starpu_cuda_get_in_transfer_stream(dst_node);
|
|
|
if (copy_methods->ram_to_cuda_async)
|
|
|
ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
|
else
|
|
@@ -231,7 +231,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
|
|
|
cures = cudaEventCreate(&req->async_channel.event.cuda_event);
|
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- stream = starpu_cuda_get_local_peer_transfer_stream();
|
|
|
+ stream = starpu_cuda_get_peer_transfer_stream(src_node, dst_node);
|
|
|
if (copy_methods->cuda_to_cuda_async)
|
|
|
ret = copy_methods->cuda_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
|
else
|
|
@@ -531,7 +531,7 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
|
|
|
(void*) src + src_offset, src_node,
|
|
|
(void*) dst + dst_offset, dst_node,
|
|
|
size,
|
|
|
- async_channel?starpu_cuda_get_local_out_transfer_stream():NULL,
|
|
|
+ async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL,
|
|
|
cudaMemcpyDeviceToHost);
|
|
|
|
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
|
|
@@ -539,7 +539,7 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
|
|
|
(void*) src + src_offset, src_node,
|
|
|
(void*) dst + dst_offset, dst_node,
|
|
|
size,
|
|
|
- async_channel?starpu_cuda_get_local_in_transfer_stream():NULL,
|
|
|
+ async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL,
|
|
|
cudaMemcpyHostToDevice);
|
|
|
|
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
|
|
@@ -547,7 +547,7 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
|
|
|
(void*) src + src_offset, src_node,
|
|
|
(void*) dst + dst_offset, dst_node,
|
|
|
size,
|
|
|
- async_channel?starpu_cuda_get_local_peer_transfer_stream():NULL,
|
|
|
+ async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL,
|
|
|
cudaMemcpyDeviceToDevice);
|
|
|
|
|
|
#endif
|