|
@@ -163,7 +163,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
|
|
cures = cudaEventCreate(&req->async_channel.event.cuda_event);
|
|
cures = cudaEventCreate(&req->async_channel.event.cuda_event);
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- stream = starpu_cuda_get_out_transfer_stream(src_node);
|
|
|
|
|
|
+ stream = starpu_cuda_get_local_out_transfer_stream();
|
|
if (copy_methods->cuda_to_ram_async)
|
|
if (copy_methods->cuda_to_ram_async)
|
|
ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
else
|
|
else
|
|
@@ -199,7 +199,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess))
|
|
if (STARPU_UNLIKELY(cures != cudaSuccess))
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- stream = starpu_cuda_get_in_transfer_stream(dst_node);
|
|
|
|
|
|
+ stream = starpu_cuda_get_local_in_transfer_stream();
|
|
if (copy_methods->ram_to_cuda_async)
|
|
if (copy_methods->ram_to_cuda_async)
|
|
ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream);
|
|
else
|
|
else
|
|
@@ -531,7 +531,7 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
|
|
(void*) src + src_offset, src_node,
|
|
(void*) src + src_offset, src_node,
|
|
(void*) dst + dst_offset, dst_node,
|
|
(void*) dst + dst_offset, dst_node,
|
|
size,
|
|
size,
|
|
- async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL,
|
|
|
|
|
|
+ async_channel?starpu_cuda_get_local_out_transfer_stream():NULL,
|
|
cudaMemcpyDeviceToHost);
|
|
cudaMemcpyDeviceToHost);
|
|
|
|
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
|
|
@@ -539,7 +539,7 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
|
|
(void*) src + src_offset, src_node,
|
|
(void*) src + src_offset, src_node,
|
|
(void*) dst + dst_offset, dst_node,
|
|
(void*) dst + dst_offset, dst_node,
|
|
size,
|
|
size,
|
|
- async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL,
|
|
|
|
|
|
+ async_channel?starpu_cuda_get_local_in_transfer_stream():NULL,
|
|
cudaMemcpyHostToDevice);
|
|
cudaMemcpyHostToDevice);
|
|
|
|
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
|
|
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
|