|
@@ -42,8 +42,8 @@ static int ncudagpus;
|
|
|
static size_t global_mem[STARPU_MAXCUDADEVS];
|
|
|
#ifdef STARPU_USE_CUDA
|
|
|
static cudaStream_t streams[STARPU_NMAXWORKERS];
|
|
|
-static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS];
|
|
|
-static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS];
|
|
|
+static cudaStream_t out_transfer_streams[STARPU_NMAXWORKERS]; /* one slot per worker id */
|
|
|
+static cudaStream_t in_transfer_streams[STARPU_NMAXWORKERS]; /* one slot per worker id */
|
|
|
static cudaStream_t peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS];
|
|
|
static struct cudaDeviceProp props[STARPU_MAXCUDADEVS];
|
|
|
-static cudaEvent_t task_events[STARPU_MAXCUDADEVS];
|
|
|
+static cudaEvent_t task_events[STARPU_NMAXWORKERS]; /* indexed by worker id (see deinit_context), so sized like streams[] */
|
|
@@ -116,18 +116,18 @@ static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid)
|
|
|
}
|
|
|
|
|
|
#ifdef STARPU_USE_CUDA
|
|
|
-cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned node)
|
|
|
+/* Return the in-transfer stream stored for the calling worker; only valid when called from a worker thread. */
|
|
|
+cudaStream_t starpu_cuda_get_local_in_transfer_stream(void)
|
|
|
{
|
|
|
- int devid = _starpu_memory_node_get_devid(node);
|
|
|
+ int worker = starpu_worker_get_id();
|
|
|
+ STARPU_ASSERT(worker >= 0); /* starpu_worker_get_id() is documented to return -1 outside a worker; never index the table with it */
|
|
|
|
|
|
- return in_transfer_streams[devid];
|
|
|
+ return in_transfer_streams[worker];
|
|
|
}
|
|
|
|
|
|
-cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned node)
|
|
|
+/* Return the out-transfer stream stored for the calling worker; only valid when called from a worker thread. */
|
|
|
+cudaStream_t starpu_cuda_get_local_out_transfer_stream(void)
|
|
|
{
|
|
|
- int devid = _starpu_memory_node_get_devid(node);
|
|
|
+ int worker = starpu_worker_get_id();
|
|
|
+ STARPU_ASSERT(worker >= 0); /* starpu_worker_get_id() is documented to return -1 outside a worker; never index the table with it */
|
|
|
|
|
|
- return out_transfer_streams[devid];
|
|
|
+ return out_transfer_streams[worker];
|
|
|
}
|
|
|
|
|
|
cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node)
|
|
@@ -261,11 +261,11 @@ static void init_context(unsigned devid)
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- cures = cudaStreamCreate(&in_transfer_streams[devid]);
|
|
|
+ cures = cudaStreamCreate(&in_transfer_streams[workerid]);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- cures = cudaStreamCreate(&out_transfer_streams[devid]);
|
|
|
+ cures = cudaStreamCreate(&out_transfer_streams[workerid]);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
@@ -285,8 +285,8 @@ static void deinit_context(int workerid)
|
|
|
|
|
|
cudaEventDestroy(task_events[workerid]);
|
|
|
cudaStreamDestroy(streams[workerid]);
|
|
|
- cudaStreamDestroy(in_transfer_streams[devid]);
|
|
|
- cudaStreamDestroy(out_transfer_streams[devid]);
|
|
|
+ cudaStreamDestroy(in_transfer_streams[workerid]);
|
|
|
+ cudaStreamDestroy(out_transfer_streams[workerid]);
|
|
|
for (i = 0; i < ncudagpus; i++)
|
|
|
cudaStreamDestroy(peer_transfer_streams[i][devid]);
|
|
|
|