|
@@ -2,7 +2,7 @@
|
|
|
*
|
|
|
* Copyright (C) 2009-2014 Université de Bordeaux 1
|
|
|
* Copyright (C) 2010 Mehdi Juhoor <mjuhoor@gmail.com>
|
|
|
- * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
|
|
|
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
|
|
|
* Copyright (C) 2011 Télécom-SudParis
|
|
|
*
|
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
@@ -49,7 +49,7 @@ static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS];
|
|
|
static cudaStream_t in_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS];
|
|
|
static cudaStream_t out_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS];
|
|
|
static struct cudaDeviceProp props[STARPU_MAXCUDADEVS];
|
|
|
-static cudaEvent_t task_events[STARPU_NMAXWORKERS];
|
|
|
+static cudaEvent_t task_events[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE];
|
|
|
#endif /* STARPU_USE_CUDA */
|
|
|
|
|
|
void
|
|
@@ -221,7 +221,7 @@ static void init_context(struct _starpu_worker_set *worker_set, unsigned devid)
|
|
|
{
|
|
|
cudaError_t cures;
|
|
|
int workerid;
|
|
|
- unsigned i;
|
|
|
+ unsigned i, j;
|
|
|
|
|
|
/* TODO: cudaSetDeviceFlag(cudaDeviceMapHost) */
|
|
|
|
|
@@ -276,7 +276,8 @@ static void init_context(struct _starpu_worker_set *worker_set, unsigned devid)
|
|
|
{
|
|
|
workerid = worker_set->workers[i].workerid;
|
|
|
|
|
|
- cures = cudaEventCreateWithFlags(&task_events[workerid], cudaEventDisableTiming);
|
|
|
+ for (j = 0; j < STARPU_MAX_PIPELINE; j++)
|
|
|
+ cures = cudaEventCreateWithFlags(&task_events[workerid][j], cudaEventDisableTiming);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
@@ -307,7 +308,7 @@ static void init_context(struct _starpu_worker_set *worker_set, unsigned devid)
|
|
|
static void deinit_context(struct _starpu_worker_set *worker_set)
|
|
|
{
|
|
|
cudaError_t cures;
|
|
|
- unsigned i;
|
|
|
+ unsigned i, j;
|
|
|
int workerid = worker_set->workers[0].workerid;
|
|
|
int devid = starpu_worker_get_devid(workerid);
|
|
|
|
|
@@ -316,7 +317,8 @@ static void deinit_context(struct _starpu_worker_set *worker_set)
|
|
|
workerid = worker_set->workers[i].workerid;
|
|
|
devid = starpu_worker_get_devid(workerid);
|
|
|
|
|
|
- cudaEventDestroy(task_events[workerid]);
|
|
|
+ for (j = 0; j < STARPU_MAX_PIPELINE; j++)
|
|
|
+ cudaEventDestroy(task_events[workerid][j]);
|
|
|
cudaStreamDestroy(streams[workerid]);
|
|
|
}
|
|
|
|
|
@@ -372,7 +374,7 @@ void _starpu_init_cuda(void)
|
|
|
STARPU_ASSERT(ncudagpus <= STARPU_MAXCUDADEVS);
|
|
|
}
|
|
|
|
|
|
-static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
+static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker)
|
|
|
{
|
|
|
int ret;
|
|
|
|
|
@@ -396,11 +398,15 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
|
|
|
- _starpu_driver_start_job(args, j, &args->perf_arch, &j->cl_start, 0, profiling);
|
|
|
+ if (worker->ntasks == 1)
|
|
|
+ {
|
|
|
+ /* We are alone in the pipeline, the kernel will start now, record it */
|
|
|
+ _starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, profiling);
|
|
|
+ }
|
|
|
|
|
|
#if defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
|
|
|
/* We make sure we do manipulate the proper device */
|
|
|
- starpu_cuda_set_device(args->devid);
|
|
|
+ starpu_cuda_set_device(worker->devid);
|
|
|
#endif
|
|
|
|
|
|
starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl);
|
|
@@ -410,7 +416,7 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
{
|
|
|
_STARPU_TRACE_START_EXECUTING();
|
|
|
#ifdef STARPU_SIMGRID
|
|
|
- _starpu_simgrid_execute_job(j, &args->perf_arch, NAN);
|
|
|
+ _starpu_simgrid_execute_job(j, &worker->perf_arch, NAN);
|
|
|
#else
|
|
|
func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
|
|
|
#endif
|
|
@@ -420,35 +426,90 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
+static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker)
|
|
|
{
|
|
|
struct timespec codelet_end;
|
|
|
|
|
|
int profiling = starpu_profiling_status_get();
|
|
|
|
|
|
_starpu_set_current_task(NULL);
|
|
|
- args->current_task = NULL;
|
|
|
+ worker->current_tasks[worker->first_task] = NULL;
|
|
|
+ worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE;
|
|
|
+ worker->ntasks--;
|
|
|
|
|
|
- _starpu_driver_end_job(args, j, &args->perf_arch, &codelet_end, 0, profiling);
|
|
|
+ _starpu_driver_end_job(worker, j, &worker->perf_arch, &codelet_end, 0, profiling);
|
|
|
|
|
|
- _starpu_driver_update_job_feedback(j, args, &args->perf_arch, &j->cl_start, &codelet_end, profiling);
|
|
|
+ _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, &j->cl_start, &codelet_end, profiling);
|
|
|
|
|
|
_starpu_push_task_output(j);
|
|
|
|
|
|
_starpu_handle_job_termination(j);
|
|
|
}
|
|
|
|
|
|
+/* Execute a job, up to completion for synchronous jobs */
|
|
|
+static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker)
|
|
|
+{
|
|
|
+ int workerid = worker->workerid;
|
|
|
+ int res;
|
|
|
+
|
|
|
+ struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
|
|
|
+
|
|
|
+ res = start_job_on_cuda(j, worker);
|
|
|
+
|
|
|
+ if (res)
|
|
|
+ {
|
|
|
+ switch (res)
|
|
|
+ {
|
|
|
+ case -EAGAIN:
|
|
|
+ _STARPU_DISP("ouch, CUDA could not actually run task %p, putting it back...\n", task);
|
|
|
+ _starpu_push_task_to_workers(task);
|
|
|
+ STARPU_ABORT();
|
|
|
+ default:
|
|
|
+ STARPU_ABORT();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+#ifndef STARPU_SIMGRID
|
|
|
+ if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC)
|
|
|
+ {
|
|
|
+ /* Record event to synchronize with task termination later */
|
|
|
+ cudaEventRecord(task_events[workerid][(worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE], starpu_cuda_get_local_stream());
|
|
|
+#ifdef STARPU_USE_FXT
|
|
|
+ int k;
|
|
|
+ for (k = 0; k < (int) worker->set->nworkers; k++)
|
|
|
+ if (worker->set->workers[k].ntasks == worker->set->workers[k].pipeline_length)
|
|
|
+ break;
|
|
|
+ if (k == (int) worker->set->nworkers)
|
|
|
+ /* Everybody busy */
|
|
|
+ _STARPU_TRACE_START_EXECUTING()
|
|
|
+#endif
|
|
|
+ }
|
|
|
+ else
|
|
|
+#else
|
|
|
+#ifdef STARPU_DEVEL
|
|
|
+#warning No CUDA asynchronous execution with simgrid yet.
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+ /* Synchronous execution */
|
|
|
+ {
|
|
|
+#if defined(STARPU_DEBUG) && !defined(STARPU_SIMGRID)
|
|
|
+ STARPU_ASSERT_MSG(cudaStreamQuery(starpu_cuda_get_local_stream()) == cudaSuccess, "CUDA codelets have to wait for termination of their kernels on the starpu_cuda_get_local_stream() stream");
|
|
|
+#endif
|
|
|
+ finish_job_on_cuda(j, worker);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/* XXX Should this be merged with _starpu_init_cuda ? */
|
|
|
int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
|
|
|
{
|
|
|
- struct _starpu_worker *args = &worker_set->workers[0];
|
|
|
- unsigned devid = args->devid;
|
|
|
+ struct _starpu_worker *worker0 = &worker_set->workers[0];
|
|
|
+ unsigned devid = worker0->devid;
|
|
|
unsigned i;
|
|
|
|
|
|
- _starpu_worker_start(args, _STARPU_FUT_CUDA_KEY);
|
|
|
+ _starpu_worker_start(worker0, _STARPU_FUT_CUDA_KEY);
|
|
|
|
|
|
#ifdef STARPU_USE_FXT
|
|
|
- unsigned memnode = args->memory_node;
|
|
|
+ unsigned memnode = worker0->memory_node;
|
|
|
for (i = 1; i < worker_set->nworkers; i++)
|
|
|
{
|
|
|
struct _starpu_worker *worker = &worker_set->workers[i];
|
|
@@ -458,17 +519,20 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
|
|
|
|
|
|
#ifndef STARPU_SIMGRID
|
|
|
init_context(worker_set, devid);
|
|
|
+
|
|
|
+ if (worker_set->nworkers > 1 && props[devid].concurrentKernels == 0)
|
|
|
+ _STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but the device does not support concurrent kernel execution!\n", worker_set->nworkers);
|
|
|
#endif
|
|
|
|
|
|
_starpu_cuda_limit_gpu_mem_if_needed(devid);
|
|
|
- _starpu_memory_manager_set_global_memory_size(args->memory_node, _starpu_cuda_get_global_mem_size(devid));
|
|
|
+ _starpu_memory_manager_set_global_memory_size(worker0->memory_node, _starpu_cuda_get_global_mem_size(devid));
|
|
|
|
|
|
- _starpu_malloc_init(args->memory_node);
|
|
|
+ _starpu_malloc_init(worker0->memory_node);
|
|
|
|
|
|
/* one more time to avoid hacks from third party lib :) */
|
|
|
- _starpu_bind_thread_on_cpu(args->config, args->bindid);
|
|
|
+ _starpu_bind_thread_on_cpu(worker0->config, worker0->bindid);
|
|
|
|
|
|
- args->status = STATUS_UNKNOWN;
|
|
|
+ worker0->status = STATUS_UNKNOWN;
|
|
|
|
|
|
float size = (float) global_mem[devid] / (1<<30);
|
|
|
#ifdef STARPU_SIMGRID
|
|
@@ -479,29 +543,31 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
|
|
|
strncpy(devname, props[devid].name, 128);
|
|
|
#endif
|
|
|
|
|
|
+ for (i = 0; i < worker_set->nworkers; i++)
|
|
|
+ {
|
|
|
+ struct _starpu_worker *worker = &worker_set->workers[i];
|
|
|
#if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID)
|
|
|
#if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID)
|
|
|
- if (props[devid].pciDomainID)
|
|
|
- snprintf(args->name, sizeof(args->name), "CUDA %u (%s %.1f GiB %04x:%02x:%02x.0)", devid, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
|
|
|
- else
|
|
|
+ if (props[devid].pciDomainID)
|
|
|
+ snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, i, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
|
|
|
+ else
|
|
|
#endif
|
|
|
- snprintf(args->name, sizeof(args->name), "CUDA %u (%s %.1f GiB %02x:%02x.0)", devid, devname, size, props[devid].pciBusID, props[devid].pciDeviceID);
|
|
|
+ snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %02x:%02x.0)", devid, i, devname, size, props[devid].pciBusID, props[devid].pciDeviceID);
|
|
|
#else
|
|
|
- snprintf(args->name, sizeof(args->name), "CUDA %u (%s %.1f GiB)", devid, devname, size);
|
|
|
+ snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB)", devid, i, devname, size);
|
|
|
#endif
|
|
|
- snprintf(args->short_name, sizeof(args->short_name), "CUDA %u", devid);
|
|
|
- _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, args->bindid);
|
|
|
+ snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u.%u", devid, i);
|
|
|
+ _STARPU_DEBUG("cuda (%s) dev id %u worker %u thread is ready to run on CPU %d !\n", devname, devid, i, worker->bindid);
|
|
|
|
|
|
- for (i = 0; i < worker_set->nworkers; i++)
|
|
|
- {
|
|
|
+ worker->pipeline_length = starpu_get_env_number_default("STARPU_CUDA_PIPELINE", 2);
|
|
|
_STARPU_TRACE_WORKER_INIT_END(worker_set->workers[i].workerid);
|
|
|
}
|
|
|
|
|
|
/* tell the main thread that this one is ready */
|
|
|
- STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
|
|
|
- args->worker_is_initialized = 1;
|
|
|
- STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
|
|
|
- STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
|
|
|
+ STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex);
|
|
|
+ worker0->worker_is_initialized = 1;
|
|
|
+ STARPU_PTHREAD_COND_SIGNAL(&worker0->ready_cond);
|
|
|
+ STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex);
|
|
|
|
|
|
/* tell the main thread that this one is ready */
|
|
|
STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
|
|
@@ -527,19 +593,20 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
|
|
|
idle = 0;
|
|
|
for (i = 0; i < (int) worker_set->nworkers; i++)
|
|
|
{
|
|
|
- struct _starpu_worker *args = &worker_set->workers[i];
|
|
|
- int workerid = args->workerid;
|
|
|
-
|
|
|
- task = args->current_task;
|
|
|
+ struct _starpu_worker *worker = &worker_set->workers[i];
|
|
|
+ int workerid = worker->workerid;
|
|
|
|
|
|
- if (!task)
|
|
|
+ if (!worker->ntasks)
|
|
|
{
|
|
|
idle++;
|
|
|
+ /* Even nothing to test */
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
+ task = worker->current_tasks[worker->first_task];
|
|
|
+
|
|
|
/* On-going asynchronous task, check for its termination first */
|
|
|
- cudaError_t cures = cudaEventQuery(task_events[workerid]);
|
|
|
+ cudaError_t cures = cudaEventQuery(task_events[workerid][worker->first_task]);
|
|
|
|
|
|
if (cures != cudaSuccess)
|
|
|
{
|
|
@@ -548,19 +615,46 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
|
|
|
else
|
|
|
{
|
|
|
/* Asynchronous task completed! */
|
|
|
- _starpu_set_local_worker_key(args);
|
|
|
- finish_job_on_cuda(_starpu_get_job_associated_to_task(task), args);
|
|
|
- idle++;
|
|
|
+ _starpu_set_local_worker_key(worker);
|
|
|
+ finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker);
|
|
|
+ /* See next task if any */
|
|
|
+ if (worker->ntasks)
|
|
|
+ {
|
|
|
+ task = worker->current_tasks[worker->first_task];
|
|
|
+ j = _starpu_get_job_associated_to_task(task);
|
|
|
+ if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC)
|
|
|
+ {
|
|
|
+ /* An asynchronous task, it was already
|
|
|
+ * queued, it's now running, record its start time. */
|
|
|
+ _starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, starpu_profiling_status_get());
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ /* A synchronous task, we have finished
|
|
|
+ * flushing the pipeline, we can now at
|
|
|
+ * last execute it. */
|
|
|
+
|
|
|
+ _STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
+ _STARPU_TRACE_EVENT("sync_task");
|
|
|
+ execute_job_on_cuda(task, worker);
|
|
|
+ _STARPU_TRACE_EVENT("end_sync_task");
|
|
|
+ _STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
+ worker->pipeline_stuck = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
#ifdef STARPU_USE_FXT
|
|
|
int k;
|
|
|
for (k = 0; k < (int) worker_set->nworkers; k++)
|
|
|
- if (worker_set->workers[k].current_task)
|
|
|
+ if (worker_set->workers[k].ntasks)
|
|
|
break;
|
|
|
if (k == (int) worker_set->nworkers)
|
|
|
/* Everybody busy */
|
|
|
_STARPU_TRACE_END_EXECUTING()
|
|
|
#endif
|
|
|
}
|
|
|
+
|
|
|
+ if (worker->ntasks < worker->pipeline_length)
|
|
|
+ idle++;
|
|
|
}
|
|
|
|
|
|
if (!idle)
|
|
@@ -582,14 +676,12 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
|
|
|
|
|
|
for (i = 0; i < (int) worker_set->nworkers; i++)
|
|
|
{
|
|
|
- struct _starpu_worker *args = &worker_set->workers[i];
|
|
|
- int workerid = args->workerid;
|
|
|
+ struct _starpu_worker *worker = &worker_set->workers[i];
|
|
|
|
|
|
task = tasks[i];
|
|
|
if (!task)
|
|
|
continue;
|
|
|
|
|
|
- _starpu_set_local_worker_key(args);
|
|
|
|
|
|
j = _starpu_get_job_associated_to_task(task);
|
|
|
|
|
@@ -597,54 +689,24 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
|
|
|
if (!_STARPU_CUDA_MAY_PERFORM(j))
|
|
|
{
|
|
|
/* this is neither a cuda or a cublas task */
|
|
|
+ worker->ntasks--;
|
|
|
_starpu_push_task_to_workers(task);
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- _STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
- res = start_job_on_cuda(j, args);
|
|
|
-
|
|
|
- if (res)
|
|
|
+ if (worker->ntasks > 1 && !(task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC))
|
|
|
{
|
|
|
- switch (res)
|
|
|
- {
|
|
|
- case -EAGAIN:
|
|
|
- _STARPU_DISP("ouch, CUDA could not actually run task %p, putting it back...\n", task);
|
|
|
- _starpu_push_task_to_workers(task);
|
|
|
- STARPU_ABORT();
|
|
|
- default:
|
|
|
- STARPU_ABORT();
|
|
|
- }
|
|
|
+ /* We have to execute a non-asynchronous task but we
|
|
|
+ * still have tasks in the pipeline... Record it to
|
|
|
+ * prevent more tasks from coming, and do it later */
|
|
|
+ worker->pipeline_stuck = 1;
|
|
|
+ continue;
|
|
|
}
|
|
|
|
|
|
-#ifndef STARPU_SIMGRID
|
|
|
- if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC)
|
|
|
- {
|
|
|
- /* Record event to synchronize with task termination later */
|
|
|
- cudaEventRecord(task_events[workerid], starpu_cuda_get_local_stream());
|
|
|
-#ifdef STARPU_USE_FXT
|
|
|
- int k;
|
|
|
- for (k = 0; k < (int) worker_set->nworkers; k++)
|
|
|
- if (worker_set->workers[k].current_task)
|
|
|
- break;
|
|
|
- if (k < (int) worker_set->nworkers)
|
|
|
- /* Everybody busy */
|
|
|
- _STARPU_TRACE_START_EXECUTING()
|
|
|
-#endif
|
|
|
- }
|
|
|
- else
|
|
|
-#else
|
|
|
-#ifdef STARPU_DEVEL
|
|
|
-#warning No CUDA asynchronous execution with simgrid yet.
|
|
|
-#endif
|
|
|
-#endif
|
|
|
- /* Synchronous execution */
|
|
|
- {
|
|
|
-#if defined(STARPU_DEBUG) && !defined(STARPU_SIMGRID)
|
|
|
- STARPU_ASSERT_MSG(cudaStreamQuery(starpu_cuda_get_local_stream()) == cudaSuccess, "CUDA codelets have to wait for termination of their kernels on the starpu_cuda_get_local_stream() stream");
|
|
|
-#endif
|
|
|
- finish_job_on_cuda(j, args);
|
|
|
- }
|
|
|
+ _starpu_set_local_worker_key(worker);
|
|
|
+
|
|
|
+ _STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
+ execute_job_on_cuda(task, worker);
|
|
|
_STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
}
|
|
|
|
|
@@ -653,8 +715,8 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
|
|
|
|
|
|
int _starpu_cuda_driver_deinit(struct _starpu_worker_set *arg)
|
|
|
{
|
|
|
- struct _starpu_worker *args = &arg->workers[0];
|
|
|
- unsigned memnode = args->memory_node;
|
|
|
+ struct _starpu_worker *worker = &arg->workers[0];
|
|
|
+ unsigned memnode = worker->memory_node;
|
|
|
_STARPU_TRACE_WORKER_DEINIT_START;
|
|
|
|
|
|
_starpu_handle_all_pending_node_data_requests(memnode);
|
|
@@ -675,16 +737,16 @@ int _starpu_cuda_driver_deinit(struct _starpu_worker_set *arg)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-void *_starpu_cuda_worker(void *arg)
|
|
|
+void *_starpu_cuda_worker(void *_arg)
|
|
|
{
|
|
|
- struct _starpu_worker_set* args = arg;
|
|
|
+ struct _starpu_worker_set* worker = _arg;
|
|
|
|
|
|
- _starpu_cuda_driver_init(args);
|
|
|
+ _starpu_cuda_driver_init(worker);
|
|
|
_STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
while (_starpu_machine_is_running())
|
|
|
- _starpu_cuda_driver_run_once(args);
|
|
|
+ _starpu_cuda_driver_run_once(worker);
|
|
|
_STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
- _starpu_cuda_driver_deinit(args);
|
|
|
+ _starpu_cuda_driver_deinit(worker);
|
|
|
|
|
|
return NULL;
|
|
|
}
|