|
@@ -50,6 +50,7 @@ static cl_command_queue in_transfer_queues[STARPU_MAXOPENCLDEVS];
|
|
|
static cl_command_queue out_transfer_queues[STARPU_MAXOPENCLDEVS];
|
|
|
static cl_command_queue peer_transfer_queues[STARPU_MAXOPENCLDEVS];
|
|
|
static cl_command_queue alloc_queues[STARPU_MAXOPENCLDEVS];
|
|
|
+static cl_event task_events[STARPU_MAXOPENCLDEVS];
|
|
|
#endif
|
|
|
|
|
|
void
|
|
@@ -561,7 +562,8 @@ void _starpu_opencl_init(void)
|
|
|
#ifndef STARPU_SIMGRID
|
|
|
static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname);
|
|
|
#endif
|
|
|
-static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_worker *args);
|
|
|
+static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *args);
|
|
|
+static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *args);
|
|
|
|
|
|
int _starpu_opencl_driver_init(struct _starpu_worker *args)
|
|
|
{
|
|
@@ -616,13 +618,36 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *args)
|
|
|
struct starpu_task *task;
|
|
|
int res;
|
|
|
|
|
|
- _STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
- _starpu_datawizard_progress(memnode, 1);
|
|
|
+ task = starpu_task_get_current();
|
|
|
+
|
|
|
+ if (task)
|
|
|
+ {
|
|
|
+ cl_int status;
|
|
|
+ size_t size;
|
|
|
+ int err;
|
|
|
+ /* On-going asynchronous task, check for its termination first */
|
|
|
+
|
|
|
+ err = clGetEventInfo(task_events[args->devid], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, &size);
|
|
|
+ STARPU_ASSERT(size == sizeof(cl_int));
|
|
|
+ if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
+
|
|
|
+ if (status != CL_COMPLETE)
|
|
|
+ {
|
|
|
+ /* Not ready yet, no better thing to do than waiting */
|
|
|
+ __starpu_datawizard_progress(memnode, 1, 0);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Asynchronous task completed! */
|
|
|
+ _starpu_opencl_stop_job(_starpu_get_job_associated_to_task(task), args);
|
|
|
+ }
|
|
|
+
|
|
|
+ __starpu_datawizard_progress(memnode, 1, 1);
|
|
|
+
|
|
|
_STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
|
|
|
task = _starpu_get_worker_task(args, workerid, memnode);
|
|
|
|
|
|
-
|
|
|
if (task == NULL)
|
|
|
return 0;
|
|
|
|
|
@@ -636,13 +661,7 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *args)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- _starpu_set_current_task(j->task);
|
|
|
- args->current_task = j->task;
|
|
|
-
|
|
|
- res = _starpu_opencl_execute_job(j, args);
|
|
|
-
|
|
|
- _starpu_set_current_task(NULL);
|
|
|
- args->current_task = NULL;
|
|
|
+ res = _starpu_opencl_start_job(j, args);
|
|
|
|
|
|
if (res)
|
|
|
{
|
|
@@ -658,7 +677,28 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *args)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- _starpu_handle_job_termination(j);
|
|
|
+#ifndef STARPU_SIMGRID
|
|
|
+ if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)
|
|
|
+ {
|
|
|
+ /* Record event to synchronize with task termination later */
|
|
|
+ int err;
|
|
|
+ cl_command_queue queue;
|
|
|
+ starpu_opencl_get_queue(args->devid, &queue);
|
|
|
+ err = clEnqueueMarker(queue, &task_events[args->devid]);
|
|
|
+ if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
+ }
|
|
|
+ else
|
|
|
+#else
|
|
|
+#ifdef STARPU_DEVEL
|
|
|
+#warning No CUDA asynchronous execution with simgrid yet.
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+ /* Synchronous execution */
|
|
|
+ {
|
|
|
+ _starpu_opencl_stop_job(j, args);
|
|
|
+ }
|
|
|
+ _STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
+
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -692,9 +732,11 @@ void *_starpu_opencl_worker(void *arg)
|
|
|
struct _starpu_worker* args = arg;
|
|
|
|
|
|
_starpu_opencl_driver_init(args);
|
|
|
+ _STARPU_TRACE_START_PROGRESS(memnode);
|
|
|
while (_starpu_machine_is_running())
|
|
|
_starpu_opencl_driver_run_once(args);
|
|
|
_starpu_opencl_driver_deinit(args);
|
|
|
+ _STARPU_TRACE_END_PROGRESS(memnode);
|
|
|
|
|
|
return NULL;
|
|
|
}
|
|
@@ -746,7 +788,7 @@ cl_device_type _starpu_opencl_get_device_type(int devid)
|
|
|
}
|
|
|
#endif /* STARPU_USE_OPENCL */
|
|
|
|
|
|
-static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
+static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
{
|
|
|
int ret;
|
|
|
|
|
@@ -754,12 +796,14 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
|
|
|
struct starpu_task *task = j->task;
|
|
|
|
|
|
int profiling = starpu_profiling_status_get();
|
|
|
- struct timespec codelet_start, codelet_end;
|
|
|
|
|
|
STARPU_ASSERT(task);
|
|
|
struct starpu_codelet *cl = task->cl;
|
|
|
STARPU_ASSERT(cl);
|
|
|
|
|
|
+ _starpu_set_current_task(j->task);
|
|
|
+ args->current_task = j->task;
|
|
|
+
|
|
|
ret = _starpu_fetch_task_input(j);
|
|
|
if (ret != 0)
|
|
|
{
|
|
@@ -769,7 +813,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
|
|
|
- _starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
|
|
|
+ _starpu_driver_start_job(args, j, &j->cl_start, 0, profiling);
|
|
|
|
|
|
starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
|
|
|
STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined");
|
|
@@ -780,12 +824,6 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
|
|
|
double length = NAN;
|
|
|
#ifdef STARPU_OPENCL_SIMULATOR
|
|
|
func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
|
|
|
- if (cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)
|
|
|
- {
|
|
|
- cl_command_queue queue;
|
|
|
- starpu_opencl_get_queue(args->devid, &queue);
|
|
|
- clFinish(queue);
|
|
|
- }
|
|
|
#ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
|
|
|
#ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
|
|
|
#define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
|
|
@@ -800,23 +838,28 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
|
|
|
_starpu_simgrid_execute_job(j, &args->perf_arch, length);
|
|
|
#else
|
|
|
func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
|
|
|
- if (cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)
|
|
|
- {
|
|
|
- cl_command_queue queue;
|
|
|
- starpu_opencl_get_queue(args->devid, &queue);
|
|
|
- clFinish(queue);
|
|
|
- }
|
|
|
#endif
|
|
|
}
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *args)
|
|
|
+{
|
|
|
+ struct timespec codelet_end;
|
|
|
+ int profiling = starpu_profiling_status_get();
|
|
|
+
|
|
|
+ _starpu_set_current_task(NULL);
|
|
|
+ args->current_task = NULL;
|
|
|
|
|
|
_starpu_driver_end_job(args, j, &args->perf_arch, &codelet_end, 0, profiling);
|
|
|
|
|
|
_starpu_driver_update_job_feedback(j, args, &args->perf_arch,
|
|
|
- &codelet_start, &codelet_end, profiling);
|
|
|
+ &j->cl_start, &codelet_end, profiling);
|
|
|
|
|
|
_starpu_push_task_output(j);
|
|
|
|
|
|
- return EXIT_SUCCESS;
|
|
|
+ _starpu_handle_job_termination(j);
|
|
|
+
|
|
|
}
|
|
|
|
|
|
#ifdef STARPU_USE_OPENCL
|