|
@@ -91,9 +91,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
|
|
STARPU_ASSERT(j);
|
|
STARPU_ASSERT(j);
|
|
struct starpu_task *task = j->task;
|
|
struct starpu_task *task = j->task;
|
|
|
|
|
|
- cudaError_t cures;
|
|
|
|
struct timespec codelet_start, codelet_end;
|
|
struct timespec codelet_start, codelet_end;
|
|
- struct timespec codelet_start_comm, codelet_end_comm;
|
|
|
|
|
|
|
|
unsigned calibrate_model = 0;
|
|
unsigned calibrate_model = 0;
|
|
int workerid = args->workerid;
|
|
int workerid = args->workerid;
|
|
@@ -105,15 +103,6 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
|
|
if (cl->model && cl->model->benchmarking)
|
|
if (cl->model && cl->model->benchmarking)
|
|
calibrate_model = 1;
|
|
calibrate_model = 1;
|
|
|
|
|
|
- /* we do not take communication into account when modeling the performance */
|
|
|
|
- if (STARPU_BENCHMARK_COMM)
|
|
|
|
- {
|
|
|
|
- cures = cudaThreadSynchronize();
|
|
|
|
- if (STARPU_UNLIKELY(cures))
|
|
|
|
- STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
- starpu_clock_gettime(&codelet_start_comm);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
ret = _starpu_fetch_task_input(task, mask);
|
|
ret = _starpu_fetch_task_input(task, mask);
|
|
if (ret != 0) {
|
|
if (ret != 0) {
|
|
/* there was not enough memory, so the input of
|
|
/* there was not enough memory, so the input of
|
|
@@ -122,20 +111,12 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
|
|
return -EAGAIN;
|
|
return -EAGAIN;
|
|
}
|
|
}
|
|
|
|
|
|
- if (STARPU_BENCHMARK_COMM)
|
|
|
|
- {
|
|
|
|
- cures = cudaThreadSynchronize();
|
|
|
|
- if (STARPU_UNLIKELY(cures))
|
|
|
|
- STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
- starpu_clock_gettime(&codelet_end_comm);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
STARPU_TRACE_START_CODELET_BODY(j);
|
|
STARPU_TRACE_START_CODELET_BODY(j);
|
|
|
|
|
|
struct starpu_task_profiling_info *profiling_info;
|
|
struct starpu_task_profiling_info *profiling_info;
|
|
profiling_info = task->profiling_info;
|
|
profiling_info = task->profiling_info;
|
|
|
|
|
|
- if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
|
|
|
|
|
|
+ if (profiling_info || calibrate_model)
|
|
{
|
|
{
|
|
starpu_clock_gettime(&codelet_start);
|
|
starpu_clock_gettime(&codelet_start);
|
|
_starpu_worker_register_executing_start_date(workerid, &codelet_start);
|
|
_starpu_worker_register_executing_start_date(workerid, &codelet_start);
|
|
@@ -150,7 +131,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
|
|
|
|
|
|
cl->per_worker_stats[workerid]++;
|
|
cl->per_worker_stats[workerid]++;
|
|
|
|
|
|
- if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
|
|
|
|
|
|
+ if (profiling_info || calibrate_model)
|
|
starpu_clock_gettime(&codelet_end);
|
|
starpu_clock_gettime(&codelet_end);
|
|
|
|
|
|
STARPU_TRACE_END_CODELET_BODY(j);
|
|
STARPU_TRACE_END_CODELET_BODY(j);
|
|
@@ -159,9 +140,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
|
|
_starpu_push_task_output(task, mask);
|
|
_starpu_push_task_output(task, mask);
|
|
|
|
|
|
_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
|
|
_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
|
|
- &codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
|
|
|
|
-
|
|
|
|
- (void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
|
|
|
|
|
|
+ &codelet_start, &codelet_end);
|
|
|
|
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -287,21 +266,6 @@ void *_starpu_cuda_worker(void *arg)
|
|
|
|
|
|
deinit_context(args->workerid);
|
|
deinit_context(args->workerid);
|
|
|
|
|
|
-#ifdef STARPU_DATA_STATS
|
|
|
|
- fprintf(stderr, "CUDA #%d computation %le comm %le (%lf \%%)\n", args->id, jobq->total_computation_time, jobq->total_communication_time, jobq->total_communication_time*100.0/jobq->total_computation_time);
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
-#ifdef STARPU_VERBOSE
|
|
|
|
- double ratio = 0;
|
|
|
|
- if (jobq->total_job_performed != 0)
|
|
|
|
- {
|
|
|
|
- ratio = jobq->total_computation_time_error/jobq->total_computation_time;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- _starpu_print_to_logfile("MODEL ERROR: CUDA %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", args->devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CUDA_KEY);
|
|
STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CUDA_KEY);
|
|
|
|
|
|
pthread_exit(NULL);
|
|
pthread_exit(NULL);
|