Browse Source

simplify yet more storing codelet start/stop: put it in the worker instead of jobs, so various parts of StarPU can access it directly

Samuel Thibault 8 years ago
parent
commit
fad8cc39f7

+ 1 - 5
src/core/jobs.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013, 2014, 2015, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2014  INRIA
@@ -180,10 +180,6 @@ struct _starpu_job {
 	 * parallel tasks only). */
 	int active_task_alias_count;
 
-	/* Used to record codelet start time instead of using a
-	 * local variable */
-	struct timespec cl_start;
-
 	struct bound_task *bound_task;
 
 	/* Parallel workers may have to synchronize before/after the execution of a parallel task. */

+ 2 - 0
src/core/workers.h

@@ -132,6 +132,8 @@ LIST_TYPE(_starpu_worker,
 	starpu_pthread_wait_t wait;
 #endif
 
+	struct timespec cl_start; /* Codelet start time of the task currently running */
+	struct timespec cl_end; /* Codelet end time of the last task running */
 	unsigned char first_task; /* Index of first task in the pipeline */
 	unsigned char ntasks; /* number of tasks in the pipeline */
 	unsigned char pipeline_length; /* number of tasks to be put in the pipeline */

+ 2 - 8
src/drivers/cpu/driver_cpu.c

@@ -55,11 +55,6 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 {
 	int is_parallel_task = (j->task_size > 1);
 	int profiling = starpu_profiling_status_get();
-	/* end timestamp is only conditionnally measured in
-	 * _starpu_driver_end_job, thus make sure that it is
-	 * always initialized */
-	struct timespec codelet_end = {0,0};
-
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 
@@ -110,7 +105,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid, cpu_args->workerid);
 	}
 
-	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
+	_starpu_driver_end_job(cpu_args, j, perf_arch, rank, profiling);
 
 	if (is_parallel_task)
 	{
@@ -143,8 +138,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
 	if (rank == 0)
 	{
-		_starpu_driver_update_job_feedback(j, cpu_args,
-				perf_arch, &j->cl_start, &codelet_end, profiling);
+		_starpu_driver_update_job_feedback(j, cpu_args, perf_arch, profiling);
 #ifdef STARPU_OPENMP
 		if (!j->continuation)
 #endif

+ 3 - 5
src/drivers/cuda/driver_cuda.c

@@ -528,8 +528,6 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worke
 
 static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker)
 {
-	struct timespec codelet_end;
-
 	int profiling = starpu_profiling_status_get();
 
 	_starpu_set_current_task(NULL);
@@ -540,16 +538,16 @@ static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *wor
 	worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE;
 	worker->ntasks--;
 
-	_starpu_driver_end_job(worker, j, &worker->perf_arch, &codelet_end, 0, profiling);
+	_starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling);
 
 	struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j);
 	if(!sched_ctx)
 		sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
 
 	if(!sched_ctx->sched_policy)
-		_starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, &j->cl_start, &codelet_end, profiling);
+		_starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling);
 	else
-		_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, &j->cl_start, &codelet_end, profiling);
+		_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling);
 
 	_starpu_push_task_output(j);
 

+ 11 - 11
src/drivers/driver_common/driver_common.c

@@ -63,13 +63,13 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 
 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
 		{
-			_starpu_clock_gettime(&j->cl_start);
-			_starpu_worker_register_executing_start_date(workerid, &j->cl_start);
+			_starpu_clock_gettime(&worker->cl_start);
+			_starpu_worker_register_executing_start_date(workerid, &worker->cl_start);
 		}
 	}
 
 	if (starpu_top)
-		_starpu_top_task_started(task,workerid,&j->cl_start);
+		_starpu_top_task_started(task,workerid,&worker->cl_start);
 
 
 	// Find out if the worker is the master of a parallel context
@@ -106,7 +106,7 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 	_STARPU_TASK_BREAK_ON(task, exec);
 }
 
-void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
+void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, int rank, int profiling)
 {
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
@@ -138,14 +138,14 @@ void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j
 		struct starpu_profiling_task_info *profiling_info = task->profiling_info;
 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
 		{
-			_starpu_clock_gettime(codelet_end);
+			_starpu_clock_gettime(&worker->cl_end);
 			_starpu_worker_register_executing_end(workerid);
 		}
 		STARPU_AYU_POSTRUNTASK(j->job_id);
 	}
 
 	if (starpu_top)
-		_starpu_top_task_ended(task,workerid,codelet_end);
+		_starpu_top_task_ended(task,workerid,&worker->cl_end);
 
 	_starpu_set_worker_status(worker, STATUS_UNKNOWN);
 
@@ -165,7 +165,7 @@ void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j
 			{
 				new_rank++;
 				struct _starpu_worker *_worker = _starpu_get_worker_struct(_workerid);
-				_starpu_driver_end_job(_worker, j, &_worker->perf_arch, codelet_end, new_rank, profiling);
+				_starpu_driver_end_job(_worker, j, &_worker->perf_arch, new_rank, profiling);
 			}
 		}
 	}
@@ -173,7 +173,7 @@ void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j
 
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker,
 					struct starpu_perfmodel_arch* perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end, int profiling)
+					int profiling)
 {
 	struct starpu_profiling_task_info *profiling_info = j->task->profiling_info;
 	struct timespec measured_ts;
@@ -193,14 +193,14 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	{
 		double measured;
 
-		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
+		starpu_timespec_sub(&worker->cl_end, &worker->cl_start, &measured_ts);
 		measured = starpu_timing_timespec_to_us(&measured_ts);
 		STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured);
 
 		if (profiling && profiling_info)
 		{
-			memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
-			memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
+			memcpy(&profiling_info->start_time, &worker->cl_start, sizeof(struct timespec));
+			memcpy(&profiling_info->end_time, &worker->cl_end, sizeof(struct timespec));
 
 			profiling_info->workerid = workerid;
 

+ 2 - 3
src/drivers/driver_common/driver_common.h

@@ -26,10 +26,9 @@
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			      int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
-			    struct timespec *codelet_end, int rank, int profiling);
+			    int rank, int profiling);
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
-					struct starpu_perfmodel_arch* perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end, int profiling);
+					struct starpu_perfmodel_arch* perf_arch, int profiling);
 
 struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode);
 int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworker, unsigned memnode);

+ 2 - 3
src/drivers/mp_common/source_common.c

@@ -53,8 +53,7 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 {
 	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_end;
-	_starpu_driver_end_job(worker, j, &worker->perf_arch, &codelet_end, 0,
-			profiling);
+	_starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling);
 
 	int count = worker->current_rank;
 
@@ -75,7 +74,7 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 	{
 
 		_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch,
-				&j->cl_start, &codelet_end,
+				&codelet_end,
 				profiling);
 
 		_starpu_push_task_output (j);

+ 3 - 4
src/drivers/opencl/driver_opencl.c

@@ -983,7 +983,6 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 
 static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker)
 {
-	struct timespec codelet_end;
 	int profiling = starpu_profiling_status_get();
 
 	_starpu_set_current_task(NULL);
@@ -994,14 +993,14 @@ static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker
 	worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE;
 	worker->ntasks--;
 
-	_starpu_driver_end_job(worker, j, &worker->perf_arch, &codelet_end, 0, profiling);
+	_starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling);
 
 	struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j);
 	STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", worker->workerid);
 	if(!sched_ctx->sched_policy)
-		_starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, &j->cl_start, &codelet_end, profiling);
+		_starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling);
 	else
-		_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, &j->cl_start, &codelet_end, profiling);
+		_starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling);
 
 	_starpu_push_task_output(j);