Sfoglia il codice sorgente

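Cleanup: remove the total_computation_time, total_communication_time,
total_computation_time_error and total_job_performed fields from the jobq data
structure, together with the STARPU_BENCHMARK_COMM macro and the communication
timestamps passed to _starpu_driver_update_job_feedback that fed them. The
profiling capabilities recently added to StarPU should be used instead.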

Cédric Augonnet 14 anni fa
parent
commit
6d7c5c65db

+ 0 - 6
src/core/mechanisms/queues.h

@@ -26,12 +26,6 @@ struct starpu_jobq_s {
 	/* a pointer to some queue structure */
 	void *queue; 
 
-	/* for performance analysis purpose */
-	double total_computation_time;
-	double total_communication_time;
-	double total_computation_time_error;
-	unsigned total_job_performed;
-
 	/* in case workers are blocked on the queue, signaling on that 
 	  condition must unblock them, even if there is no available task */
 	pthread_cond_t activity_cond;

+ 0 - 9
src/core/workers.c

@@ -83,15 +83,6 @@ static void _starpu_init_worker_queue(struct starpu_worker_s *workerarg)
 {
 	struct starpu_jobq_s *jobq = workerarg->jobq;
 
-	PTHREAD_MUTEX_LOCK(&jobq->activity_mutex);
-
-	jobq->total_computation_time = 0.0;
-	jobq->total_communication_time = 0.0;
-	jobq->total_computation_time_error = 0.0;
-	jobq->total_job_performed = 0;
-		
-	PTHREAD_MUTEX_UNLOCK(&jobq->activity_mutex);
-
 	_starpu_memory_node_attach_queue(jobq, workerarg->memory_node);
 }
 

+ 0 - 6
src/core/workers.h

@@ -54,12 +54,6 @@
 #define STARPU_OPENCL_ALPHA	12.22f
 #define STARPU_GORDON_ALPHA	6.0f /* XXX this is a random value ... */
 
-#ifdef STARPU_DATA_STATS
-#define STARPU_BENCHMARK_COMM	1
-#else
-#define STARPU_BENCHMARK_COMM	0
-#endif
-
 struct starpu_worker_s {
 	struct starpu_machine_config_s *config;
         pthread_mutex_t mutex;

+ 3 - 26
src/drivers/cpu/driver_cpu.c

@@ -28,7 +28,6 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 {
 	int ret;
 	struct timespec codelet_start, codelet_end;
-	struct timespec codelet_start_comm, codelet_end_comm;
 
 	unsigned calibrate_model = 0;
 	int workerid = cpu_args->workerid;
@@ -41,14 +40,8 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 
-	if (STARPU_BENCHMARK_COMM)
-		starpu_clock_gettime(&codelet_start_comm);
-
 	ret = _starpu_fetch_task_input(task, 0);
 
-	if (STARPU_BENCHMARK_COMM)
-		starpu_clock_gettime(&codelet_end_comm);
-
 	if (ret != 0) {
 		/* there was not enough memory so the codelet cannot be executed right now ... */
 		/* push the codelet back and try another one ... */
@@ -60,7 +53,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	struct starpu_task_profiling_info *profiling_info;
 	profiling_info = task->profiling_info;
 
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 	{
 		starpu_clock_gettime(&codelet_start);
 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
@@ -74,7 +67,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
 	cl->per_worker_stats[workerid]++;
 	
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 		starpu_clock_gettime(&codelet_end);
 
 	STARPU_TRACE_END_CODELET_BODY(j);
@@ -83,9 +76,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	_starpu_push_task_output(task, 0);
 
 	_starpu_driver_update_job_feedback(j, cpu_args, profiling_info, calibrate_model,
-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
-
-	(void)STARPU_ATOMIC_ADD(&cpu_args->jobq->total_job_performed, 1);
+			&codelet_start, &codelet_end);
 
 	return 0;
 }
@@ -196,20 +187,6 @@ void *_starpu_cpu_worker(void *arg)
 	 * coherency is not maintained anymore at that point ! */
 	_starpu_free_all_automatically_allocated_buffers(memnode);
 
-#ifdef STARPU_DATA_STATS
-	fprintf(stderr, "CPU #%d computation %le comm %le (%lf \%%)\n", devid, jobq->total_computation_time, jobq->total_communication_time,  jobq->total_communication_time*100.0/jobq->total_computation_time);
-#endif
-
-#ifdef STARPU_VERBOSE
-	double ratio = 0;
-	if (jobq->total_job_performed != 0)
-	{
-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
-	}
-
-	_starpu_print_to_logfile("MODEL ERROR: CPU %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
-#endif
-
 	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);
 
 	pthread_exit(NULL);

+ 3 - 39
src/drivers/cuda/driver_cuda.c

@@ -91,9 +91,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	STARPU_ASSERT(j);
 	struct starpu_task *task = j->task;
 
-	cudaError_t cures;
 	struct timespec codelet_start, codelet_end;
-	struct timespec codelet_start_comm, codelet_end_comm;
 
 	unsigned calibrate_model = 0;
 	int workerid = args->workerid;
@@ -105,15 +103,6 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	if (cl->model && cl->model->benchmarking) 
 		calibrate_model = 1;
 
-	/* we do not take communication into account when modeling the performance */
-	if (STARPU_BENCHMARK_COMM)
-	{
-		cures = cudaThreadSynchronize();
-		if (STARPU_UNLIKELY(cures))
-			STARPU_CUDA_REPORT_ERROR(cures);
-		starpu_clock_gettime(&codelet_start_comm);
-	}
-
 	ret = _starpu_fetch_task_input(task, mask);
 	if (ret != 0) {
 		/* there was not enough memory, so the input of
@@ -122,20 +111,12 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 		return -EAGAIN;
 	}
 
-	if (STARPU_BENCHMARK_COMM)
-	{
-		cures = cudaThreadSynchronize();
-		if (STARPU_UNLIKELY(cures))
-			STARPU_CUDA_REPORT_ERROR(cures);
-		starpu_clock_gettime(&codelet_end_comm);
-	}
-
 	STARPU_TRACE_START_CODELET_BODY(j);
 
 	struct starpu_task_profiling_info *profiling_info;
 	profiling_info = task->profiling_info;
 
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 	{
 		starpu_clock_gettime(&codelet_start);
 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
@@ -150,7 +131,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
 	cl->per_worker_stats[workerid]++;
 
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 		starpu_clock_gettime(&codelet_end);
 
 	STARPU_TRACE_END_CODELET_BODY(j);	
@@ -159,9 +140,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	_starpu_push_task_output(task, mask);
 
 	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
-
-	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
+			&codelet_start, &codelet_end);
 
 	return 0;
 }
@@ -287,21 +266,6 @@ void *_starpu_cuda_worker(void *arg)
 
 	deinit_context(args->workerid);
 
-#ifdef STARPU_DATA_STATS
-	fprintf(stderr, "CUDA #%d computation %le comm %le (%lf \%%)\n", args->id, jobq->total_computation_time, jobq->total_communication_time, jobq->total_communication_time*100.0/jobq->total_computation_time);
-#endif
-
-#ifdef STARPU_VERBOSE
-	double ratio = 0;
-	if (jobq->total_job_performed != 0)
-	{
-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
-	}
-
-
-	_starpu_print_to_logfile("MODEL ERROR: CUDA %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", args->devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
-#endif
-
 	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CUDA_KEY);
 
 	pthread_exit(NULL);

+ 1 - 20
src/drivers/driver_common/driver_common.c

@@ -24,25 +24,16 @@
 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
 					struct starpu_task_profiling_info *profiling_info,
 					unsigned calibrate_model,
-					struct timespec *codelet_start, struct timespec *codelet_end,
-					struct timespec *codelet_start_comm, struct timespec *codelet_end_comm)
+					struct timespec *codelet_start, struct timespec *codelet_end)
 {
 	struct timespec measured_ts;
-	struct timespec measured_comm_ts;
 	double measured;
-	double measured_comm;
 
 	if (profiling_info || calibrate_model)
 	{
 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
 		measured = starpu_timing_timespec_to_us(&measured_ts);
 
-		worker_args->jobq->total_computation_time += measured;
-
-		double error;
-		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
-		worker_args->jobq->total_computation_time_error += error;
-
 		if (profiling_info)
 		{
 			memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
@@ -57,16 +48,6 @@ void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *
 		if (calibrate_model)
 			_starpu_update_perfmodel_history(j, worker_args->perf_arch, worker_args->devid, measured);
 	}
-
-	if (STARPU_BENCHMARK_COMM)
-	{
-		starpu_timespec_sub(codelet_end_comm, codelet_start_comm, &measured_comm_ts);
-		measured_comm = starpu_timing_timespec_to_us(&measured_comm_ts);
-
-		worker_args->jobq->total_communication_time += measured_comm;
-	}
-
-	(void)STARPU_ATOMIC_ADD(&worker_args->jobq->total_job_performed, 1);
 }
 
 /* Workers may block when there is no work to do at all. We assume that the

+ 1 - 2
src/drivers/driver_common/driver_common.h

@@ -27,8 +27,7 @@
 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
 		struct starpu_task_profiling_info *profiling_info,
 		unsigned calibrate_model,
-		struct timespec *codelet_start, struct timespec *codelet_end,
-		struct timespec *codelet_start_comm, struct timespec *codelet_end_comm);
+		struct timespec *codelet_start, struct timespec *codelet_end);
 
 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
 

+ 4 - 36
src/drivers/opencl/driver_opencl.c

@@ -382,22 +382,7 @@ void *_starpu_opencl_worker(void *arg)
 
 	STARPU_TRACE_WORKER_DEINIT_START
 
-          _starpu_opencl_deinit_context(devid);
-
-#ifdef STARPU_DATA_STATS
-	fprintf(stderr, "OpenCL #%d computation %le comm %le (%lf \%%)\n", args->id, jobq->total_computation_time, jobq->total_communication_time, jobq->total_communication_time*100.0/jobq->total_computation_time);
-#endif
-
-#ifdef STARPU_VERBOSE
-	double ratio = 0;
-	if (jobq->total_job_performed != 0)
-	{
-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
-	}
-
-
-	_starpu_print_to_logfile("MODEL ERROR: OpenCL %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", args->devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
-#endif
+        _starpu_opencl_deinit_context(devid);
 
 	pthread_exit(NULL);
 
@@ -437,7 +422,6 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 	struct starpu_task *task = j->task;
 
 	struct timespec codelet_start, codelet_end;
-	struct timespec codelet_start_comm, codelet_end_comm;
 
 	unsigned calibrate_model = 0;
 	int workerid = args->workerid;
@@ -449,13 +433,6 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 
-	/* we do not take communication into account when modeling the performance */
-	if (STARPU_BENCHMARK_COMM)
-	{
-                //barrier(CLK_GLOBAL_MEM_FENCE);
-		starpu_clock_gettime(&codelet_start_comm);
-	}
-
 	ret = _starpu_fetch_task_input(task, mask);
 	if (ret != 0) {
 		/* there was not enough memory, so the input of
@@ -464,18 +441,12 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 		return -EAGAIN;
 	}
 
-	if (STARPU_BENCHMARK_COMM)
-	{
-                //barrier(CLK_GLOBAL_MEM_FENCE);
-		starpu_clock_gettime(&codelet_end_comm);
-	}
-
 	STARPU_TRACE_START_CODELET_BODY(j);
 
 	struct starpu_task_profiling_info *profiling_info;
 	profiling_info = task->profiling_info;
 
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 	{
 		starpu_clock_gettime(&codelet_start);
 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
@@ -490,7 +461,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
 	cl->per_worker_stats[workerid]++;
 
-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model)
 		starpu_clock_gettime(&codelet_end);
 
 	STARPU_TRACE_END_CODELET_BODY(j);
@@ -499,10 +470,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 	_starpu_push_task_output(task, mask);
 
 	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
-
-	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
-
+							&codelet_start, &codelet_end);
 
 	return EXIT_SUCCESS;
 }