15 years ago · 6d7c5c65db
--- a/src/core/mechanisms/queues.h
+++ b/src/core/mechanisms/queues.h
@@ -26,12 +26,6 @@ struct starpu_jobq_s {
 
																 	/* a pointer to some queue structure */
															
 
																 	void *queue; 
															
 
																-	/* for performance analysis purpose */
															
 
																-	double total_computation_time;
															
 
																-	double total_communication_time;
															
 
																-	double total_computation_time_error;
															
 
																-	unsigned total_job_performed;
															
 
																-
															
 
																 	/* in case workers are blocked on the queue, signaling on that 
															
 
																 	  condition must unblock them, even if there is no available task */
															
 
																 	pthread_cond_t activity_cond;
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -83,15 +83,6 @@ static void _starpu_init_worker_queue(struct starpu_worker_s *workerarg)
 
																 {
															
 
																 	struct starpu_jobq_s *jobq = workerarg->jobq;
															
 
																-	PTHREAD_MUTEX_LOCK(&jobq->activity_mutex);
															
 
																-
															
 
																-	jobq->total_computation_time = 0.0;
															
 
																-	jobq->total_communication_time = 0.0;
															
 
																-	jobq->total_computation_time_error = 0.0;
															
 
																-	jobq->total_job_performed = 0;
															
 
																-		
															
 
																-	PTHREAD_MUTEX_UNLOCK(&jobq->activity_mutex);
															
 
																-
															
 
																 	_starpu_memory_node_attach_queue(jobq, workerarg->memory_node);
															
 
																 }
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -54,12 +54,6 @@
 
																 #define STARPU_OPENCL_ALPHA	12.22f
															
 
																 #define STARPU_GORDON_ALPHA	6.0f /* XXX this is a random value ... */
															
 
																-#ifdef STARPU_DATA_STATS
															
 
																-#define STARPU_BENCHMARK_COMM	1
															
 
																-#else
															
 
																-#define STARPU_BENCHMARK_COMM	0
															
 
																-#endif
															
 
																-
															
 
																 struct starpu_worker_s {
															
 
																 	struct starpu_machine_config_s *config;
															
 
																         pthread_mutex_t mutex;
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -28,7 +28,6 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
																 {
															
 
																 	int ret;
															
 
																 	struct timespec codelet_start, codelet_end;
															
 
																-	struct timespec codelet_start_comm, codelet_end_comm;
															
 
																 	unsigned calibrate_model = 0;
															
 
																 	int workerid = cpu_args->workerid;
															
@@ -41,14 +40,8 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
																 	if (cl->model && cl->model->benchmarking)
															
 
																 		calibrate_model = 1;
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-		starpu_clock_gettime(&codelet_start_comm);
															
 
																-
															
 
																 	ret = _starpu_fetch_task_input(task, 0);
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-		starpu_clock_gettime(&codelet_end_comm);
															
 
																-
															
 
																 	if (ret != 0) {
															
 
																 		/* there was not enough memory so the codelet cannot be executed right now ... */
															
 
																 		/* push the codelet back and try another one ... */
															
@@ -60,7 +53,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
																 	struct starpu_task_profiling_info *profiling_info;
															
 
																 	profiling_info = task->profiling_info;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 	{
															
 
																 		starpu_clock_gettime(&codelet_start);
															
 
																 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
															
@@ -74,7 +67,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
																 	cl->per_worker_stats[workerid]++;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 		starpu_clock_gettime(&codelet_end);
															
 
																 	STARPU_TRACE_END_CODELET_BODY(j);
															
@@ -83,9 +76,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
																 	_starpu_push_task_output(task, 0);
															
 
																 	_starpu_driver_update_job_feedback(j, cpu_args, profiling_info, calibrate_model,
															
 
																-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
															
 
																-
															
 
																-	(void)STARPU_ATOMIC_ADD(&cpu_args->jobq->total_job_performed, 1);
															
 
																+			&codelet_start, &codelet_end);
															
 
																 	return 0;
															
 
																 }
															
@@ -196,20 +187,6 @@ void *_starpu_cpu_worker(void *arg)
 
																 	 * coherency is not maintained anymore at that point ! */
															
 
																 	_starpu_free_all_automatically_allocated_buffers(memnode);
															
 
																-#ifdef STARPU_DATA_STATS
															
 
																-	fprintf(stderr, "CPU #%d computation %le comm %le (%lf \%%)\n", devid, jobq->total_computation_time, jobq->total_communication_time,  jobq->total_communication_time*100.0/jobq->total_computation_time);
															
 
																-#endif
															
 
																-
															
 
																-#ifdef STARPU_VERBOSE
															
 
																-	double ratio = 0;
															
 
																-	if (jobq->total_job_performed != 0)
															
 
																-	{
															
 
																-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
															
 
																-	}
															
 
																-
															
 
																-	_starpu_print_to_logfile("MODEL ERROR: CPU %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
															
 
																-#endif
															
 
																-
															
 
																 	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);
															
 
																 	pthread_exit(NULL);
															
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -91,9 +91,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
																 	STARPU_ASSERT(j);
															
 
																 	struct starpu_task *task = j->task;
															
 
																-	cudaError_t cures;
															
 
																 	struct timespec codelet_start, codelet_end;
															
 
																-	struct timespec codelet_start_comm, codelet_end_comm;
															
 
																 	unsigned calibrate_model = 0;
															
 
																 	int workerid = args->workerid;
															
@@ -105,15 +103,6 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
																 	if (cl->model && cl->model->benchmarking) 
															
 
																 		calibrate_model = 1;
															
 
																-	/* we do not take communication into account when modeling the performance */
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-	{
															
 
																-		cures = cudaThreadSynchronize();
															
 
																-		if (STARPU_UNLIKELY(cures))
															
 
																-			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-		starpu_clock_gettime(&codelet_start_comm);
															
 
																-	}
															
 
																-
															
 
																 	ret = _starpu_fetch_task_input(task, mask);
															
 
																 	if (ret != 0) {
															
 
																 		/* there was not enough memory, so the input of
															
@@ -122,20 +111,12 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
																 		return -EAGAIN;
															
 
																 	}
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-	{
															
 
																-		cures = cudaThreadSynchronize();
															
 
																-		if (STARPU_UNLIKELY(cures))
															
 
																-			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-		starpu_clock_gettime(&codelet_end_comm);
															
 
																-	}
															
 
																-
															
 
																 	STARPU_TRACE_START_CODELET_BODY(j);
															
 
																 	struct starpu_task_profiling_info *profiling_info;
															
 
																 	profiling_info = task->profiling_info;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 	{
															
 
																 		starpu_clock_gettime(&codelet_start);
															
 
																 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
															
@@ -150,7 +131,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
																 	cl->per_worker_stats[workerid]++;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 		starpu_clock_gettime(&codelet_end);
															
 
																 	STARPU_TRACE_END_CODELET_BODY(j);	
															
@@ -159,9 +140,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
																 	_starpu_push_task_output(task, mask);
															
 
																 	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
															
 
																-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
															
 
																-
															
 
																-	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
															
 
																+			&codelet_start, &codelet_end);
															
 
																 	return 0;
															
 
																 }
															
@@ -287,21 +266,6 @@ void *_starpu_cuda_worker(void *arg)
 
																 	deinit_context(args->workerid);
															
 
																-#ifdef STARPU_DATA_STATS
															
 
																-	fprintf(stderr, "CUDA #%d computation %le comm %le (%lf \%%)\n", args->id, jobq->total_computation_time, jobq->total_communication_time, jobq->total_communication_time*100.0/jobq->total_computation_time);
															
 
																-#endif
															
 
																-
															
 
																-#ifdef STARPU_VERBOSE
															
 
																-	double ratio = 0;
															
 
																-	if (jobq->total_job_performed != 0)
															
 
																-	{
															
 
																-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
															
 
																-	}
															
 
																-
															
 
																-
															
 
																-	_starpu_print_to_logfile("MODEL ERROR: CUDA %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", args->devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
															
 
																-#endif
															
 
																-
															
 
																 	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CUDA_KEY);
															
 
																 	pthread_exit(NULL);
															
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -24,25 +24,16 @@
 
																 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
															
 
																 					struct starpu_task_profiling_info *profiling_info,
															
 
																 					unsigned calibrate_model,
															
 
																-					struct timespec *codelet_start, struct timespec *codelet_end,
															
 
																-					struct timespec *codelet_start_comm, struct timespec *codelet_end_comm)
															
 
																+					struct timespec *codelet_start, struct timespec *codelet_end)
															
 
																 {
															
 
																 	struct timespec measured_ts;
															
 
																-	struct timespec measured_comm_ts;
															
 
																 	double measured;
															
 
																-	double measured_comm;
															
 
																 	if (profiling_info || calibrate_model)
															
 
																 	{
															
 
																 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
															
 
																 		measured = starpu_timing_timespec_to_us(&measured_ts);
															
 
																-		worker_args->jobq->total_computation_time += measured;
															
 
																-
															
 
																-		double error;
															
 
																-		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
															
 
																-		worker_args->jobq->total_computation_time_error += error;
															
 
																-
															
 
																 		if (profiling_info)
															
 
																 		{
															
 
																 			memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
															
@@ -57,16 +48,6 @@ void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *
 
																 		if (calibrate_model)
															
 
																 			_starpu_update_perfmodel_history(j, worker_args->perf_arch, worker_args->devid, measured);
															
 
																 	}
															
 
																-
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-	{
															
 
																-		starpu_timespec_sub(codelet_end_comm, codelet_start_comm, &measured_comm_ts);
															
 
																-		measured_comm = starpu_timing_timespec_to_us(&measured_comm_ts);
															
 
																-
															
 
																-		worker_args->jobq->total_communication_time += measured_comm;
															
 
																-	}
															
 
																-
															
 
																-	(void)STARPU_ATOMIC_ADD(&worker_args->jobq->total_job_performed, 1);
															
 
																 }
															
 
																 /* Workers may block when there is no work to do at all. We assume that the
															
--- a/src/drivers/driver_common/driver_common.h
+++ b/src/drivers/driver_common/driver_common.h
@@ -27,8 +27,7 @@
 
																 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
															
 
																 		struct starpu_task_profiling_info *profiling_info,
															
 
																 		unsigned calibrate_model,
															
 
																-		struct timespec *codelet_start, struct timespec *codelet_end,
															
 
																-		struct timespec *codelet_start_comm, struct timespec *codelet_end_comm);
															
 
																+		struct timespec *codelet_start, struct timespec *codelet_end);
															
 
																 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
															
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -382,22 +382,7 @@ void *_starpu_opencl_worker(void *arg)
 
																 	STARPU_TRACE_WORKER_DEINIT_START
															
 
																-          _starpu_opencl_deinit_context(devid);
															
 
																-
															
 
																-#ifdef STARPU_DATA_STATS
															
 
																-	fprintf(stderr, "OpenCL #%d computation %le comm %le (%lf \%%)\n", args->id, jobq->total_computation_time, jobq->total_communication_time, jobq->total_communication_time*100.0/jobq->total_computation_time);
															
 
																-#endif
															
 
																-
															
 
																-#ifdef STARPU_VERBOSE
															
 
																-	double ratio = 0;
															
 
																-	if (jobq->total_job_performed != 0)
															
 
																-	{
															
 
																-		ratio = jobq->total_computation_time_error/jobq->total_computation_time;
															
 
																-	}
															
 
																-
															
 
																-
															
 
																-	_starpu_print_to_logfile("MODEL ERROR: OpenCL %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", args->devid, jobq->total_computation_time_error, jobq->total_computation_time, ratio, jobq->total_job_performed);
															
 
																-#endif
															
 
																+        _starpu_opencl_deinit_context(devid);
															
 
																 	pthread_exit(NULL);
															
@@ -437,7 +422,6 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
																 	struct starpu_task *task = j->task;
															
 
																 	struct timespec codelet_start, codelet_end;
															
 
																-	struct timespec codelet_start_comm, codelet_end_comm;
															
 
																 	unsigned calibrate_model = 0;
															
 
																 	int workerid = args->workerid;
															
@@ -449,13 +433,6 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
																 	if (cl->model && cl->model->benchmarking)
															
 
																 		calibrate_model = 1;
															
 
																-	/* we do not take communication into account when modeling the performance */
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-	{
															
 
																-                //barrier(CLK_GLOBAL_MEM_FENCE);
															
 
																-		starpu_clock_gettime(&codelet_start_comm);
															
 
																-	}
															
 
																-
															
 
																 	ret = _starpu_fetch_task_input(task, mask);
															
 
																 	if (ret != 0) {
															
 
																 		/* there was not enough memory, so the input of
															
@@ -464,18 +441,12 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
																 		return -EAGAIN;
															
 
																 	}
															
 
																-	if (STARPU_BENCHMARK_COMM)
															
 
																-	{
															
 
																-                //barrier(CLK_GLOBAL_MEM_FENCE);
															
 
																-		starpu_clock_gettime(&codelet_end_comm);
															
 
																-	}
															
 
																-
															
 
																 	STARPU_TRACE_START_CODELET_BODY(j);
															
 
																 	struct starpu_task_profiling_info *profiling_info;
															
 
																 	profiling_info = task->profiling_info;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 	{
															
 
																 		starpu_clock_gettime(&codelet_start);
															
 
																 		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
															
@@ -490,7 +461,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
																 	cl->per_worker_stats[workerid]++;
															
 
																-	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
															
 
																+	if (profiling_info || calibrate_model)
															
 
																 		starpu_clock_gettime(&codelet_end);
															
 
																 	STARPU_TRACE_END_CODELET_BODY(j);
															
@@ -499,10 +470,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
																 	_starpu_push_task_output(task, mask);
															
 
																 	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
															
 
																-			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
															
 
																-
															
 
																-	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
															
 
																-
															
 
																+							&codelet_start, &codelet_end);
															
 
																 	return EXIT_SUCCESS;
															
 
																 }