Browse Source

Account active spinning time into sleeping time when using non-blocking drivers

Samuel Thibault 12 years ago
parent
commit
b20e9ae4fa

+ 4 - 0
doc/chapters/advanced-examples.texi

@@ -197,9 +197,11 @@ for (worker = 0; worker < starpu_worker_get_count(); worker++)
         double total_time = starpu_timing_timespec_to_us(&worker_info.total_time);
         double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time);
         double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time);
+        double overhead_time = total_time - executing_time - sleeping_time;
 
         float executing_ratio = 100.0*executing_time/total_time;
         float sleeping_ratio = 100.0*sleeping_time/total_time;
+	float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio;
 
         char workername[128];
         starpu_worker_get_name(worker, workername, 128);
@@ -209,6 +211,8 @@ for (worker = 0; worker < starpu_worker_get_count(); worker++)
                 executing_ratio);
         fprintf(stderr, "\tblocked time: %.2lf ms (%.2f %%)\n", sleeping_time*1e-3,
                 sleeping_ratio);
+        fprintf(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3,
+                overhead_ratio);
 @}
 @end smallexample
 @end cartouche

+ 4 - 1
examples/profiling/profiling.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -117,9 +117,11 @@ int main(int argc, char **argv)
 		double total_time = starpu_timing_timespec_to_us(&worker_info.total_time);
 		double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time);
 		double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time);
+		double overhead_time = total_time - executing_time - sleeping_time;
 
 		float executing_ratio = 100.0*executing_time/total_time;
 		float sleeping_ratio = 100.0*sleeping_time/total_time;
+		float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio;
 
 		char workername[128];
 		starpu_worker_get_name(worker, workername, 128);
@@ -127,6 +129,7 @@ int main(int argc, char **argv)
 		FPRINTF(stderr, "\ttotal time : %.2lf ms\n", total_time*1e-3);
 		FPRINTF(stderr, "\texec time  : %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio);
 		FPRINTF(stderr, "\tblocked time  : %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, sleeping_ratio);
+		FPRINTF(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3, overhead_ratio);
 	}
 
 	starpu_shutdown();

+ 1 - 17
src/drivers/cpu/driver_cpu.c

@@ -160,31 +160,15 @@ int _starpu_cpu_driver_run_once(struct starpu_driver *d)
 	_starpu_datawizard_progress(memnode, 1);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 
-	/* Note: we need to keep the sched condition mutex all along the path
-	 * from popping a task from the scheduler to blocking. Otherwise the
-	 * driver may go block just after the scheduler got a new task to be
-	 * executed, and thus hanging. */
-	_STARPU_PTHREAD_MUTEX_LOCK(cpu_worker->sched_mutex);
-
         struct _starpu_job *j;
 	struct starpu_task *task;
 	int res;
 
-	task = _starpu_pop_task(cpu_worker);
+	task = _starpu_get_worker_task(cpu_worker, workerid, memnode);
 
 	if (!task)
-	{
-		if (_starpu_worker_can_block(memnode))
-			_starpu_block_worker(workerid, cpu_worker->sched_cond, cpu_worker->sched_mutex);
-
-		_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_worker->sched_mutex);
-
 		return 0;
-	};
-
-	_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_worker->sched_mutex);
 
-	STARPU_ASSERT(task);
 	j = _starpu_get_job_associated_to_task(task);
 
 	/* can a cpu perform that task ? */

+ 5 - 17
src/drivers/cuda/driver_cuda.c

@@ -151,6 +151,8 @@ static void init_context(int devid)
 	cudaError_t cures;
 	int workerid;
 
+	/* TODO: cudaSetDeviceFlags(cudaDeviceMapHost) */
+
 	starpu_cuda_set_device(devid);
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
@@ -388,28 +390,14 @@ int _starpu_cuda_driver_run_once(struct starpu_driver *d)
 	_starpu_datawizard_progress(memnode, 1);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 
-	/* Note: we need to keep the sched condition mutex all along the path
-	 * from popping a task from the scheduler to blocking. Otherwise the
-	 * driver may go block just after the scheduler got a new task to be
-	 * executed, and thus hanging. */
-	_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
-
-	struct starpu_task *task = _starpu_pop_task(args);
+	struct starpu_task *task;
 	struct _starpu_job *j = NULL;
 
-	if (task == NULL)
-	{
-		if (_starpu_worker_can_block(memnode))
-			_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
-
-		_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
+	task = _starpu_get_worker_task(args, workerid, memnode);
 
+	if (!task)
 		return 0;
-	}
-
-	_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
 
-	STARPU_ASSERT(task);
 	j = _starpu_get_job_associated_to_task(task);
 
 	/* can CUDA do that task ? */

+ 34 - 15
src/drivers/driver_common/driver_common.c

@@ -142,25 +142,44 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	}
 }
 
-/* Workers may block when there is no work to do at all. We assume that the
- * mutex is hold when that function is called. */
-void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex)
+/* Pop a task for the worker. If there is none, mark the worker as sleeping, block if it may, and return NULL. */
+struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode)
 {
-	struct timespec start_time, end_time;
+	struct starpu_task *task;
 
-	_STARPU_TRACE_WORKER_SLEEP_START
-	_starpu_worker_set_status(workerid, STATUS_SLEEPING);
+	/* Note: we need to hold the sched condition mutex all along the path
+	 * from popping a task from the scheduler to blocking. Otherwise the
+	 * driver may block just after the scheduler got a new task to be
+	 * executed, and thus hang. */
+	_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
 
-	_starpu_clock_gettime(&start_time);
-	_starpu_worker_register_sleeping_start_date(workerid, &start_time);
+	task = _starpu_pop_task(args);
 
-	_STARPU_PTHREAD_COND_WAIT(cond, mutex);
+	if (task == NULL)
+	{
+		if (_starpu_worker_get_status(workerid) != STATUS_SLEEPING)
+		{
+			_STARPU_TRACE_WORKER_SLEEP_START
+			_starpu_worker_restart_sleeping(workerid);
+			_starpu_worker_set_status(workerid, STATUS_SLEEPING);
+		}
+
+		if (_starpu_worker_can_block(memnode))
+			_STARPU_PTHREAD_COND_WAIT(args->sched_cond, args->sched_mutex);
+
+		_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
+
+		return NULL;
+	}
+
+	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING)
+	{
+		_STARPU_TRACE_WORKER_SLEEP_END
+		_starpu_worker_stop_sleeping(workerid);
+		_starpu_worker_set_status(workerid, STATUS_UNKNOWN);
+	}
 
-	_starpu_worker_set_status(workerid, STATUS_UNKNOWN);
-	_STARPU_TRACE_WORKER_SLEEP_END
-	_starpu_clock_gettime(&end_time);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
 
-	int profiling = starpu_profiling_status_get();
-	if (profiling)
-		_starpu_worker_update_profiling_info_sleeping(workerid, &start_time, &end_time);
+	return task;
 }

+ 1 - 1
src/drivers/driver_common/driver_common.h

@@ -31,6 +31,6 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 					enum starpu_perf_archtype perf_arch,
 					struct timespec *codelet_start, struct timespec *codelet_end, int profiling);
 
-void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
+struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode);
 
 #endif // __DRIVER_COMMON_H__

+ 1 - 17
src/drivers/opencl/driver_opencl.c

@@ -491,27 +491,11 @@ int _starpu_opencl_driver_run_once(struct starpu_driver *d)
 	_starpu_datawizard_progress(memnode, 1);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 
-	/* Note: we need to keep the sched condition mutex all along the path
-	 * from popping a task from the scheduler to blocking. Otherwise the
-	 * driver may go block just after the scheduler got a new task to be
-	 * executed, and thus hanging. */
-	_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
-
-	task = _starpu_pop_task(args);
+	task = _starpu_get_worker_task(args, workerid, memnode);
 
 	if (task == NULL)
-	{
-		if (_starpu_worker_can_block(memnode))
-			_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
-
-		_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
-
 		return 0;
-	};
-
-	_STARPU_PTHREAD_MUTEX_UNLOCK(args->sched_mutex);
 
-	STARPU_ASSERT(task);
 	j = _starpu_get_job_associated_to_task(task);
 
 	/* can OpenCL do that task ? */

+ 25 - 15
src/profiling/profiling.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -182,34 +182,32 @@ void _starpu_worker_reset_profiling_info(int workerid)
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 }
 
-void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *sleeping_start)
+void _starpu_worker_restart_sleeping(int workerid)
 {
 	if (profiling)
 	{
+		struct timespec sleep_start_time;
+		_starpu_clock_gettime(&sleep_start_time);
+
 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 		worker_registered_sleeping_start[workerid] = 1;
-		memcpy(&sleeping_start_date[workerid], sleeping_start, sizeof(struct timespec));
+		memcpy(&sleeping_start_date[workerid], &sleep_start_time, sizeof(struct timespec));
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	}
 }
 
-void _starpu_worker_register_executing_start_date(int workerid, struct timespec *executing_start)
+void _starpu_worker_stop_sleeping(int workerid)
 {
 	if (profiling)
 	{
-		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
-		worker_registered_executing_start[workerid] = 1;
-		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
-		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
-	}
-}
+		struct timespec *sleeping_start, sleep_end_time;
+
+		_starpu_clock_gettime(&sleep_end_time);
 
-void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec *sleeping_start, struct timespec *sleeping_end)
-{
-	if (profiling)
-	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 
+		sleeping_start = &sleeping_start_date[workerid];
+
                 /* Perhaps that profiling was enabled while the worker was
                  * already blocked, so we don't measure (end - start), but
                  * (end - max(start,worker_start)) where worker_start is the
@@ -222,13 +220,25 @@ void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec
 		}
 
 		struct timespec sleeping_time;
-		starpu_timespec_sub(sleeping_end, sleeping_start, &sleeping_time);
+		starpu_timespec_sub(&sleep_end_time, sleeping_start, &sleeping_time);
 
 		starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
 
 		worker_registered_sleeping_start[workerid] = 0;
 
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+
+	}
+}
+
+void _starpu_worker_register_executing_start_date(int workerid, struct timespec *executing_start)
+{
+	if (profiling)
+	{
+		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+		worker_registered_executing_start[workerid] = 1;
+		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	}
 }
 

+ 6 - 6
src/profiling/profiling.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -34,13 +34,13 @@ void _starpu_worker_reset_profiling_info(int workerid);
  * This tells StarPU how much time was spent doing computation. */
 void _starpu_worker_update_profiling_info_executing(int workerid, struct timespec *executing_time, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double consumed_power);
 
-/* Update the per-worker profiling info when StarPU wakes up: this indicates
+/* Record the date when the worker started to sleep. This makes it possible to measure
  * how much time was spent sleeping. */
-void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec *sleeping_start, struct timespec *sleeping_end);
+void _starpu_worker_restart_sleeping(int workerid);
 
-/* Record the date when the worker started to sleep. This permits to measure
- * how much time was spent sleeping when it becomes awake later on. */
-void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *sleeping_start);
+/* Record the date when the worker stopped sleeping. This makes it possible to measure
+ * how much time was spent sleeping. */
+void _starpu_worker_stop_sleeping(int workerid);
 
 /* Record the date when the worker started to execute a piece of code. This
  * permits to measure how much time was really spent doing computation at the