浏览代码

Move the pause implementation out of _starpu_machine_is_running into its own function, _starpu_may_pause, because _starpu_machine_is_running is called in various places where pausing can actually lead to deadlocks

Samuel Thibault 10 年之前
父节点
当前提交
c3e6095f8d

+ 10 - 3
src/core/workers.c

@@ -1224,10 +1224,10 @@ out:
 /* Condition variable and mutex used to pause/resume. */
 static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
 static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
-unsigned _starpu_machine_is_running(void)
+
+void _starpu_may_pause(void)
 {
-	unsigned ret;
-	/* running and pause_depth are just protected by a memory barrier */
+	/* pause_depth is just protected by a memory barrier */
 	STARPU_RMB();
 
 	if (STARPU_UNLIKELY(config.pause_depth > 0)) {
@@ -1237,6 +1237,13 @@ unsigned _starpu_machine_is_running(void)
 		}
 		STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
 	}
+}
+
+unsigned _starpu_machine_is_running(void)
+{
+	unsigned ret;
+	/* running is just protected by a memory barrier */
+	STARPU_RMB();
 
 	ANNOTATE_HAPPENS_AFTER(&config.running);
 	ret = config.running;

+ 3 - 0
src/core/workers.h

@@ -354,6 +354,9 @@ char ***_starpu_get_argv();
 /* Fill conf with environment variables */
 void _starpu_conf_check_environment(struct starpu_conf *conf);
 
+/* Called by the driver when it is ready to pause */
+void _starpu_may_pause(void);
+
 /* Has starpu_shutdown already been called ? */
 unsigned _starpu_machine_is_running(void);
 

+ 3 - 0
src/drivers/cpu/driver_cpu.c

@@ -301,7 +301,10 @@ _starpu_cpu_worker(void *arg)
 
 	_starpu_cpu_driver_init(args);
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_cpu_driver_run_once(args);
+	}
 	_starpu_cpu_driver_deinit(args);
 
 	return NULL;

+ 3 - 0
src/drivers/cuda/driver_cuda.c

@@ -757,7 +757,10 @@ void *_starpu_cuda_worker(void *_arg)
 	_starpu_cuda_driver_init(worker);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_cuda_driver_run_once(worker);
+	}
 	_STARPU_TRACE_END_PROGRESS(memnode);
 	_starpu_cuda_driver_deinit(worker);
 

+ 1 - 0
src/drivers/gordon/driver_gordon.c

@@ -343,6 +343,7 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
 	while(_starpu_machine_is_running())
 	{
+		_starpu_may_pause();
 		if (gordon_busy_enough())
 		{
 			/* gordon already has enough work, wait a little TODO */

+ 2 - 0
src/drivers/mp_common/source_common.c

@@ -683,6 +683,8 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 		int res;
 		struct _starpu_job * j;
 
+		_starpu_may_pause();
+
 		_STARPU_TRACE_START_PROGRESS(memnode);
 		_starpu_datawizard_progress(memnode, 1);
 		_STARPU_TRACE_END_PROGRESS(memnode);

+ 3 - 0
src/drivers/opencl/driver_opencl.c

@@ -745,7 +745,10 @@ void *_starpu_opencl_worker(void *_arg)
 	_starpu_opencl_driver_init(worker);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_opencl_driver_run_once(worker);
+	}
 	_starpu_opencl_driver_deinit(worker);
 	_STARPU_TRACE_END_PROGRESS(memnode);