Nathalie Furmento vor 10 Jahren
Ursprung
Commit
67951c8857

+ 2 - 1
Makefile.am

@@ -84,7 +84,8 @@ versinclude_HEADERS = 				\
 	include/starpu_stdlib.h			\
 	include/starpu_stdlib.h			\
 	include/starpu_thread.h			\
 	include/starpu_thread.h			\
 	include/starpu_thread_util.h		\
 	include/starpu_thread_util.h		\
-	include/starpu_tree.h
+	include/starpu_tree.h			\
+	include/starpu_simgrid_wrap.h
 
 
 nodist_versinclude_HEADERS = 			\
 nodist_versinclude_HEADERS = 			\
 	include/starpu_config.h
 	include/starpu_config.h

+ 2 - 2
doc/doxygen/chapters/21simgrid.doxy

@@ -22,9 +22,9 @@ get the simulated time, it has to use starpu_timing_now() which returns the
 virtual timestamp in us.
 virtual timestamp in us.
 
 
 For some technical reason, the application's .c file which contains main() has
 For some technical reason, the application's .c file which contains main() has
-to be recompiled with starpu.h, which in the simgrid case will # define main()
+to be recompiled with starpu_simgrid_wrap.h, which in the simgrid case will # define main()
 into starpu_main(), and it is libstarpu which will provide the real main() and
 into starpu_main(), and it is libstarpu which will provide the real main() and
-call the application's main().
+will call the application's main().
 
 
 To be able to test with crazy data sizes, one may want to only allocate
 To be able to test with crazy data sizes, one may want to only allocate
 application data if STARPU_SIMGRID is not defined.  Passing a NULL pointer to
 application data if STARPU_SIMGRID is not defined.  Passing a NULL pointer to

+ 3 - 2
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -57,8 +57,9 @@ case of e.g. available particular CUDA or OpenCL support.
 \ingroup API_Data_Interfaces
 \ingroup API_Data_Interfaces
 \var starpu_data_copy_methods::can_copy
 \var starpu_data_copy_methods::can_copy
 If defined, allows the interface to declare whether it supports transferring
 If defined, allows the interface to declare whether it supports transferring
-from \p src_interface on node \p src_node to \p dst_interface on node \p. If not
-defined, it is assumed that the interface supports all transfers.
+from \p src_interface on node \p src_node to \p dst_interface on node \p
+dst_node, run from node \p handling_node. If not defined, it is assumed that the
+interface supports all transfers.
 \var starpu_data_copy_methods::ram_to_ram
 \var starpu_data_copy_methods::ram_to_ram
 Define how to copy data from the \p src_interface interface on the \p
 Define how to copy data from the \p src_interface interface on the \p
 src_node CPU node to the \p dst_interface interface on the \p dst_node
 src_node CPU node to the \p dst_interface interface on the \p dst_node

+ 1 - 6
include/starpu.h

@@ -66,18 +66,13 @@ typedef UINT_PTR uintptr_t;
 #include <starpu_fxt.h>
 #include <starpu_fxt.h>
 #include <starpu_driver.h>
 #include <starpu_driver.h>
 #include <starpu_tree.h>
 #include <starpu_tree.h>
+#include <starpu_simgrid_wrap.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
 
 
-#ifdef STARPU_SIMGRID
-#ifndef main
-#define main starpu_main
-#endif
-#endif
-
 struct starpu_conf
 struct starpu_conf
 {
 {
 	int magic;
 	int magic;

+ 1 - 1
include/starpu_data_interfaces.h

@@ -37,7 +37,7 @@ extern "C"
 
 
 struct starpu_data_copy_methods
 struct starpu_data_copy_methods
 {
 {
-	int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+	int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node);
 
 
 	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);

+ 28 - 0
include/starpu_simgrid_wrap.h

@@ -0,0 +1,28 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_SIMGRID_WRAP_H__
+#define __STARPU_SIMGRID_WRAP_H__
+
+#include <starpu_config.h>
+
+#ifdef STARPU_SIMGRID
+#ifndef main
+#define main starpu_main
+#endif
+#endif
+
+#endif /* __STARPU_SIMGRID_WRAP_H__ */

+ 21 - 19
src/common/utils.c

@@ -83,10 +83,24 @@ int _starpu_mkpath(const char *s, mode_t mode)
 	if ((_starpu_mkpath(up, mode) == -1) && (errno != EEXIST))
 	if ((_starpu_mkpath(up, mode) == -1) && (errno != EEXIST))
 		goto out;
 		goto out;
 
 
-	if ((mkdir(path, mode) == -1) && (errno != EEXIST))
-		rv = -1;
-	else
+	struct stat sb;
+	if (stat(path, &sb) == 0)
+	{
+		if (!S_ISDIR(sb.st_mode))
+		{
+			fprintf(stderr,"Error: %s is not a directory:\n", path);
+			STARPU_ABORT();
+		}
+		/* It already exists and is a directory.  */
 		rv = 0;
 		rv = 0;
+	}
+	else
+	{
+		if ((mkdir(path, mode) == -1) && (errno != EEXIST))
+			rv = -1;
+		else
+			rv = 0;
+	}
 
 
 out:
 out:
 	olderrno = errno;
 	olderrno = errno;
@@ -105,23 +119,11 @@ void _starpu_mkpath_and_check(const char *path, mode_t mode)
 
 
 	ret = _starpu_mkpath(path, mode);
 	ret = _starpu_mkpath(path, mode);
 
 
-	if (ret == -1)
+	if (ret == -1 && errno != EEXIST)
 	{
 	{
-		if (errno != EEXIST)
-		{
-			fprintf(stderr,"Error making StarPU directory %s:\n", path);
-			perror("mkdir");
-			STARPU_ABORT();
-		}
-
-		/* make sure that it is actually a directory */
-		struct stat sb;
-		stat(path, &sb);
-		if (!S_ISDIR(sb.st_mode))
-		{
-			fprintf(stderr,"Error: %s is not a directory:\n", path);
-			STARPU_ABORT();
-		}
+		fprintf(stderr,"Error making StarPU directory %s:\n", path);
+		perror("mkdir");
+		STARPU_ABORT();
 	}
 	}
 }
 }
 
 

+ 2 - 0
src/core/sched_policy.c

@@ -859,6 +859,7 @@ pick:
 	 * We do have a task that uses multiformat handles. Let's create the
 	 * We do have a task that uses multiformat handles. Let's create the
 	 * required conversion tasks.
 	 * required conversion tasks.
 	 */
 	 */
+	STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
 	unsigned i;
 	unsigned i;
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
 	for (i = 0; i < nbuffers; i++)
 	for (i = 0; i < nbuffers; i++)
@@ -882,6 +883,7 @@ pick:
 
 
 	task->mf_skip = 1;
 	task->mf_skip = 1;
 	starpu_task_list_push_back(&worker->local_tasks, task);
 	starpu_task_list_push_back(&worker->local_tasks, task);
+	STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
 	goto pick;
 	goto pick;
 
 
 profiling:
 profiling:

+ 7 - 1
src/core/simgrid.c

@@ -160,7 +160,7 @@ int main(int argc, char **argv)
 
 
 	if (!starpu_main && !(smpi_main && smpi_simulated_main_))
 	if (!starpu_main && !(smpi_main && smpi_simulated_main_))
 	{
 	{
-		_STARPU_ERROR("The main file of this application needs to be compiled with starpu.h included, to properly define starpu_main\n");
+		_STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h included, to properly rename it into starpu_main\n");
 		exit(EXIT_FAILURE);
 		exit(EXIT_FAILURE);
 	}
 	}
 
 
@@ -196,6 +196,12 @@ void _starpu_simgrid_init()
 	xbt_dynar_t hosts;
 	xbt_dynar_t hosts;
 	int i;
 	int i;
 
 
+	if (!starpu_main && !(smpi_main && smpi_simulated_main_))
+	{
+		_STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h included, to properly rename it into starpu_main\n");
+		exit(EXIT_FAILURE);
+	}
+
 #ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
 #ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
 	if (_starpu_simgrid_running_smpi())
 	if (_starpu_simgrid_running_smpi())
 	{
 	{

+ 10 - 3
src/core/workers.c

@@ -1232,10 +1232,10 @@ out:
 /* Condition variable and mutex used to pause/resume. */
 /* Condition variable and mutex used to pause/resume. */
 static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
 static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
 static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
-unsigned _starpu_machine_is_running(void)
+
+void _starpu_may_pause(void)
 {
 {
-	unsigned ret;
-	/* running and pause_depth are just protected by a memory barrier */
+	/* pause_depth is just protected by a memory barrier */
 	STARPU_RMB();
 	STARPU_RMB();
 
 
 	if (STARPU_UNLIKELY(config.pause_depth > 0)) {
 	if (STARPU_UNLIKELY(config.pause_depth > 0)) {
@@ -1245,6 +1245,13 @@ unsigned _starpu_machine_is_running(void)
 		}
 		}
 		STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
 	}
 	}
+}
+
+unsigned _starpu_machine_is_running(void)
+{
+	unsigned ret;
+	/* running is just protected by a memory barrier */
+	STARPU_RMB();
 
 
 	ANNOTATE_HAPPENS_AFTER(&config.running);
 	ANNOTATE_HAPPENS_AFTER(&config.running);
 	ret = config.running;
 	ret = config.running;

+ 3 - 0
src/core/workers.h

@@ -354,6 +354,9 @@ char ***_starpu_get_argv();
 /* Fill conf with environment variables */
 /* Fill conf with environment variables */
 void _starpu_conf_check_environment(struct starpu_conf *conf);
 void _starpu_conf_check_environment(struct starpu_conf *conf);
 
 
+/* Called by the driver when it is ready to pause  */
+void _starpu_may_pause(void);
+
 /* Has starpu_shutdown already been called ? */
 /* Has starpu_shutdown already been called ? */
 unsigned _starpu_machine_is_running(void);
 unsigned _starpu_machine_is_running(void);
 
 

+ 41 - 22
src/datawizard/coherency.c

@@ -42,8 +42,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 	double cost = INFINITY;
 	double cost = INFINITY;
 	unsigned src_node_mask = 0;
 	unsigned src_node_mask = 0;
 
 
-	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
-
 	for (node = 0; node < nnodes; node++)
 	for (node = 0; node < nnodes; node++)
 	{
 	{
 		if (handle->per_node[node].state != STARPU_INVALID)
 		if (handle->per_node[node].state != STARPU_INVALID)
@@ -75,15 +73,6 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 				double time = starpu_transfer_predict(i, destination, size);
 				double time = starpu_transfer_predict(i, destination, size);
 				unsigned handling_node;
 				unsigned handling_node;
 
 
-				/* Avoid transfers which the interface does not want */
-				if (copy_methods->can_copy)
-				{
-					void *src_interface = handle->per_node[i].data_interface;
-					void *dst_interface = handle->per_node[destination].data_interface;
-					if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
-						continue;
-				}
-
 				/* Avoid indirect transfers */
 				/* Avoid indirect transfers */
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 					continue;
 					continue;
@@ -116,22 +105,22 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 		
 		
 		if (src_node_mask & (1<<i))
 		if (src_node_mask & (1<<i))
 		{
 		{
+			int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
 			/* Avoid transfers which the interface does not want */
 			/* Avoid transfers which the interface does not want */
-			if (copy_methods->can_copy)
+			if (can_copy)
 			{
 			{
 				void *src_interface = handle->per_node[i].data_interface;
 				void *src_interface = handle->per_node[i].data_interface;
 				void *dst_interface = handle->per_node[destination].data_interface;
 				void *dst_interface = handle->per_node[destination].data_interface;
 				unsigned handling_node;
 				unsigned handling_node;
 
 
-				if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
-					continue;
-
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 				{
 				{
 					/* Avoid through RAM if the interface does not want it */
 					/* Avoid through RAM if the interface does not want it */
 					void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface;
 					void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface;
-					if (!copy_methods->can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM)
-					 || !copy_methods->can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination))
+					if ((!can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, i)
+					  && !can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, STARPU_MAIN_RAM))
+					 || (!can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, STARPU_MAIN_RAM)
+					  && !can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, destination)))
 						continue;
 						continue;
 				}
 				}
 			}
 			}
@@ -252,7 +241,9 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 
 
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node)
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node)
 {
 {
-	(void) handle; // unused
+	int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
+	void *src_interface = handle->per_node[src_node].data_interface;
+	void *dst_interface = handle->per_node[dst_node].data_interface;
 
 
 	/* XXX That's a hack until we fix cudaMemcpy3DPeerAsync in the block interface
 	/* XXX That's a hack until we fix cudaMemcpy3DPeerAsync in the block interface
 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
@@ -267,13 +258,13 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 #endif
 #endif
 
 
 	/* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect, but GPUDirect probably behave the same) */
 	/* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect, but GPUDirect probably behave the same) */
-	if (worker_supports_direct_access(src_node, dst_node))
+	if (worker_supports_direct_access(src_node, dst_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node)))
 	{
 	{
 		*handling_node = dst_node;
 		*handling_node = dst_node;
 		return 1;
 		return 1;
 	}
 	}
 
 
-	if (worker_supports_direct_access(dst_node, src_node))
+	if (worker_supports_direct_access(dst_node, src_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node)))
 	{
 	{
 		*handling_node = src_node;
 		*handling_node = src_node;
 		return 1;
 		return 1;
@@ -320,6 +311,10 @@ static int determine_request_path(starpu_data_handle_t handle,
 
 
 	if (!link_is_valid)
 	if (!link_is_valid)
 	{
 	{
+		int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
+		void *src_interface = handle->per_node[src_node].data_interface;
+		void *dst_interface = handle->per_node[dst_node].data_interface;
+
 		/* We need an intermediate hop to implement data staging
 		/* We need an intermediate hop to implement data staging
 		 * through main memory. */
 		 * through main memory. */
 		STARPU_ASSERT(max_len >= 2);
 		STARPU_ASSERT(max_len >= 2);
@@ -327,12 +322,36 @@ static int determine_request_path(starpu_data_handle_t handle,
 		/* GPU -> RAM */
 		/* GPU -> RAM */
 		src_nodes[0] = src_node;
 		src_nodes[0] = src_node;
 		dst_nodes[0] = STARPU_MAIN_RAM;
 		dst_nodes[0] = STARPU_MAIN_RAM;
-		handling_nodes[0] = starpu_node_get_kind(src_node) == STARPU_DISK_RAM ? dst_node : src_node;
+
+		if (starpu_node_get_kind(src_node) == STARPU_DISK_RAM)
+			/* Disks don't have their own driver thread */
+			handling_nodes[0] = dst_node;
+		else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node))
+		{
+			handling_nodes[0] = src_node;
+		}
+		else
+		{
+			STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, dst_node), "interface %d refuses all kinds of transfers from node %u to node %u\n", handle->ops->interfaceid, src_node, dst_node);
+			handling_nodes[0] = dst_node;
+		}
 
 
 		/* RAM -> GPU */
 		/* RAM -> GPU */
 		src_nodes[1] = STARPU_MAIN_RAM;
 		src_nodes[1] = STARPU_MAIN_RAM;
 		dst_nodes[1] = dst_node;
 		dst_nodes[1] = dst_node;
-		handling_nodes[1] = starpu_node_get_kind(dst_node) == STARPU_DISK_RAM ? src_node : dst_node;
+
+		if (starpu_node_get_kind(dst_node) == STARPU_DISK_RAM)
+			/* Disks don't have their own driver thread */
+			handling_nodes[1] = src_node;
+		else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node))
+		{
+			handling_nodes[1] = dst_node;
+		}
+		else
+		{
+			STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, src_node), "interface %d refuses all kinds of transfers from node %u to node %u\n", handle->ops->interfaceid, src_node, dst_node);
+			handling_nodes[1] = src_node;
+		}
 
 
 		return 2;
 		return 2;
 	}
 	}

+ 3 - 0
src/drivers/cpu/driver_cpu.c

@@ -311,7 +311,10 @@ _starpu_cpu_worker(void *arg)
 
 
 	_starpu_cpu_driver_init(args);
 	_starpu_cpu_driver_init(args);
 	while (_starpu_machine_is_running())
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_cpu_driver_run_once(args);
 		_starpu_cpu_driver_run_once(args);
+	}
 	_starpu_cpu_driver_deinit(args);
 	_starpu_cpu_driver_deinit(args);
 
 
 	return NULL;
 	return NULL;

+ 3 - 0
src/drivers/cuda/driver_cuda.c

@@ -764,7 +764,10 @@ void *_starpu_cuda_worker(void *_arg)
 	_starpu_cuda_driver_init(worker);
 	_starpu_cuda_driver_init(worker);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	while (_starpu_machine_is_running())
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_cuda_driver_run_once(worker);
 		_starpu_cuda_driver_run_once(worker);
+	}
 	_STARPU_TRACE_END_PROGRESS(memnode);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 	_starpu_cuda_driver_deinit(worker);
 	_starpu_cuda_driver_deinit(worker);
 
 

+ 1 - 0
src/drivers/gordon/driver_gordon.c

@@ -343,6 +343,7 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
 
 	while(_starpu_machine_is_running())
 	while(_starpu_machine_is_running())
 	{
 	{
+		_starpu_may_pause();
 		if (gordon_busy_enough())
 		if (gordon_busy_enough())
 		{
 		{
 			/* gordon already has enough work, wait a little TODO */
 			/* gordon already has enough work, wait a little TODO */

+ 2 - 0
src/drivers/mp_common/source_common.c

@@ -683,6 +683,8 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 		int res;
 		int res;
 		struct _starpu_job * j;
 		struct _starpu_job * j;
 
 
+		_starpu_may_pause();
+
 		_STARPU_TRACE_START_PROGRESS(memnode);
 		_STARPU_TRACE_START_PROGRESS(memnode);
 		_starpu_datawizard_progress(memnode, 1);
 		_starpu_datawizard_progress(memnode, 1);
 		_STARPU_TRACE_END_PROGRESS(memnode);
 		_STARPU_TRACE_END_PROGRESS(memnode);

+ 3 - 0
src/drivers/opencl/driver_opencl.c

@@ -745,7 +745,10 @@ void *_starpu_opencl_worker(void *_arg)
 	_starpu_opencl_driver_init(worker);
 	_starpu_opencl_driver_init(worker);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	_STARPU_TRACE_START_PROGRESS(memnode);
 	while (_starpu_machine_is_running())
 	while (_starpu_machine_is_running())
+	{
+		_starpu_may_pause();
 		_starpu_opencl_driver_run_once(worker);
 		_starpu_opencl_driver_run_once(worker);
+	}
 	_starpu_opencl_driver_deinit(worker);
 	_starpu_opencl_driver_deinit(worker);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 	_STARPU_TRACE_END_PROGRESS(memnode);
 
 

+ 29 - 9
src/sched_policies/eager_central_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  INRIA
  * Copyright (C) 2011  INRIA
  *
  *
@@ -91,6 +91,9 @@ static int push_task_eager_policy(struct starpu_task *task)
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	
 	
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	char dowake[STARPU_NMAXWORKERS] = { 0 };
+#endif
 	if(workers->init_iterator)
 	if(workers->init_iterator)
 		workers->init_iterator(workers, &it);
 		workers->init_iterator(workers, &it);
 	
 	
@@ -112,20 +115,35 @@ static int push_task_eager_policy(struct starpu_task *task)
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 				starpu_bitmap_unset(data->waiters, worker);
 				starpu_bitmap_unset(data->waiters, worker);
 				/* We really woke at least somebody, no need to wake somebody else */
 				/* We really woke at least somebody, no need to wake somebody else */
-				goto out;
+				break;
 #else
 #else
-				starpu_pthread_mutex_t *sched_mutex;
-				starpu_pthread_cond_t *sched_cond;
-				starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-
-				if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
-				    goto out; // wake up a single worker
+				dowake[worker] = 1;
 #endif
 #endif
 			}
 			}
 	}
 	}
-out:
+	/* Let the task free */
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	/* Now that we have a list of potential workers, try to wake one */
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+	
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		if (dowake[worker])
+		{
+			starpu_pthread_mutex_t *sched_mutex;
+			starpu_pthread_cond_t *sched_cond;
+			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
+
+			if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
+				break; // wake up a single worker
+		}
+	}
+#endif
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -154,9 +172,11 @@ static struct starpu_task *pop_task_eager_policy(unsigned sched_ctx_id)
 	if (_starpu_fifo_empty(data->fifo))
 	if (_starpu_fifo_empty(data->fifo))
 		return NULL;
 		return NULL;
 
 
+#ifdef STARPU_NON_BLOCKING_DRIVERS
 	if (starpu_bitmap_get(data->waiters, workerid))
 	if (starpu_bitmap_get(data->waiters, workerid))
 		/* Nobody woke us, avoid bothering the mutex */
 		/* Nobody woke us, avoid bothering the mutex */
 		return NULL;
 		return NULL;
+#endif
 
 
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 
 

+ 29 - 9
src/sched_policies/eager_central_priority_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  INRIA
  * Copyright (C) 2011  INRIA
  *
  *
@@ -139,6 +139,9 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	
 	
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	char dowake[STARPU_NMAXWORKERS] = { 0 };
+#endif
 	if(workers->init_iterator)
 	if(workers->init_iterator)
 		workers->init_iterator(workers, &it);
 		workers->init_iterator(workers, &it);
 	
 	
@@ -160,20 +163,35 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 				starpu_bitmap_unset(data->waiters, worker);
 				starpu_bitmap_unset(data->waiters, worker);
 				/* We really woke at least somebody, no need to wake somebody else */
 				/* We really woke at least somebody, no need to wake somebody else */
-				goto out;
+				break;
 #else
 #else
-				starpu_pthread_mutex_t *sched_mutex;
-				starpu_pthread_cond_t *sched_cond;
-				starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-
-				if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
-				    goto out; // wake up a single worker
+				dowake[worker] = 1;
 #endif
 #endif
 			}
 			}
 	}
 	}
-out:
+	/* Let the task free */
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+	/* Now that we have a list of potential workers, try to wake one */
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+	
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		if (dowake[worker])
+		{
+			starpu_pthread_mutex_t *sched_mutex;
+			starpu_pthread_cond_t *sched_cond;
+			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
+
+			if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
+				break; // wake up a single worker
+		}
+	}
+#endif
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -194,9 +212,11 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 	if (taskq->total_ntasks == 0)
 	if (taskq->total_ntasks == 0)
 		return NULL;
 		return NULL;
 
 
+#ifdef STARPU_NON_BLOCKING_DRIVERS
 	if (starpu_bitmap_get(data->waiters, workerid))
 	if (starpu_bitmap_get(data->waiters, workerid))
 		/* Nobody woke us, avoid bothering the mutex */
 		/* Nobody woke us, avoid bothering the mutex */
 		return NULL;
 		return NULL;
+#endif
 
 
 	/* release this mutex before trying to wake up other workers */
 	/* release this mutex before trying to wake up other workers */
 	starpu_pthread_mutex_t *curr_sched_mutex;
 	starpu_pthread_mutex_t *curr_sched_mutex;

+ 2 - 2
src/sched_policies/locality_work_stealing_policy.c

@@ -367,6 +367,6 @@ struct starpu_sched_policy _starpu_sched_lws_policy =
 	.pre_exec_hook = NULL,
 	.pre_exec_hook = NULL,
 	.post_exec_hook = NULL,
 	.post_exec_hook = NULL,
 	.pop_every_task = NULL,
 	.pop_every_task = NULL,
-	.policy_name = "nws",
-	.policy_description = "new work stealing"
+	.policy_name = "lws",
+	.policy_description = "locality work stealing"
 };
 };