лет назад: 12 · 739ba80d7d
--- a/AUTHORS
+++ b/AUTHORS
@@ -17,4 +17,5 @@ Ludovic Stordeur <ludovic.stordeur@inria.fr>
 
				 François Tessier <francois.tessier@inria.fr>
			
 
				 Samuel Thibault <samuel.thibault@labri.fr>
			
 
				 Pierre-André Wacrenier <wacrenier@labri.fr>
			
 
				-Andra Hugo <andra.hugo@inria.fr>
			
 
				+Andra Hugo <andra.hugo@inria.fr>
			
 
				+Simon Archipoff <simon.archipoff@etu.u-bordeaux1.fr>
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -226,7 +226,8 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
 
				 	sched_policies/node_random.c				\
			
 
				 	sched_policies/node_work_stealing.c			\
			
 
				 	sched_policies/node_worker.c				\
			
 
				-	sched_policies/node_fifo.c
			
 
				+	sched_policies/node_fifo.c 				\
			
 
				+	sched_policies/node_heft.c
			
 
				 if STARPU_USE_CPU
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cpu/driver_cpu.c
			
 
				 endif
			
--- a/src/sched_policies/fifo_queues.c
+++ b/src/sched_policies/fifo_queues.c
@@ -23,6 +23,24 @@
 
				 #include <sched_policies/fifo_queues.h>
			
 
				 #include <common/fxt.h>
			
 
				 
			
 
				+/* Debug helper: tell whether a task list is ordered by non-increasing priority.
				+ * Follows the ->next links starting at `task`. Returns 1 for an empty list, a
				+ * single-element list, or a list where no task is followed by one of strictly
				+ * higher priority; returns 0 otherwise. */
				+int is_sorted_task_list(struct starpu_task * task)
				+{
				+	struct starpu_task * cur;
				+
				+	if(task == NULL)
				+		return 1;
				+
				+	/* compare each task with its immediate successor */
				+	for(cur = task; cur->next != NULL; cur = cur->next)
				+	{
				+		if(cur->priority < cur->next->priority)
				+			return 0;
				+	}
				+	return 1;
				+}
			
 
				+
			
 
				+
			
 
				 struct _starpu_fifo_taskq *_starpu_create_fifo(void)
			
 
				 {
			
 
				 	struct _starpu_fifo_taskq *fifo;
			
@@ -36,7 +54,7 @@ struct _starpu_fifo_taskq *_starpu_create_fifo(void)
 
				 	fifo->exp_start = starpu_timing_now();
			
 
				 	fifo->exp_len = 0.0;
			
 
				 	fifo->exp_end = fifo->exp_start;
			
 
				-
			
 
				+	STARPU_ASSERT(is_sorted_task_list(fifo->taskq.head));
			
 
				 	return fifo;
			
 
				 }
			
 
				 
			
@@ -107,7 +125,7 @@ _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, struct star
 
				 
			
 
				 	fifo_queue->ntasks++;
			
 
				 	fifo_queue->nprocessed++;
			
 
				-
			
 
				+	STARPU_ASSERT(is_sorted_task_list(list->head));
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -125,6 +143,7 @@ int _starpu_fifo_push_task(struct _starpu_fifo_taskq *fifo_queue, struct starpu_
 
				 		fifo_queue->ntasks++;
			
 
				 		fifo_queue->nprocessed++;
			
 
				 	}
			
 
				+	STARPU_ASSERT(is_sorted_task_list(fifo_queue->taskq.head));
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -132,6 +151,7 @@ int _starpu_fifo_push_task(struct _starpu_fifo_taskq *fifo_queue, struct starpu_
 
				 struct starpu_task *_starpu_fifo_pop_task(struct _starpu_fifo_taskq *fifo_queue, int workerid)
			
 
				 {
			
 
				 	struct starpu_task *task;
			
 
				+	STARPU_ASSERT(is_sorted_task_list(fifo_queue->taskq.head));
			
 
				 
			
 
				 	for (task  = starpu_task_list_begin(&fifo_queue->taskq);
			
 
				 	     task != starpu_task_list_end(&fifo_queue->taskq);
			
@@ -145,6 +165,7 @@ struct starpu_task *_starpu_fifo_pop_task(struct _starpu_fifo_taskq *fifo_queue,
 
				 			{
			
 
				 				starpu_task_set_implementation(task, nimpl);
			
 
				 				starpu_task_list_erase(&fifo_queue->taskq, task);
			
 
				+				//		fprintf(stderr,"nb task %d prio %d\n", fifo_queue->ntasks, task->priority);
			
 
				 				fifo_queue->ntasks--;
			
 
				 				_STARPU_TRACE_JOB_POP(task, 0);
			
 
				 				return task;
			
@@ -167,6 +188,7 @@ struct starpu_task *_starpu_fifo_pop_local_task(struct _starpu_fifo_taskq *fifo_
 
				 		fifo_queue->ntasks--;
			
 
				 		_STARPU_TRACE_JOB_POP(task, 0);
			
 
				 	}
			
 
				+	STARPU_ASSERT(is_sorted_task_list(fifo_queue->taskq.head));
			
 
				 
			
 
				 	return task;
			
 
				 }
			
--- a/src/sched_policies/node_fifo.c
+++ b/src/sched_policies/node_fifo.c
@@ -10,10 +10,11 @@ static int push_task(struct _starpu_sched_node * node, struct starpu_task * task
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_task *  pop_task(struct _starpu_sched_node * node, unsigned sched_ctx_id)
			
 
				+static struct starpu_task * pop_task(struct _starpu_sched_node * node, unsigned sched_ctx_id)
			
 
				 {
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
			
 
				 	struct starpu_task * task  = _starpu_fifo_pop_task(node->data, starpu_worker_get_id());
			
 
				+
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
			
 
				 	if(task)
			
 
				 		return task;
			
@@ -39,3 +40,5 @@ struct _starpu_fifo_taskq *  _starpu_sched_node_fifo_get_fifo(struct _starpu_sch
 
				 	STARPU_ASSERT(node->push_task == push_task);
			
 
				 	return node->data;
			
 
				 }
			
 
				+
			
 
				+
			
--- a/src/sched_policies/node_heft.c
+++ b/src/sched_policies/node_heft.c
@@ -0,0 +1,197 @@
 
				+#include "node_sched.h"
			
 
				+#include <starpu_perfmodel.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include <float.h>
			
 
				+
			
 
				+/* Tuning parameters of a heft node, stored in the node's data field. */
				+struct _starpu_dmda_data
				+{
				+	double alpha; /* weight of the estimated completion time in the fitness score */
				+	double beta; /* weight of the estimated data transfer time in the fitness score */
				+	double gamma; /* handed to fitness(), which currently ignores it */
				+	double idle_power; /* stored at creation; not read anywhere in this file */
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+/* Gather, for every candidate node, the estimates push_task() needs to rank it.
				+ *
				+ * For each i in [0, nnodes):
				+ *  - best_impls[i] is the implementation selected for nodes[i], or -1 when no
				+ *    implementation was selected;
				+ *  - execution_lengths[i] is the smallest expected length among the
				+ *    implementations compared through their perf model (DBL_MAX if none was);
				+ *  - transfer_lengths[i] and finish_times[i] are written only when at least
				+ *    one implementation can run on nodes[i].
				+ *
				+ * *is_not_calibrated is set when starpu_task_expected_length() returned NaN for
				+ * some (arch, impl) pair; the last such pair is reported through
				+ * arch_not_calibrated and impl_not_calibrated, which are left untouched
				+ * otherwise. *is_no_model stays 1 as long as no implementation reported a
				+ * non-zero, non-NaN expected length.
				+ *
				+ * The first worker of a node (workerids[0]) is used as representative for its
				+ * arch and memory node. */
				+static void compute_all_things(struct starpu_task * task,
				+			       struct _starpu_sched_node ** nodes, int nnodes,
				+			       double * execution_lengths, int * best_impls,//impl used for best execution length, -1 if no execution possible
				+			       double * transfer_lengths,
				+			       double * finish_times,
				+			       int * is_not_calibrated, enum starpu_perf_archtype * arch_not_calibrated, int * impl_not_calibrated,
				+			       int * is_no_model)
				+{
				+	int i;
				+
				+	*is_not_calibrated = 0;
				+	*is_no_model = 1;
				+	for(i = 0; i < nnodes; i++)
				+	{
				+		int j;
				+		int can_execute = 0; /* at least one implementation can run on nodes[i] */
				+		int needs_calibration = 0; /* best_impls[i] holds an uncalibrated implementation */
				+
				+		execution_lengths[i] = DBL_MAX;
				+		best_impls[i] = -1;
				+		for(j = 0; j < STARPU_MAXIMPLEMENTATIONS; j++)
				+		{
				+			if(!_starpu_sched_node_can_execute_task_with_impl(nodes[i], task, j))
				+				continue;
				+			can_execute = 1;
				+
				+			enum starpu_perf_archtype archtype = starpu_worker_get_perf_archtype(nodes[i]->workerids[0]);
				+			double d = starpu_task_expected_length(task, archtype, j);
				+			if(isnan(d))
				+			{
				+				*is_not_calibrated = 1;
				+				*arch_not_calibrated = archtype;
				+				*impl_not_calibrated = j;
				+				/* NaN never compares below execution_lengths[i], so select the
				+				 * implementation explicitly: otherwise the node may keep
				+				 * best_impls[i] == -1 and be skipped by push_task() */
				+				best_impls[i] = j;
				+				needs_calibration = 1;
				+			}
				+			else if(needs_calibration)
				+			{
				+				/* keep the uncalibrated implementation already selected */
				+			}
				+			else if(!_STARPU_IS_ZERO(d))//we have a perf model
				+			{
				+				*is_no_model = 0;
				+				if(d < execution_lengths[i])
				+				{
				+					execution_lengths[i] = d;
				+					best_impls[i] = j;
				+				}
				+			}
				+			else if(*is_no_model)//no perf model for this implementation, but there may be one for another
				+				best_impls[i] = j;
				+		}
				+
				+		/* these estimates do not depend on the implementation: compute them once */
				+		if(can_execute)
				+		{
				+			unsigned memory_node = starpu_worker_get_memory_node(nodes[i]->workerids[0]);
				+			transfer_lengths[i] = starpu_task_expected_data_transfer_time(memory_node, task);
				+			/* NOTE(review): assumes every candidate node installs
				+			 * estimated_finish_time -- confirm for non-worker nodes */
				+			finish_times[i] = nodes[i]->estimated_finish_time(nodes[i]);
				+		}
				+	}
				+}
			
 
				+
			
 
				+/* Estimate when a task would complete on a node.
				+ *  exp_end:   delay before the node is done with its current work
				+ *             (finish_time - now at the call site)
				+ *  exp_len:   expected execution length of the task
				+ *  exp_trans: expected duration of the data transfers
				+ * The task cannot start before the node is free nor before its data has
				+ * arrived, so it starts at max(exp_end, exp_trans) and then runs for exp_len. */
				+static double compute_total_finish_time(double exp_end, double exp_len, double exp_trans)
				+{
				+	double start = (exp_trans < exp_end) ? exp_end : exp_trans;
				+
				+	return start + exp_len;
				+}
			
 
				+
			
 
				+/* Score of a candidate node when a perf model is available; the caller keeps
				+ * the node with the smallest score. The score is alpha times the estimated
				+ * completion delay plus beta times the transfer length. gamma is accepted but
				+ * not used. */
				+static double fitness(double alpha, double beta, double gamma,
				+		      double execution_length, double transfer_length, double finish_time, double now)
				+{
				+	double delay_until_free = finish_time - now;
				+	double completion = compute_total_finish_time(delay_until_free, execution_length, transfer_length);
				+	double transfer_cost = transfer_length * beta;
				+
				+	(void) gamma;
				+	return alpha * completion + transfer_cost;
				+}
			
 
				+
			
 
				+/* Score of a candidate node when no execution-time estimate is usable; the
				+ * caller keeps the node with the smallest score. Only the delay before the
				+ * node is free (finish_time - now) and the transfer length are weighted, by
				+ * alpha and beta respectively. */
				+static double fitness_no_model(double alpha, double beta, double transfer_length, double finish_time, double now)
				+{
				+	double exp_end = finish_time - now;
				+
				+	return alpha * exp_end + beta * transfer_length;
				+}
			
 
				+
			
 
				+/* Expected data transfer time of `task` towards the memory node of the first
				+ * worker listed in `node`. Asserts that the node has at least one worker. */
				+static double estimated_transfert_time(struct _starpu_sched_node * node, struct starpu_task * task)
				+{
				+	unsigned target_memory_node;
				+
				+	STARPU_ASSERT(node->nworkers);
				+	target_memory_node = starpu_worker_get_memory_node(node->workerids[0]);
				+	return starpu_task_expected_data_transfer_time(target_memory_node, task);
				+}
			
 
				+
			
 
				+/* push_task method of a heft node: hand `task` to the child with the lowest
				+ * fitness score and return that child's push_task result.
				+ *
				+ * compute_all_things() fills the per-child estimates. The score then depends
				+ * on what the perf models reported:
				+ *  - some implementation is not calibrated: only children whose first worker
				+ *    has the arch reported as uncalibrated compete, scored with
				+ *    fitness_no_model();
				+ *  - no implementation has a model: every child competes, scored with
				+ *    fitness_no_model();
				+ *  - otherwise every child competes, scored with fitness().
				+ * Children with best_impls[i] == -1 are always skipped. Asserts that the node
				+ * has children and that one of them was selected. */
				+static int push_task(struct _starpu_sched_node * node, struct starpu_task * task)
				+{
				+	struct _starpu_dmda_data * dt = node->data;
				+
				+	/* the arrays below are variable-length: a zero size is undefined behaviour */
				+	STARPU_ASSERT(node->nchilds > 0);
				+
				+	double execution_lengths[node->nchilds];
				+	double finish_times[node->nchilds];
				+	double transfer_lengths[node->nchilds];
				+	int best_impls[node->nchilds];// -1 means the child cannot execute the task
				+	int i;
				+
				+	int is_not_calibrated;
				+	enum starpu_perf_archtype arch_not_calibrated;
				+	int impl_not_calibrated;
				+	int is_no_model;
				+
				+	compute_all_things(task,
				+			   node->childs, node->nchilds,
				+			   execution_lengths, best_impls,
				+			   transfer_lengths,
				+			   finish_times,
				+			   &is_not_calibrated, &arch_not_calibrated, &impl_not_calibrated,
				+			   &is_no_model);
				+
				+	double best_fitness = DBL_MAX;
				+	int best_index = -1;
				+	double now = starpu_timing_now();
				+
				+	for(i = 0; i < node->nchilds; i++)
				+	{
				+		double f;
				+
				+		if(best_impls[i] == -1)
				+			continue;
				+
				+		if(is_not_calibrated)
				+		{
				+			/* arch_not_calibrated is only meaningful (and only read) in this branch */
				+			enum starpu_perf_archtype archtype = starpu_worker_get_perf_archtype(node->childs[i]->workerids[0]);
				+			if(archtype != arch_not_calibrated)
				+				continue;
				+			f = fitness_no_model(dt->alpha, dt->beta, transfer_lengths[i], finish_times[i], now);
				+		}
				+		else if(is_no_model)
				+			f = fitness_no_model(dt->alpha, dt->beta, transfer_lengths[i], finish_times[i], now);
				+		else
				+			f = fitness(dt->alpha, dt->beta, dt->gamma,
				+				    execution_lengths[i], transfer_lengths[i], finish_times[i], now);
				+
				+		/* strict comparison: on a tie the first child wins, and a score that
				+		 * is not below DBL_MAX is never selected */
				+		if(f < best_fitness)
				+		{
				+			best_fitness = f;
				+			best_index = i;
				+		}
				+	}
				+
				+	STARPU_ASSERT(best_index != -1);
				+	/* NOTE(review): execution_lengths[best_index] is still DBL_MAX when the
				+	 * selected implementation has no usable perf model */
				+	task->predicted = execution_lengths[best_index];
				+	task->predicted_transfer = transfer_lengths[best_index];
				+	starpu_task_set_implementation(task, best_impls[best_index]);
				+	struct _starpu_sched_node * child = node->childs[best_index];
				+	return child->push_task(child, task);
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/* Create a heft scheduling node.
				+ * Allocates a generic node with _starpu_sched_node_create(), attaches a
				+ * struct _starpu_dmda_data holding alpha, beta, gamma and idle_power as given,
				+ * and installs the heft push_task method. Returns the new node. */
				+struct _starpu_sched_node * _starpu_sched_node_heft_create(double alpha, double beta, double gamma, double idle_power)
				+{
				+	struct _starpu_sched_node * node = _starpu_sched_node_create();
				+	struct _starpu_dmda_data * data = malloc(sizeof(*data));
				+
				+	STARPU_ASSERT(data);
				+
				+	/* keep the caller's parameters: they must not be reset afterwards */
				+	data->alpha = alpha;
				+	data->beta = beta;
				+	data->gamma = gamma;
				+	data->idle_power = idle_power;
				+
				+	node->data = data;
				+	node->push_task = push_task;
				+
				+	return node;
				+}
			
 
				+
			
 
				+
			
--- a/src/sched_policies/node_random.c
+++ b/src/sched_policies/node_random.c
@@ -122,7 +122,7 @@ static void deinitialize_random_center_policy(unsigned sched_ctx_id)
 
				 }
			
 
				 
			
 
				 
			
 
				-static void add_worker_random(unsigned sched_ctx_id, int * workerids, unsigned nworkers)
			
 
				+ static void add_worker_random(unsigned sched_ctx_id, int * workerids, unsigned nworkers)
			
 
				 {
			
 
				 	struct _starpu_sched_tree *t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
			
 
				 	unsigned i;
			
--- a/src/sched_policies/node_sched.c
+++ b/src/sched_policies/node_sched.c
@@ -8,9 +8,12 @@ static void available(struct _starpu_sched_node * node)
 
				 	for(i = 0; i < node->nchilds; i++)
			
 
				 		node->childs[i]->available(node->childs[i]);
			
 
				 }
			
 
				-static struct starpu_task * pop_task_null(struct _starpu_sched_node * node STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
			
 
				+static struct starpu_task * pop_task_node(struct _starpu_sched_node * node, unsigned sched_ctx_id)
			
 
				 {
			
 
				-	return NULL;
			
 
				+	if(node->fathers[sched_ctx_id] == NULL)
			
 
				+		return NULL;
			
 
				+	else
			
 
				+		return node->fathers[sched_ctx_id]->pop_task(node->fathers[sched_ctx_id], sched_ctx_id);
			
 
				 }
			
 
				 
			
 
				 struct _starpu_sched_node * _starpu_sched_node_create(void)
			
@@ -19,7 +22,7 @@ struct _starpu_sched_node * _starpu_sched_node_create(void)
 
				 	memset(node,0,sizeof(*node));
			
 
				 	STARPU_PTHREAD_MUTEX_INIT(&node->mutex,NULL);
			
 
				 	node->available = available;
			
 
				-	node->pop_task = pop_task_null;
			
 
				+	node->pop_task = pop_task_node;
			
 
				 	node->destroy_node = _starpu_sched_node_destroy;
			
 
				 	node->add_child = _starpu_sched_node_add_child;
			
 
				 	node->remove_child = _starpu_sched_node_remove_child;
			
@@ -171,6 +174,33 @@ int _starpu_sched_node_can_execute_task(struct _starpu_sched_node * node, struct
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/* Return 1 if at least one worker listed in node->workerids can execute
				+ * implementation `nimpl` of `task`, 0 otherwise.
				+ * Asserts that task is non-NULL and nimpl < STARPU_MAXIMPLEMENTATIONS. */
				+int _starpu_sched_node_can_execute_task_with_impl(struct _starpu_sched_node * node, struct starpu_task * task, unsigned nimpl)
				+{
				+	int i;
				+
				+	STARPU_ASSERT(task);
				+	STARPU_ASSERT(nimpl < STARPU_MAXIMPLEMENTATIONS);
				+	for(i = 0; i < node->nworkers; i++)
				+	{
				+		/* i indexes node->workerids; the worker id itself is what must be queried */
				+		if (starpu_worker_can_execute_task(node->workerids[i], task, nimpl))
				+			return 1;
				+	}
				+	return 0;
				+}
			
 
				+
			
 
				+/* Return 1 when every worker in workerids[0..nworkers-1] has the same
				+ * worker_mask (an empty set counts as homogeneous), 0 as soon as two
				+ * workers differ. */
				+static int is_homogeneous(int * workerids, int nworkers)
				+{
				+	int k;
				+	uint32_t reference_mask;
				+
				+	if(nworkers == 0)
				+		return 1;
				+
				+	/* all masks equal <=> each one equals the first */
				+	reference_mask = _starpu_get_worker_struct(workerids[0])->worker_mask;
				+	for(k = 1; k < nworkers; k++)
				+	{
				+		if(_starpu_get_worker_struct(workerids[k])->worker_mask != reference_mask)
				+			return 0;
				+	}
				+	return 1;
				+}
			
 
				 
			
 
				 
			
 
				 static int in_tab(int elem, int * tab, int size)
			
@@ -180,29 +210,31 @@ static int in_tab(int elem, int * tab, int size)
 
				 			return 1;
			
 
				 	return 0;
			
 
				 }
			
 
				-
			
 
				 static void _update_workerids_after_tree_modification(struct _starpu_sched_node * node)
			
 
				 {
			
 
				 	if(_starpu_sched_node_is_worker(node))
			
 
				 	{
			
 
				 		node->nworkers = 1;
			
 
				 		node->workerids[0] =  _starpu_sched_node_worker_get_workerid(node);
			
 
				-		return;
			
 
				 	}
			
 
				-	int i;
			
 
				-	node->nworkers = 0;
			
 
				-	for(i = 0; i < node->nchilds; i++)
			
 
				+	else
			
 
				 	{
			
 
				-		struct _starpu_sched_node * child = node->childs[i];
			
 
				-		_update_workerids_after_tree_modification(child);
			
 
				-		int j;
			
 
				-		for(j = 0; j < child->nworkers; j++)
			
 
				+		int i;
			
 
				+		node->nworkers = 0;
			
 
				+		for(i = 0; i < node->nchilds; i++)
			
 
				 		{
			
 
				-			int id = child->workerids[j];
			
 
				-			if(!in_tab(id, node->workerids, node->nworkers))
			
 
				-				node->workerids[node->nworkers++] = id;
			
 
				+			struct _starpu_sched_node * child = node->childs[i];
			
 
				+			_update_workerids_after_tree_modification(child);
			
 
				+			int j;
			
 
				+			for(j = 0; j < child->nworkers; j++)
			
 
				+			{
			
 
				+				int id = child->workerids[j];
			
 
				+				if(!in_tab(id, node->workerids, node->nworkers))
			
 
				+					node->workerids[node->nworkers++] = id;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				+	node->is_homogeneous = is_homogeneous(node->workerids, node->nworkers);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/sched_policies/node_sched.h
+++ b/src/sched_policies/node_sched.h
@@ -5,10 +5,11 @@
 
				 struct _starpu_sched_node
			
 
				 {
			
 
				 	int (*push_task)(struct _starpu_sched_node *, struct starpu_task *);
			
 
				-	struct starpu_task * (*pop_task)(struct _starpu_sched_node *, unsigned sched_ctx_id);
			
 
				+	struct starpu_task * (*pop_task)(struct _starpu_sched_node *,
			
 
				+					 unsigned sched_ctx_id);
			
 
				 	void (*available)(struct _starpu_sched_node *);
			
 
				-
			
 
				-	void * data;
			
 
				+	double (*estimated_finish_time)(struct _starpu_sched_node * node);
			
 
				+	
			
 
				 
			
 
				 	int nchilds;
			
 
				 	struct _starpu_sched_node ** childs;
			
@@ -19,13 +20,17 @@ struct _starpu_sched_node
 
				 	int workerids[STARPU_NMAXWORKERS];
			
 
				 	int nworkers;
			
 
				 
			
 
				+	//is_homogeneous is 0 iff workers in the node's subtree are heterogeneous,
			
 
				+	//this field is set and updated automatically, you shouldn't write to it
			
 
				+	int is_homogeneous;
			
 
				+
			
 
				+	void * data;
			
 
				 	/* may be shared by several contexts
			
 
				 	 * so we need several fathers
			
 
				 	 */
			
 
				 	struct _starpu_sched_node * fathers[STARPU_NMAX_SCHED_CTXS];
			
 
				 	
			
 
				 	
			
 
				-
			
 
				 	void (*add_child)(struct _starpu_sched_node *node,
			
 
				 			  struct _starpu_sched_node *child,
			
 
				 			  unsigned sched_ctx_id);
			
@@ -46,7 +51,7 @@ struct _starpu_sched_tree
 
				 
			
 
				 
			
 
				 /* allocate and initalise node field with defaults values :
			
 
				- *  .pop_task return NULL
			
 
				+ *  .pop_task make recursive call on father
			
 
				  *  .available make a recursive call on childrens
			
 
				  *  .destroy_node  call _starpu_sched_node_destroy
			
 
				  *  .update_nchilds a function that does nothing
			
@@ -65,26 +70,29 @@ void _starpu_sched_node_remove_child(struct _starpu_sched_node * node, struct _s
 
				 
			
 
				 
			
 
				 int _starpu_sched_node_can_execute_task(struct _starpu_sched_node * node, struct starpu_task * task);
			
 
				+int _starpu_sched_node_can_execute_task_with_impl(struct _starpu_sched_node * node, struct starpu_task * task, unsigned nimpl);
			
 
				 
			
 
				-
			
 
				-//no public create function for workers because we dont want to have several node_worker for a single workerid
			
 
				+/* no public create function for workers because we dont want to have several node_worker for a single workerid */
			
 
				 struct _starpu_sched_node * _starpu_sched_node_worker_get(int workerid);
			
 
				 void _starpu_sched_node_worker_destroy(struct _starpu_sched_node *);
			
 
				 
			
 
				-/*this function assume that workers are the only leafs */
			
 
				+/* this function compares the node's available function with the standard one used by worker nodes */
			
 
				 int _starpu_sched_node_is_worker(struct _starpu_sched_node * node);
			
 
				 int _starpu_sched_node_worker_get_workerid(struct _starpu_sched_node * worker_node);
			
 
				 
			
 
				 struct _starpu_sched_node * _starpu_sched_node_fifo_create(void);
			
 
				 struct _starpu_fifo_taskq *  _starpu_sched_node_fifo_get_fifo(struct _starpu_sched_node *);
			
 
				 
			
 
				-//struct _starpu_sched_node * _starpu_sched_node_work_stealing_create(void);
			
 
				+/* struct _starpu_sched_node * _starpu_sched_node_work_stealing_create(void); */
			
 
				 struct _starpu_sched_node * _starpu_sched_node_random_create(void);
			
 
				 
			
 
				 struct _starpu_sched_node * _starpu_sched_node_eager_create(void);
			
 
				 
			
 
				 
			
 
				 
			
 
				+
			
 
				+
			
 
				+
			
 
				 void _starpu_tree_destroy(struct _starpu_sched_tree * tree, unsigned sched_ctx_id);
			
 
				 
			
 
				 /* destroy node and all his child
			
@@ -98,6 +106,6 @@ struct starpu_task * _starpu_tree_pop_task(unsigned sched_ctx_id);
 
				 //this function must be called after all modification of tree
			
 
				 void _starpu_tree_update_after_modification(struct _starpu_sched_tree * tree);
			
 
				 ;
			
 
				-//extern struct starpu_sched_policy _starpu_sched_tree_eager_policy;
			
 
				-//extern struct starpu_sched_policy _starpu_sched_tree_random_policy;
			
 
				+
			
 
				+
			
 
				 #endif
			
--- a/src/sched_policies/node_work_stealing.c
+++ b/src/sched_policies/node_work_stealing.c
@@ -3,7 +3,7 @@
 
				 #include <starpu_scheduler.h>
			
 
				 
			
 
				 
			
 
				-#define USE_OVERLOAD
			
 
				+//#define USE_OVERLOAD
			
 
				 #ifdef USE_OVERLOAD
			
 
				 #include <float.h>
			
 
				 
			
@@ -48,7 +48,9 @@ static int select_victim_round_robin(struct _starpu_sched_node *node)
 
				 		unsigned ntasks;
			
 
				 		struct _starpu_sched_node * child = node->childs[i];
			
 
				 		struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(child);
			
 
				-		STARPU_PTHREAD_MUTEX_LOCK(&child->mutex);
			
 
				+		//STARPU_PTHREAD_MUTEX_LOCK(&child->mutex);//do we need to wait ?
			
 
				+		if(starpu_pthread_mutex_trylock(&child->mutex))//or not
			
 
				+			continue;
			
 
				 		ntasks = fifo->ntasks;
			
 
				 		if (ntasks)
			
 
				 			break;
			
@@ -235,6 +237,7 @@ static struct starpu_task * pop_task(struct _starpu_sched_node * node, unsigned
 
				 }
			
 
				 
			
 
				 
			
 
				+
			
 
				 static int push_task(struct _starpu_sched_node * node, struct starpu_task * task)
			
 
				 {
			
 
				 	struct _starpu_work_stealing_data * wsd = node->data;
			
@@ -283,7 +286,8 @@ int _starpu_ws_push_task(struct starpu_task *task)
 
				 		if(is_my_fifo_node(node,sched_ctx_id))
			
 
				 		{
			
 
				 			STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
			
 
				-			int ret_val =  _starpu_fifo_push_sorted_task(node->data, task);
			
 
				+			struct _starpu_fifo_taskq * fifo = node->data;
			
 
				+			int ret_val =  _starpu_fifo_push_sorted_task(fifo, task);
			
 
				 			STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
			
 
				 			return ret_val;
			
 
				 		}
			
--- a/src/sched_policies/node_worker.c
+++ b/src/sched_policies/node_worker.c
@@ -1,5 +1,6 @@
 
				 #include "node_sched.h"
			
 
				 #include <core/workers.h>
			
 
				+#include <float.h>
			
 
				 
			
 
				 static struct _starpu_sched_node * _worker_nodes[STARPU_NMAXWORKERS];
			
 
				 
			
@@ -33,15 +34,7 @@ int _starpu_sched_node_worker_push_task(struct _starpu_sched_node * node, struct
 
				 
			
 
				 struct starpu_task * _starpu_sched_node_worker_pop_task(struct _starpu_sched_node *node,unsigned sched_ctx_id)
			
 
				 {
			
 
				-/*	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
			
 
				-	struct starpu_task * task = _starpu_fifo_pop_local_task(node->fifo);
			
 
				-	if(task)
			
 
				-	{      
			
 
				-		STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
			
 
				-		return task;
			
 
				-	}
			
 
				-*/	struct _starpu_sched_node *father = node->fathers[sched_ctx_id];
			
 
				-//	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
			
 
				+	struct _starpu_sched_node *father = node->fathers[sched_ctx_id];
			
 
				 	if(father == NULL)
			
 
				 		return NULL;
			
 
				 	else
			
@@ -73,6 +66,24 @@ static void available(struct _starpu_sched_node * worker_node)
 
				 }
			
 
				 
			
 
				 
			
 
				+/* Smallest expected execution length of `task` on this worker node, taken
				+ * over all implementations whose expected length is not NaN.
				+ * Asserts that `node` is a worker node and that at least one implementation
				+ * produced a value below DBL_MAX.
				+ *
				+ * NOTE(review): struct _starpu_sched_node declares estimated_finish_time with
				+ * a single node argument and node_heft.c calls it that way; this two-argument
				+ * signature does not match, so `task` is indeterminate in such a call -- to be
				+ * reconciled.
				+ * NOTE(review): presumably node->data points to the struct _starpu_worker of
				+ * this node -- confirm against _starpu_sched_node_worker_create(). */
				+static double estimated_finish_time(struct _starpu_sched_node * node, struct starpu_task * task)
				+{
				+	STARPU_ASSERT(_starpu_sched_node_is_worker(node));
				+	double d = DBL_MAX;
				+	int nimpl;
				+	for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				+	{
				+		double tmp = starpu_task_expected_length(task,
				+							 ((struct _starpu_worker *)node->data)->arch,
				+							 nimpl);
				+		/* keep the minimum; the assignment used to go the wrong way, leaving
				+		 * d at DBL_MAX and the assertion below always failing */
				+		if(!isnan(tmp) && tmp < d)
				+			d = tmp;
				+	}
				+	STARPU_ASSERT(d != DBL_MAX);
				+	return d;
				+}
			
 
				+
			
 
				 static struct _starpu_sched_node  * _starpu_sched_node_worker_create(int workerid)
			
 
				 {
			
 
				 	STARPU_ASSERT(workerid >= 0 && workerid <  (int) starpu_worker_get_count());
			
@@ -86,6 +97,7 @@ static struct _starpu_sched_node  * _starpu_sched_node_worker_create(int workeri
 
				 	//node->fifo = _starpu_create_fifo(),
			
 
				 	node->push_task = _starpu_sched_node_worker_push_task;
			
 
				 	node->pop_task = _starpu_sched_node_worker_pop_task;
			
 
				+	node->estimated_finish_time = estimated_finish_time;
			
 
				 	node->destroy_node = _starpu_sched_node_worker_destroy;
			
 
				 	node->available = available;
			
 
				 	node->workerids[0] = workerid;
			
--- a/tests/sched_policies/prio.c
+++ b/tests/sched_policies/prio.c
@@ -77,7 +77,7 @@ run(struct starpu_sched_policy *policy)
 
				 	for (i = 0; i < NTASKS; i++) {
			
 
				 		struct starpu_task *task = starpu_task_create();
			
 
				 
			
 
				-		if (i%2) {
			
 
				+		if (random()%2) {
			
 
				 			task->cl = &clA;
			
 
				 			task->priority=STARPU_MIN_PRIO;
			
 
				 		} else {