소스 검색

a lot of bug fixes, comments
+ a README file to note various things on schedulers

Simon Archipoff 11 년 전
부모
커밋
c03b52a93d

+ 2 - 6
include/starpu_sched_node.h

@@ -100,7 +100,7 @@ struct starpu_sched_tree
 	 * it is taken in read mode pushing a task
 	 * and in write mode for adding or removing workers
 	 */
-	starpu_pthread_rwlock_t lock;
+	starpu_pthread_mutex_t lock;
 };
 
 
@@ -159,10 +159,6 @@ struct starpu_sched_node * starpu_sched_node_heft_create(struct starpu_heft_data
 
 int starpu_sched_node_is_heft(struct starpu_sched_node * node);
 
-/* compute predicted_end by taking in account the case of the predicted transfer and the predicted_end overlap
- */
-double starpu_sched_compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer);
-
 /* this node select the best implementation for the first worker in context that can execute task.
  * and fill task->predicted and task->predicted_transfer
  * cannot have several childs if push_task is called
@@ -175,7 +171,7 @@ struct starpu_sched_node * starpu_sched_node_calibration_create(void * arg STARP
 /*create an empty tree
  */
 struct starpu_sched_tree * starpu_sched_tree_create(unsigned sched_ctx_id);
-void starpu_sched_tree_destroy(struct starpu_sched_tree * tree, unsigned sched_ctx_id);
+void starpu_sched_tree_destroy(struct starpu_sched_tree * tree);
 
 /* destroy node and all his child
  * except if they are shared between several contexts

+ 54 - 0
src/sched_policies/README

@@ -0,0 +1,54 @@
+Mutex policy
+
+
+The scheduler has to be protected while the hypervisor is modifying it.
+There is a mutex in struct starpu_sched_tree which should be taken by
+the application to push a task,
+and one mutex per worker which should be taken by workers when they pop
+or push a task.
+The hypervisor must take all of them to modify the scheduler.
+
+
+
+
+
+Creation/Destruction
+
+All the struct starpu_sched_node * starpu_sched_node_foo_create()
+functions return an initialized struct starpu_sched_node.
+
+The void starpu_sched_node_destroy(struct starpu_sched_node * node)
+function calls node->deinit_data(node) to free the data allocated during
+creation.
+
+Worker nodes are special: there is no creation function, only an
+accessor, to guarantee the uniqueness of worker nodes. worker_node->workers and
+worker_node->workers_in_ctx should not be modified.
+
+
+Add/Remove workers
+I see two ways of adding/removing workers to/from the scheduler.
+In the first one, the hypervisor blocks all scheduling, modifies the scheduler in
+the way it wants, and then updates all node->workers_in_ctx bitmaps;
+all node->push_task implementations should respect them.
+
+
+The second one may be done in an atomic way. The struct
+starpu_sched_tree holds a struct starpu_bitmap * that represents the
+workers available in the context. Every node can call struct starpu_bitmap
+* starpu_sched_node_get_worker_mask(unsigned sched_ctx_id) to see
+where it can push a task according to the available workers.
+But with this approach we have a problem for node->estimated_end: in the case
+of a fifo, we have to know how many workers are available to the fifo
+node. We also have a problem for shared objects. The first way seems to
+be better.
+
+
+
+
+
+
+
+In several places realloc is used (in prio_deque and for
+starpu_sched_node_add_child), because we should not have many
+different priority levels nor add too many children.

+ 2 - 2
src/sched_policies/hierarchical_heft.c

@@ -37,7 +37,7 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 
 	r = starpu_sched_node_create_recipe();
 	starpu_sched_recipe_add_node(r, starpu_sched_node_best_implementation_create, NULL);
-	starpu_sched_recipe_add_node(r, starpu_sched_node_work_stealing_create ,NULL);
+	starpu_sched_recipe_add_node(r, starpu_sched_node_fifo_create ,NULL);
 
 	specs.hwloc_node_composed_sched_node = r;
 	specs.worker_composed_sched_node = recipe_for_worker;
@@ -55,7 +55,7 @@ static void deinitialize_heft_center_policy(unsigned sched_ctx_id)
 {
 	struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
-	starpu_sched_tree_destroy(t, sched_ctx_id);
+	starpu_sched_tree_destroy(t);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 

+ 28 - 22
src/sched_policies/node_composed.c

@@ -1,5 +1,10 @@
 #include <starpu_sched_node.h>
 #include <common/list.h>
+
+
+/* a composed node is parameterized by a list of pairs
+ * (create_node_function(arg), arg)
+ */
 LIST_TYPE(fun_create_node,
 	  struct starpu_sched_node *(*create_node)(void * arg);
 	  void * arg;
@@ -19,14 +24,17 @@ struct _starpu_composed_sched_node_recipe * starpu_sched_node_create_recipe(void
 	return recipe;
 }
 
-void starpu_sched_recipe_add_node(struct _starpu_composed_sched_node_recipe * recipe, struct starpu_sched_node *(*create_node)(void * arg), void * arg)
+void starpu_sched_recipe_add_node(struct _starpu_composed_sched_node_recipe * recipe,
+				  struct starpu_sched_node *(*create_node)(void * arg),
+				  void * arg)
 {
 	struct fun_create_node * e = fun_create_node_new();
 	e->create_node = create_node;
 	e->arg = arg;
 	fun_create_node_list_push_back(recipe->list, e);
 }
-struct _starpu_composed_sched_node_recipe * starpu_sched_node_create_recipe_singleton(struct starpu_sched_node *(*create_node)(void * arg), void * arg)
+struct _starpu_composed_sched_node_recipe * starpu_sched_node_create_recipe_singleton(struct starpu_sched_node *(*create_node)(void * arg),
+										      void * arg)
 {
 	struct _starpu_composed_sched_node_recipe * r = starpu_sched_node_create_recipe();
 	starpu_sched_recipe_add_node(r, create_node, arg);
@@ -42,10 +50,16 @@ void _starpu_destroy_composed_sched_node_recipe(struct _starpu_composed_sched_no
 	free(recipe);
 }
 
+
+
 struct composed_node
 {
 	struct starpu_sched_node *top,*bottom;
 };
+
+/* this function actually builds the composed node data by turning the list of
+ * (node_create_fun, arg_create_fun) into a tree where every node has one child
+ */
 struct composed_node create_composed_node(struct _starpu_composed_sched_node_recipe * recipe
 #ifdef STARPU_HAVE_HWLOC
 					  ,hwloc_obj_t obj
@@ -53,11 +67,8 @@ struct composed_node create_composed_node(struct _starpu_composed_sched_node_rec
 )
 {
 	struct composed_node c;
-	if(!recipe)
-	{
-		c.top = c.bottom = NULL;
-		return c;
-	}
+	STARPU_ASSERT(recipe);
+
 	struct fun_create_node_list * list = recipe->list;
 	struct fun_create_node * i = fun_create_node_list_begin(list);
 	STARPU_ASSERT(i);
@@ -76,16 +87,19 @@ struct composed_node create_composed_node(struct _starpu_composed_sched_node_rec
 		node->obj = obj;
 #endif
 		starpu_sched_node_add_child(c.bottom, node);
-//we want to be able to to traverse scheduler bottom up for all sched ctxs
-		int j;
+
+		/* we want to be able to traverse scheduler bottom up for all sched ctxs
+		 * when a worker call pop()
+		 */
+		unsigned j;
 		for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
-			starpu_sched_node_set_father(node, c.bottom,(unsigned)j);
+			starpu_sched_node_set_father(node, c.bottom, j);
 		c.bottom = node;
 	}
 	STARPU_ASSERT(!starpu_sched_node_is_worker(c.bottom));
 	return c;
 }
-		
+
 
 static int composed_node_push_task(struct starpu_sched_node * node, struct starpu_task * task)
 {
@@ -104,13 +118,6 @@ struct starpu_task * composed_node_pop_task(struct starpu_sched_node *node, unsi
 	return NULL;
 }
 
-/*
-void composed_node_available(struct starpu_sched_node *node)
-{
-	struct composed_node * c = node->data;
-	c->top->available(c->top);
-}
-*/	
 double composed_node_estimated_load(struct starpu_sched_node * node)
 {
 	struct composed_node * c = node->data;
@@ -140,10 +147,10 @@ static void composed_node_notify_change_workers(struct starpu_sched_node * node)
 	{
 		starpu_bitmap_unset_all(n->workers);
 		starpu_bitmap_or(n->workers, workers);
-	       
+
 		starpu_bitmap_unset_all(n->workers_in_ctx);
 		starpu_bitmap_or(n->workers_in_ctx, workers_in_ctx);
-		
+
 		n->is_homogeneous = is_homogeneous;
 		if(n == c->bottom)
 			break;
@@ -179,7 +186,7 @@ struct starpu_sched_node * starpu_sched_node_composed_node_create(struct _starpu
 	*c = create_composed_node(recipe
 #ifdef STARPU_HAVE_HWLOC
 				  ,node->obj
-#endif 
+#endif
 );
 	c->bottom->nchilds = node->nchilds;
 	c->bottom->childs = node->childs;
@@ -187,7 +194,6 @@ struct starpu_sched_node * starpu_sched_node_composed_node_create(struct _starpu
 	node->data = c;
 	node->push_task = composed_node_push_task;
 	node->pop_task = composed_node_pop_task;
-//	node->available = composed_node_available;
 	node->estimated_load = composed_node_estimated_load;
 	node->add_child = composed_node_add_child;
 	node->remove_child = composed_node_remove_child;

+ 1 - 1
src/sched_policies/node_eager.c

@@ -26,7 +26,7 @@ static void initialize_eager_center_policy(unsigned sched_ctx_id)
 static void deinitialize_eager_center_policy(unsigned sched_ctx_id)
 {
 	struct starpu_sched_tree *tree = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	starpu_sched_tree_destroy(tree, sched_ctx_id);
+	starpu_sched_tree_destroy(tree);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 

+ 29 - 32
src/sched_policies/node_fifo.c

@@ -9,21 +9,23 @@ struct _starpu_fifo_data
 	starpu_pthread_mutex_t mutex;
 };
 
-void _fifo_node_deinit_data(struct starpu_sched_node * node)
+void fifo_node_deinit_data(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node && node->data);
 	struct _starpu_fifo_data * f = node->data;
 	_starpu_prio_deque_destroy(&f->fifo);
-	STARPU_PTHREAD_MUTEX_LOCK(&f->mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex);
 	free(f);
 }
 
 static double fifo_estimated_end(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node && node->data);
 	struct _starpu_fifo_data * data = node->data;
 	struct _starpu_prio_deque * fifo = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	int card = starpu_bitmap_cardinal(node->workers_in_ctx);
-
+	STARPU_ASSERT(card != 0);
 	STARPU_PTHREAD_MUTEX_LOCK(mutex);
 	double estimated_end = fifo->exp_start + fifo->exp_len / card;
 	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
@@ -31,17 +33,19 @@ static double fifo_estimated_end(struct starpu_sched_node * node)
 	return estimated_end;
 }
 
-static double estimated_load(struct starpu_sched_node * node)
+static double fifo_estimated_load(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node && node->data);
+	STARPU_ASSERT(starpu_bitmap_cardinal(node->workers_in_ctx) != 0);
 	struct _starpu_fifo_data * data = node->data;
 	struct _starpu_prio_deque * fifo = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	double relative_speedup = 0.0;
 	double load;
-
 	if(node->is_homogeneous)
-	{
-		relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(starpu_bitmap_first(node->workers)));
+	{		
+		int first_worker = starpu_bitmap_first(node->workers_in_ctx);
+		relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker));
 		STARPU_PTHREAD_MUTEX_LOCK(mutex);
 		load = fifo->ntasks / relative_speedup;
 		STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
@@ -50,15 +54,15 @@ static double estimated_load(struct starpu_sched_node * node)
 	else
 	{
 		int i;
-		for(i = starpu_bitmap_first(node->workers);
+		for(i = starpu_bitmap_first(node->workers_in_ctx);
 		    i != -1;
-		    i = starpu_bitmap_next(node->workers, i))
+		    i = starpu_bitmap_next(node->workers_in_ctx, i))
 			relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i));
-		relative_speedup /= starpu_bitmap_cardinal(node->workers);
-			STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup));
-			STARPU_PTHREAD_MUTEX_LOCK(mutex);
-			load = fifo->ntasks / relative_speedup;
-			STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
+		relative_speedup /= starpu_bitmap_cardinal(node->workers_in_ctx);
+		STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup));
+		STARPU_PTHREAD_MUTEX_LOCK(mutex);
+		load = fifo->ntasks / relative_speedup;
+		STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
 	}
 	int i;
 	for(i = 0; i < node->nchilds; i++)
@@ -69,16 +73,14 @@ static double estimated_load(struct starpu_sched_node * node)
 	return load;
 }
 
-static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
+static int fifo_push_task(struct starpu_sched_node * node, struct starpu_task * task)
 {
+	STARPU_ASSERT(node && node->data && task);
 	STARPU_ASSERT(starpu_sched_node_can_execute_task(node,task));
 	struct _starpu_fifo_data * data = node->data;
 	struct _starpu_prio_deque * fifo = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	STARPU_PTHREAD_MUTEX_LOCK(mutex);
-	STARPU_ASSERT(!isnan(fifo->exp_end));
-	STARPU_ASSERT(!isnan(fifo->exp_len));
-	STARPU_ASSERT(!isnan(fifo->exp_start));
 	int ret = _starpu_prio_deque_push_task(fifo, task);
 	if(!isnan(task->predicted))
 	{
@@ -90,24 +92,19 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 	STARPU_ASSERT(!isnan(fifo->exp_start));
 	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
 
-
 	starpu_sched_node_available(node);
-//	node->available(node);
+
 	return ret;
 }
 
-static struct starpu_task * pop_task(struct starpu_sched_node * node, unsigned sched_ctx_id)
+static struct starpu_task * fifo_pop_task(struct starpu_sched_node * node, unsigned sched_ctx_id)
 {
+	STARPU_ASSERT(node && node->data);
 	struct _starpu_fifo_data * data = node->data;
 	struct _starpu_prio_deque * fifo = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	STARPU_PTHREAD_MUTEX_LOCK(mutex);
-	STARPU_ASSERT(!isnan(fifo->exp_end));
-	STARPU_ASSERT(!isnan(fifo->exp_len));
-	STARPU_ASSERT(!isnan(fifo->exp_start));
-	struct starpu_task * task  = node->is_homogeneous ?
-		_starpu_prio_deque_pop_task(fifo):
-		_starpu_prio_deque_pop_task_for_worker(fifo, starpu_worker_get_id());
+	struct starpu_task * task = _starpu_prio_deque_pop_task_for_worker(fifo, starpu_worker_get_id());
 	if(task)
 	{
 
@@ -134,7 +131,7 @@ static struct starpu_task * pop_task(struct starpu_sched_node * node, unsigned s
 
 int starpu_sched_node_is_fifo(struct starpu_sched_node * node)
 {
-	return node->push_task == push_task;
+	return node->push_task == fifo_push_task;
 }
 
 struct starpu_sched_node * starpu_sched_node_fifo_create(void * arg STARPU_ATTRIBUTE_UNUSED)
@@ -145,9 +142,9 @@ struct starpu_sched_node * starpu_sched_node_fifo_create(void * arg STARPU_ATTRI
 	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
 	node->data = data;
 	node->estimated_end = fifo_estimated_end;
-	node->estimated_load = estimated_load;
-	node->push_task = push_task;
-	node->pop_task = pop_task;
-	node->deinit_data = _fifo_node_deinit_data;
+	node->estimated_load = fifo_estimated_load;
+	node->push_task = fifo_push_task;
+	node->pop_task = fifo_pop_task;
+	node->deinit_data = fifo_node_deinit_data;
 	return node;
 }

+ 71 - 57
src/sched_policies/node_heft.c

@@ -1,5 +1,6 @@
 #include <starpu_sched_node.h>
 #include "fifo_queues.h"
+#include "sched_node.h"
 #include <starpu_perfmodel.h>
 #include <starpu_scheduler.h>
 #include <float.h>
@@ -15,6 +16,36 @@ struct _starpu_heft_data
 	struct starpu_sched_node * calibrating_node;
 };
 
+/* compute predicted_end by taking into account the case of the predicted transfer and the predicted_end overlap
+ */
+double compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer)
+{
+	STARPU_ASSERT(!isnan(now + predicted_end + predicted_length + predicted_transfer));
+	STARPU_ASSERT(now >= 0.0 && predicted_end >= 0.0 && predicted_length >= 0.0 && predicted_transfer >= 0.0);
+	if (now + predicted_transfer < predicted_end)
+	{
+		/* We may hope that the transfer will be finished by
+		 * the start of the task. */
+		predicted_transfer = 0;
+	}
+	else
+	{
+		/* The transfer will not be finished by then, take the
+		 * remainder into account */
+		predicted_transfer += now;
+		predicted_transfer -= predicted_end;
+	}
+//	if(!isnan(predicted_transfer))
+	{
+		predicted_end += predicted_transfer;
+		predicted_length += predicted_transfer;
+	}
+
+//	if(!isnan(predicted_length))
+		predicted_end += predicted_length;
+	return predicted_end;
+}
+
 
 static double compute_fitness(struct _starpu_heft_data * d, double exp_end, double best_exp_end, double max_exp_end, double transfer_len, double local_power)
 {
@@ -24,8 +55,9 @@ static double compute_fitness(struct _starpu_heft_data * d, double exp_end, doub
 		+ d->gamma * d->idle_power * (exp_end - max_exp_end);
 }
 
-static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
+static int heft_push_task(struct starpu_sched_node * node, struct starpu_task * task)
 {
+	STARPU_ASSERT(node && task && starpu_sched_node_is_heft(node));
 	struct _starpu_heft_data * d = node->data;	
 	struct starpu_sched_node * best_node = NULL;
 	double estimated_ends[node->nchilds];
@@ -49,11 +81,11 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 				return d->no_perf_model_node->push_task(d->no_perf_model_node, task);
 			estimated_transfer_length[i] = starpu_sched_node_transfer_length(c, task);
 			estimated_ends[i] = c->estimated_end(c);
-	double now = starpu_timing_now();
-			estimated_ends_with_task[i] = starpu_sched_compute_expected_time(now,
-											 estimated_ends[i],
-											 estimated_lengths[i],
-											 estimated_transfer_length[i]);
+			double now = starpu_timing_now();
+			estimated_ends_with_task[i] = compute_expected_time(now,
+									    estimated_ends[i],
+									    estimated_lengths[i],
+									    estimated_transfer_length[i]);
 			if(estimated_ends_with_task[i] < best_exp_end_with_task)	
 				best_exp_end_with_task = estimated_ends_with_task[i];
 			if(estimated_ends_with_task[i] > max_exp_end_with_task)
@@ -62,34 +94,6 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 		}
 	}
 
-#if 0
-	fprintf(stderr,"estimated end           ");
-	for(i = 0; i < node->nchilds; i++)
-	{
-		fprintf(stderr,"%.0f ",estimated_ends[i]);
-	}
-	fprintf(stderr,"\n");
-	fprintf(stderr,"estimated end with task ");
-		for(i = 0; i < node->nchilds; i++)
-	{
-		fprintf(stderr,"%.0f ",estimated_ends_with_task[i]);
-	}
-	fprintf(stderr,"\n");
-	fprintf(stderr,"transfere length        ");
-		for(i = 0; i < node->nchilds; i++)
-	{
-		fprintf(stderr,"%.0f ",estimated_transfer_length[i]);
-	}
-	fprintf(stderr,"\n");
-	fprintf(stderr,"estimated length        ");
-		for(i = 0; i < node->nchilds; i++)
-	{
-		fprintf(stderr,"%.0f ",estimated_lengths[i]);
-	}
-	fprintf(stderr,"\n\n");
-
-	fprintf(stderr,"fitness                 ");
-#endif
 	double best_fitness = DBL_MAX;
 	int best_inode = -1;
 	for(i = 0; i < nsuitable_nodes; i++)
@@ -101,18 +105,13 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 					     max_exp_end_with_task,
 					     estimated_transfer_length[inode],
 					     0.0);
-#if 0
-		fprintf(stderr,"%.0f ",tmp);
-#endif
 		if(tmp < best_fitness)
 		{
 			best_fitness = tmp;
 			best_inode = inode;
 		}
 	}
-#if 0
-	fprintf(stderr,"push on %d\n",best_inode);
-#endif
+
 	STARPU_ASSERT(best_inode != -1);
 	best_node = node->childs[best_inode];
 	return best_node->push_task(best_node, task);
@@ -154,24 +153,28 @@ static void param_modified(struct starpu_top_param* d)
 }
 #endif /* !STARPU_USE_TOP */
 
-void _heft_add_child(struct starpu_sched_node * node, struct starpu_sched_node * child)
+void heft_add_child(struct starpu_sched_node * node, struct starpu_sched_node * child)
 {
+	STARPU_ASSERT(starpu_sched_node_is_heft(node));
 	starpu_sched_node_add_child(node, child);
 	struct _starpu_heft_data * data = node->data;
 	starpu_sched_node_add_child(data->no_perf_model_node,child);
 	starpu_sched_node_add_child(data->calibrating_node, child);
 }
 
-void _heft_remove_child(struct starpu_sched_node * node, struct starpu_sched_node * child)
+void heft_remove_child(struct starpu_sched_node * node, struct starpu_sched_node * child)
 {
+
+	STARPU_ASSERT(starpu_sched_node_is_heft(node));
 	starpu_sched_node_remove_child(node, child);
 	struct _starpu_heft_data * data = node->data;
 	starpu_sched_node_remove_child(data->no_perf_model_node,child);
 	starpu_sched_node_remove_child(data->calibrating_node, child);
 }
 
-static void _heft_notify_change_in_workers(struct starpu_sched_node * node)
+static void heft_notify_change_in_workers(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(starpu_sched_node_is_heft(node));
 	struct _starpu_heft_data * data = node->data;
 	starpu_bitmap_unset_all(data->no_perf_model_node->workers_in_ctx);
 	starpu_bitmap_unset_all(data->no_perf_model_node->workers);
@@ -190,8 +193,9 @@ static void _heft_notify_change_in_workers(struct starpu_sched_node * node)
 
 	data->calibrating_node->is_homogeneous = node->is_homogeneous;
 }
-void _heft_node_deinit_data(struct starpu_sched_node * node)
+void heft_node_deinit_data(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(starpu_sched_node_is_heft(node));
 	struct _starpu_heft_data * d = node->data;
 	starpu_sched_node_destroy(d->no_perf_model_node);
 	starpu_sched_node_destroy(d->calibrating_node);
@@ -211,23 +215,33 @@ struct starpu_sched_node * starpu_sched_node_heft_create(struct starpu_heft_data
 
 	data->calibrating_node = params->calibrating_node_create(params->arg_calibrating_node);
 
-	node->push_task = push_task;
-	node->add_child = _heft_add_child;
-	node->remove_child = _heft_remove_child;
 	node->data = data;
-	node->deinit_data = _heft_node_deinit_data;
-	node->notify_change_workers = _heft_notify_change_in_workers;
+
+	node->push_task = heft_push_task;
+	node->add_child = heft_add_child;
+	node->remove_child = heft_remove_child;
+	node->deinit_data = heft_node_deinit_data;
+	node->notify_change_workers = heft_notify_change_in_workers;
 
 	return node;
 }
 
 int starpu_sched_node_is_heft(struct starpu_sched_node * node)
 {
-	return node->push_task == push_task;
+	return node->push_task == heft_push_task;
 }
 
 
-
+/* initialize a policy that looks like this:
+ *     | heft |
+ *       /  \
+ * | best || best | ...
+ * | impl || impl | ...
+ *     |      |     ...
+ * | fifo || fifo | ...
+ *     |      |     ...
+ * |worker||worker| ...
+ */
 static void initialize_heft_center_policy(unsigned sched_ctx_id)
 {
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
@@ -267,7 +281,7 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 			.gamma = _gamma,
 			.idle_power = idle_power,
 			.no_perf_model_node_create = starpu_sched_node_random_create,
-			. arg_no_perf_model = NULL,
+			.arg_no_perf_model = NULL,
 			.calibrating_node_create = starpu_sched_node_random_create,
 			.arg_calibrating_node = NULL
 		};
@@ -280,10 +294,10 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 		STARPU_ASSERT(worker_node);
 
 #if 1
-		struct starpu_sched_node * ws = starpu_sched_node_work_stealing_create(NULL);
-		ws->add_child(ws, worker_node);
-		starpu_sched_node_set_father(worker_node, ws, sched_ctx_id);
-		worker_node = ws;
+		struct starpu_sched_node * fifo = starpu_sched_node_fifo_create(NULL);
+		fifo->add_child(fifo, worker_node);
+		starpu_sched_node_set_father(worker_node, fifo, sched_ctx_id);
+		worker_node = fifo;
 #endif
 
 		struct starpu_sched_node * impl_node = starpu_sched_node_best_implementation_create(NULL);
@@ -301,7 +315,7 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 static void deinitialize_heft_center_policy(unsigned sched_ctx_id)
 {
 	struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	starpu_sched_tree_destroy(t, sched_ctx_id);
+	starpu_sched_tree_destroy(t);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 

+ 24 - 6
src/sched_policies/node_random.c

@@ -18,13 +18,25 @@ static double compute_relative_speedup(struct starpu_sched_node * node)
 }
 
 
-static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
+static int random_push_task(struct starpu_sched_node * node, struct starpu_task * task)
 {
 	STARPU_ASSERT(node->nchilds > 0);
+
+	/* indexes_nodes and size are used to memoize node that can execute tasks
+	 * during the first phase of algorithm, it contain the size indexes of the nodes
+	 * that can execute task.
+	 */
 	int indexes_nodes[node->nchilds];
+	int size=0;
+
+	/* speedup[i] is revelant only if i is in the size firsts elements of
+	 * indexes_nodes
+	 */
 	double speedup[node->nchilds];
-	int size=0,i;
+
 	double alpha_sum = 0.0;
+
+	int i;
 	for(i = 0; i < node->nchilds ; i++)
 	{
 		if(starpu_sched_node_can_execute_task(node->childs[i],task))
@@ -38,6 +50,9 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 	if(size == 0)
 		return -ENODEV;
 
+	/* not fully sure that this code is correct
+	 * because of bad properties of double arithmetic
+	 */
 	double random = starpu_drand48()*alpha_sum;
 	double alpha = 0.0;
 	struct starpu_sched_node * select  = NULL;
@@ -57,7 +72,9 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 
 	return ret_val;
 }
-
+/* taking the min of estimated_end does not seem to be a good value to return here,
+ * as the random scheduler balances load between children very poorly
+ */
 double random_estimated_end(struct starpu_sched_node * node)
 {
 	double sum = 0.0;
@@ -66,17 +83,18 @@ double random_estimated_end(struct starpu_sched_node * node)
 		sum += node->childs[i]->estimated_end(node->childs[i]);
 	return sum / node->nchilds;
 }
+
 struct starpu_sched_node * starpu_sched_node_random_create(void * arg STARPU_ATTRIBUTE_UNUSED)
 {
 	struct starpu_sched_node * node = starpu_sched_node_create();
 	node->estimated_end = random_estimated_end;
-	node->push_task = push_task;
+	node->push_task = random_push_task;
 	return node;
 }
 
 int starpu_sched_node_is_random(struct starpu_sched_node *node)
 {
-	return node->push_task == push_task;
+	return node->push_task == random_push_task;
 }
 
 
@@ -101,7 +119,7 @@ static void initialize_random_center_policy(unsigned sched_ctx_id)
 static void deinitialize_random_center_policy(unsigned sched_ctx_id)
 {
 	struct starpu_sched_tree *tree = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	starpu_sched_tree_destroy(tree, sched_ctx_id);
+	starpu_sched_tree_destroy(tree);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 

+ 159 - 236
src/sched_policies/node_sched.c

@@ -5,37 +5,13 @@
 #include "sched_node.h"
 #include <float.h>
 
-double starpu_sched_compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer)
-{
 
-	if (now + predicted_transfer < predicted_end)
-	{
-		/* We may hope that the transfer will be finished by
-		 * the start of the task. */
-		predicted_transfer = 0;
-	}
-	else
-	{
-		/* The transfer will not be finished by then, take the
-		 * remainder into account */
-		predicted_transfer += now;
-		predicted_transfer -= predicted_end;
-	}
-	if(!isnan(predicted_transfer))
-	{
-		predicted_end += predicted_transfer;
-		predicted_length += predicted_transfer;
-	}
-
-	if(!isnan(predicted_length))
-		predicted_end += predicted_length;
-	return predicted_end;
-}
-
-
-static void _wake_simple_worker(int workerid)
+/* wake up worker workerid
+ * if called by a worker, it does not try to wake itself up
+ */
+static void wake_simple_worker(int workerid)
 {
-	STARPU_ASSERT(0 <= workerid && workerid < starpu_worker_get_count());
+	STARPU_ASSERT(0 <= workerid && (unsigned)  workerid < starpu_worker_get_count());
 	starpu_pthread_mutex_t * sched_mutex;
 	starpu_pthread_cond_t * sched_cond;
 	if(workerid == starpu_worker_get_id())
@@ -46,22 +22,32 @@ static void _wake_simple_worker(int workerid)
 	STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 }
 
-static void _wake_combined_worker(int workerid)
+/* wake up all workers of a combined worker
+ * this function must not be called during a pop (however this should not
+ * even be possible) or you will have a deadlock
+ */
+static void wake_combined_worker(int workerid)
 {
-	STARPU_ASSERT(starpu_worker_get_count() <= workerid
-		      && workerid < starpu_worker_get_count() + starpu_combined_worker_get_count());
-	int me = starpu_worker_get_id();
+	STARPU_ASSERT( 0 <= workerid
+		       && starpu_worker_get_count() <= (unsigned) workerid
+		       && (unsigned) workerid < starpu_worker_get_count() + starpu_combined_worker_get_count());
 	struct _starpu_combined_worker * combined_worker = _starpu_get_combined_worker_struct(workerid);
 	int * list = combined_worker->combined_workerid;
 	int size = combined_worker->worker_size;
 	int i;
 	for(i = 0; i < size; i++)
-		_wake_simple_worker(list[i]);
+		wake_simple_worker(list[i]);
 }
 
+
+/* this function must not be called on worker nodes:
+ * it would not make sense
+ * and is expected to deadlock
+ */
 void starpu_sched_node_available(struct starpu_sched_node * node)
 {
 	(void)node;
+	STARPU_ASSERT(node);
 	STARPU_ASSERT(!starpu_sched_node_is_worker(node));
 #ifndef STARPU_NON_BLOCKING_DRIVERS
 	int i;
@@ -69,17 +55,21 @@ void starpu_sched_node_available(struct starpu_sched_node * node)
 	    i != -1;
 	    i = starpu_bitmap_next(node->workers_in_ctx, i))
 	{
-		if(i < starpu_worker_get_count())
-			_wake_simple_worker(i);
+		if(i < (int) starpu_worker_get_count())
+			wake_simple_worker(i);
 		else
-			_wake_combined_worker(i);
+			wake_combined_worker(i);
 	}
 #endif
 }
 
-
+/* default implementation for node->pop_task()
+ * just perform a recursive call on father
+ */
 static struct starpu_task * pop_task_node(struct starpu_sched_node * node, unsigned sched_ctx_id)
 {
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
+	STARPU_ASSERT(node);
 	if(node->fathers[sched_ctx_id] == NULL)
 		return NULL;
 	else
@@ -88,77 +78,111 @@ static struct starpu_task * pop_task_node(struct starpu_sched_node * node, unsig
 
 
 void starpu_sched_node_set_father(struct starpu_sched_node *node,
-				   struct starpu_sched_node *father_node,
-				   unsigned sched_ctx_id)
+				  struct starpu_sched_node *father_node,
+				  unsigned sched_ctx_id)
 {
 	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
+	STARPU_ASSERT(node);
 	node->fathers[sched_ctx_id] = father_node;
 }
 
-struct starpu_task * pop_task(unsigned sched_ctx_id)
-{
-	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int workerid = starpu_worker_get_id();
-	struct starpu_sched_node * wn = starpu_sched_node_worker_get(workerid);
-	STARPU_PTHREAD_RWLOCK_RDLOCK(&t->lock);
-	struct starpu_task * task = wn->pop_task(wn, sched_ctx_id);
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
-	return task;
-}
 
-int push_task(struct starpu_task * task)
+
+/******************************************************************************
+ *          functions for struct starpu_sched_policy interface                *
+ ******************************************************************************/
+int starpu_sched_tree_push_task(struct starpu_task * task)
 {
+	STARPU_ASSERT(task);
 	unsigned sched_ctx_id = task->sched_ctx;
-	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	STARPU_PTHREAD_RWLOCK_RDLOCK(&t->lock);
-	int ret = t->root->push_task(t->root, task);
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
-	return ret;
+	struct starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	int workerid = starpu_worker_get_id();
+	/* the application should take tree->lock to prevent concurrent access from the hypervisor
+	 * workers take their own mutexes
+	 */
+	if(-1 == workerid)
+		STARPU_PTHREAD_MUTEX_LOCK(&tree->lock);
+	else
+		_starpu_sched_node_lock_worker(workerid);
+		
+	int ret_val = tree->root->push_task(tree->root,task);
+	if(-1 == workerid)
+		STARPU_PTHREAD_MUTEX_UNLOCK(&tree->lock);
+	else
+		_starpu_sched_node_unlock_worker(workerid);
+	return ret_val;
 }
 
+struct starpu_task * starpu_sched_tree_pop_task(unsigned sched_ctx_id)
+{
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
+	int workerid = starpu_worker_get_id();
+	struct starpu_sched_node * node = starpu_sched_node_worker_get(workerid);
 
+	/* _starpu_sched_node_lock_worker(workerid) is called by node->pop_task()
+	 */
+	struct starpu_task * task = node->pop_task(node, sched_ctx_id);
+	return task;
+}
 
 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 {
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
+	STARPU_ASSERT(workerids);
 	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	STARPU_PTHREAD_RWLOCK_WRLOCK(&t->lock);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&t->lock);
 	_starpu_sched_node_lock_all_workers();
+
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
 		starpu_bitmap_set(t->workers, workerids[i]);
+
 	starpu_sched_tree_update_workers_in_ctx(t);
+
 	_starpu_sched_node_unlock_all_workers();
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&t->lock);
 }
 
 void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 {
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
+	STARPU_ASSERT(workerids);
 	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	STARPU_PTHREAD_RWLOCK_WRLOCK(&t->lock);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&t->lock);
 	_starpu_sched_node_lock_all_workers();
+
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
 		starpu_bitmap_unset(t->workers, workerids[i]);
+
 	starpu_sched_tree_update_workers_in_ctx(t);
+
 	_starpu_sched_node_unlock_all_workers();
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&t->lock);
 }
 
 
+
+
 void starpu_sched_node_destroy_rec(struct starpu_sched_node * node, unsigned sched_ctx_id)
 {
 	if(node == NULL)
 		return;
 	struct starpu_sched_node ** stack = NULL;
 	int top = -1;
-#define PUSH(n) do{							\
+#define PUSH(n)								\
+	do{								\
 		stack = realloc(stack, sizeof(*stack) * (top + 2));	\
-		stack[++top] = n;}while(0)
+		stack[++top] = n;					\
+	}while(0)
 #define POP() stack[top--]
 #define EMPTY() (top == -1)
-//we want to delete all subtrees exept if a pointer in fathers point in an other tree
-//ie an other context
 
+	/* we want to delete all subtrees except when a pointer in fathers points into another tree,
+	 * i.e. another context
+	 */
 	node->fathers[sched_ctx_id] = NULL;
 	int shared = 0;
 	{
@@ -182,36 +206,40 @@ void starpu_sched_node_destroy_rec(struct starpu_sched_node * node, unsigned sch
 			child->fathers[sched_ctx_id] = NULL;
 			for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
 			{
-				if(child->fathers[j] != NULL)//child is shared
+				if(child->fathers[j] != NULL)/* child is shared */
 					shared = 1;
 			}
-			if(!shared)//if not shared we want to destroy it and his childs
+			if(!shared)/* if not shared we want to destroy it and his childs */
 				PUSH(child);
 		}
 		starpu_sched_node_destroy(n);
 	}
 	free(stack);
 }
+
 struct starpu_sched_tree * starpu_sched_tree_create(unsigned sched_ctx_id)
 {
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
 	struct starpu_sched_tree * t = malloc(sizeof(*t));
 	memset(t, 0, sizeof(*t));
 	t->sched_ctx_id = sched_ctx_id;
 	t->workers = starpu_bitmap_create();
-	STARPU_PTHREAD_RWLOCK_INIT(&t->lock,NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&t->lock,NULL);
 	return t;
 }
 
-void starpu_sched_tree_destroy(struct starpu_sched_tree * tree, unsigned sched_ctx_id)
+void starpu_sched_tree_destroy(struct starpu_sched_tree * tree)
 {
+	STARPU_ASSERT(tree);
 	if(tree->root)
-		starpu_sched_node_destroy_rec(tree->root, sched_ctx_id);
+		starpu_sched_node_destroy_rec(tree->root, tree->sched_ctx_id);
 	starpu_bitmap_destroy(tree->workers);
-	STARPU_PTHREAD_RWLOCK_DESTROY(&tree->lock);
+	STARPU_PTHREAD_MUTEX_DESTROY(&tree->lock);
 	free(tree);
 }
 void starpu_sched_node_add_child(struct starpu_sched_node* node, struct starpu_sched_node * child)
 {
+	STARPU_ASSERT(node && child);
 	STARPU_ASSERT(!starpu_sched_node_is_worker(node));
 	int i;
 	for(i = 0; i < node->nchilds; i++){
@@ -225,6 +253,9 @@ void starpu_sched_node_add_child(struct starpu_sched_node* node, struct starpu_s
 }
 void starpu_sched_node_remove_child(struct starpu_sched_node * node, struct starpu_sched_node * child)
 {
+	STARPU_ASSERT(node && child);
+	STARPU_ASSERT(!starpu_sched_node_is_worker(node));
+	STARPU_ASSERT(node && child);
 	int pos;
 	for(pos = 0; pos < node->nchilds; pos++)
 		if(node->childs[pos] == child)
@@ -235,51 +266,12 @@ void starpu_sched_node_remove_child(struct starpu_sched_node * node, struct star
 
 struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id)
 {
+	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
 	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
+	STARPU_ASSERT(t);
 	return t->workers;
 }
-void _starpu_sched_node_block_worker(int workerid);
-void _starpu_sched_node_unblock_worker(int workerid);
-int starpu_sched_tree_push_task(struct starpu_task * task)
-{
-	unsigned sched_ctx_id = task->sched_ctx;
-	struct starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int workerid = starpu_worker_get_id();
-	if(-1 == workerid)
-		STARPU_PTHREAD_RWLOCK_RDLOCK(&tree->lock);
-	else
-		_starpu_sched_node_block_worker(workerid);
-		
-	int ret_val = tree->root->push_task(tree->root,task);
-	if(-1 == workerid)
-		STARPU_PTHREAD_RWLOCK_UNLOCK(&tree->lock);
-	else
-		_starpu_sched_node_unblock_worker(workerid);
-	return ret_val;
-}
-struct starpu_task * starpu_sched_tree_pop_task(unsigned sched_ctx_id)
-{
-	struct starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int workerid = starpu_worker_get_id();
-	struct starpu_sched_node * node = starpu_sched_node_worker_get(workerid);
-	struct starpu_task * task = node->pop_task(node, sched_ctx_id);
-	return task;
-}
-/*
-static double estimated_finish_time(struct starpu_sched_node * node)
-{
-	double sum = 0.0;
-	int i;
-	for(i = 0; i < node->nchilds; i++)
-	{
-		struct starpu_sched_node * c = node->childs[i];
-		double tmp = c->estimated_finish_time(c);
-		if( tmp > sum)
-			sum = tmp;
-	}
-	return sum;
-}
-*/
+
 static double estimated_load(struct starpu_sched_node * node)
 {
 	double sum = 0.0;
@@ -292,9 +284,9 @@ static double estimated_load(struct starpu_sched_node * node)
 	return sum;
 }
 
-
 static double _starpu_sched_node_estimated_end_min(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node);
 	double min = DBL_MAX;
 	int i;
 	for(i = 0; i < node->nchilds; i++)
@@ -306,8 +298,13 @@ static double _starpu_sched_node_estimated_end_min(struct starpu_sched_node * no
 	return min;
 }
 
+/* this function finds the best implementation, or an implementation that needs to be calibrated, for an available worker
+ * and sets the prediction in *length: nan if an implementation needs to be calibrated, 0.0 if no perf model is available
+ * returns false if no worker on the node can execute that task
+ */
 int STARPU_WARN_UNUSED_RESULT starpu_sched_node_execute_preds(struct starpu_sched_node * node, struct starpu_task * task, double * length)
 {
+	STARPU_ASSERT(node && task);
 	int can_execute = 0;
 	starpu_task_bundle_t bundle = task->bundle;
 	double len = DBL_MAX;
@@ -359,8 +356,29 @@ int STARPU_WARN_UNUSED_RESULT starpu_sched_node_execute_preds(struct starpu_sche
 	return can_execute;
 }
 
+/* very similar function that does not compute a prediction */
+int starpu_sched_node_can_execute_task(struct starpu_sched_node * node, struct starpu_task * task)
+{
+	STARPU_ASSERT(task);
+	STARPU_ASSERT(node);
+	unsigned nimpl;
+	int worker;
+	for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		for(worker = starpu_bitmap_first(node->workers_in_ctx);
+		    -1 != worker;
+		    worker = starpu_bitmap_next(node->workers_in_ctx, worker))
+			if (starpu_worker_can_execute_task(worker, task, nimpl)
+			     || starpu_combined_worker_can_execute_task(worker, task, nimpl))
+			    return 1;
+	return 0;
+}
+
+/* compute the average transfer length for the task over all workers
+ * maybe this should be optimised if all workers are under the same NUMA node
+ */
 double starpu_sched_node_transfer_length(struct starpu_sched_node * node, struct starpu_task * task)
 {
+	STARPU_ASSERT(node && task);
 	int nworkers = starpu_bitmap_cardinal(node->workers_in_ctx);
 	double sum = 0.0;
 	int worker;
@@ -376,45 +394,15 @@ double starpu_sched_node_transfer_length(struct starpu_sched_node * node, struct
 		else
 		{
 			sum += starpu_task_expected_data_transfer_time(memory_node, task);
-			//sum += starpu_task_expected_conversion_time(task, starpu_worker_get_perf_archtype(worker), impl ?)
+			/* sum += starpu_task_expected_conversion_time(task, starpu_worker_get_perf_archtype(worker), impl ?)
+			 * I do not know what to do here, as we do not know which implementation would be used...
+			 */
 		}
 	}
 	return sum / nworkers;
 }
 
 
-/*
-static double estimated_transfer_length(struct starpu_sched_node * node, struct starpu_task * task)
-{
-	double sum = 0.0;
-	int nb = 0, i = 0;
-	for(i = 0; i < node->nchilds; i++)
-	{		struct starpu_sched_node * c = node->childs[i];
-		if(starpu_sched_node_can_execute_task(c, task))
-		{
-			sum += c->estimated_transfer_length(c, task);
-			nb++;
-		}
-	}
-	sum /= nb;
-	return sum;
-}
-*/
-int starpu_sched_node_can_execute_task(struct starpu_sched_node * node, struct starpu_task * task)
-{
-	unsigned nimpl;
-	int worker;
-	STARPU_ASSERT(task);
-	STARPU_ASSERT(node);
-	for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
-		for(worker = starpu_bitmap_first(node->workers_in_ctx);
-		    -1 != worker;
-		    worker = starpu_bitmap_next(node->workers_in_ctx, worker))
-			if (starpu_worker_can_execute_task(worker, task, nimpl)
-			     || starpu_combined_worker_can_execute_task(worker, task, nimpl))
-			    return 1;
-	return 0;
-}
 
 
 void take_node_and_does_nothing(struct starpu_sched_node * node STARPU_ATTRIBUTE_UNUSED)
@@ -436,8 +424,14 @@ struct starpu_sched_node * starpu_sched_node_create(void)
 	node->deinit_data = take_node_and_does_nothing;
 	return node;
 }
+
+/* remove all children
+ * for every child of node, if child->fathers[x] == node, set child->fathers[x] to NULL
+ * call node->deinit_data
+ */
 void starpu_sched_node_destroy(struct starpu_sched_node *node)
 {
+	STARPU_ASSERT(node);
 	if(starpu_sched_node_is_worker(node))
 		return;
 	int i,j;
@@ -449,6 +443,8 @@ void starpu_sched_node_destroy(struct starpu_sched_node *node)
 				child->fathers[i] = NULL;
 
 	}
+	while(node->nchilds != 0)
+		node->remove_child(node, node->childs[0]);
 	node->deinit_data(node);
 	free(node->childs);
 	starpu_bitmap_destroy(node->workers);
@@ -456,30 +452,32 @@ void starpu_sched_node_destroy(struct starpu_sched_node *node)
 	free(node);
 }
 
-
+/* set the node->is_homogeneous member according to node->workers_in_ctx
+ */
 static void set_is_homogeneous(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node);
 	STARPU_ASSERT(starpu_bitmap_cardinal(node->workers_in_ctx) > 0);
 	if(starpu_bitmap_cardinal(node->workers_in_ctx) == 1)
 		node->is_homogeneous = 1;
 	int worker = starpu_bitmap_first(node->workers_in_ctx);
-	uint32_t last_worker = _starpu_get_worker_struct(worker)->worker_mask;
-	
-	do
-	{
-		if(last_worker != _starpu_get_worker_struct(worker)->worker_mask)
+	uint32_t first_worker = _starpu_get_worker_struct(worker)->worker_mask;
+
+	for(;
+	    worker != -1;
+	    worker = starpu_bitmap_next(node->workers_in_ctx, worker))		
+		if(first_worker != _starpu_get_worker_struct(worker)->worker_mask)
 		{
 			node->is_homogeneous = 0;
 			return;
 		}
-		last_worker = _starpu_get_worker_struct(worker)->worker_mask;
-		worker = starpu_bitmap_next(node->workers_in_ctx, worker);
-	}while(worker != -1);
 	node->is_homogeneous = 1;
 }
-
+/* recursively set the node->workers member of node's subtree
+ */
 void _starpu_sched_node_update_workers(struct starpu_sched_node * node)
 {
+	STARPU_ASSERT(node);
 	if(starpu_sched_node_is_worker(node))
 		return;
 	starpu_bitmap_unset_all(node->workers);
@@ -492,8 +490,11 @@ void _starpu_sched_node_update_workers(struct starpu_sched_node * node)
 	}
 }
 
+/* recursively set the node->workers_in_ctx in node's subtree
+ */
 void _starpu_sched_node_update_workers_in_ctx(struct starpu_sched_node * node, unsigned sched_ctx_id)
 {
+	STARPU_ASSERT(node);
 	if(starpu_sched_node_is_worker(node))
 		return;
 	struct starpu_bitmap * workers_in_ctx = _starpu_get_worker_mask(sched_ctx_id);
@@ -516,90 +517,12 @@ void _starpu_sched_node_update_workers_in_ctx(struct starpu_sched_node * node, u
 
 void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree * t)
 {
+	STARPU_ASSERT(t);
 	_starpu_sched_node_update_workers_in_ctx(t->root, t->sched_ctx_id);
 }
 
 void starpu_sched_tree_update_workers(struct starpu_sched_tree * t)
 {
+	STARPU_ASSERT(t);
 	_starpu_sched_node_update_workers(t->root);
 }
-
-void _update_worker_bits(struct starpu_sched_node * node, struct starpu_bitmap * workers_in_ctx)
-{
-	if(starpu_sched_node_is_worker(node))
-		return;
-	starpu_bitmap_unset_and(node->workers_in_ctx, node->workers, workers_in_ctx);
-	int i;
-	for(i = 0; i < node->nchilds; i++)
-		_update_worker_bits(node->childs[i], workers_in_ctx);
-}
-
-
-
-void starpu_sched_node_init_rec(struct starpu_sched_node * node)
-{
-	if(starpu_sched_node_is_worker(node))
-		return;
-	int i;
-	for(i = 0; i < node->nchilds; i++)
-		starpu_sched_node_init_rec(node->childs[i]);
-
-	for(i = 0; i < node->nchilds; i++)
-		starpu_bitmap_or(node->workers, node->childs[i]->workers);
-	set_is_homogeneous(node);
-}
-
-
-
-static void _init_add_worker_bit(struct starpu_sched_node * node, int worker)
-{
-	STARPU_ASSERT(node);
-	starpu_bitmap_set(node->workers, worker);
-	int i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		if(node->fathers[i])
-		{
-			_init_add_worker_bit(node->fathers[i], worker);
-			set_is_homogeneous(node->fathers[i]);
-		}
-}
-
-void _starpu_set_workers_bitmaps(void)
-{
-	unsigned worker;	
-	for(worker = 0; worker < starpu_worker_get_count() + starpu_combined_worker_get_count(); worker++)
-	{
-		struct starpu_sched_node * worker_node = starpu_sched_node_worker_get(worker);
-		_init_add_worker_bit(worker_node, worker);
-	}
-}
-
-
-static int push_task_to_first_suitable_parent(struct starpu_sched_node * node, struct starpu_task * task, int sched_ctx_id)
-{
-	if(node == NULL || node->fathers[sched_ctx_id] == NULL)
-		return 1;
-
-	struct starpu_sched_node * father = node->fathers[sched_ctx_id];
-	if(starpu_sched_node_can_execute_task(father,task))
-		return father->push_task(father, task);
-	else
-		return push_task_to_first_suitable_parent(father, task, sched_ctx_id);
-}
-
-
-int starpu_sched_node_push_tasks_to_firsts_suitable_parent(struct starpu_sched_node * node, struct starpu_task_list *list, int sched_ctx_id)
-{
-	while(!starpu_task_list_empty(list))
-	{
-		struct starpu_task * task = starpu_task_list_pop_front(list);
-		int res = push_task_to_first_suitable_parent(node, task, sched_ctx_id);
-		if(res)
-		{
-			starpu_task_list_push_front(list,task);
-			return res;
-		}
-	}
-	return 0;
-}
-

+ 1 - 1
src/sched_policies/node_work_stealing.c

@@ -351,7 +351,7 @@ static void initialize_ws_center_policy(unsigned sched_ctx_id)
 static void deinitialize_ws_center_policy(unsigned sched_ctx_id)
 {
 	struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	starpu_sched_tree_destroy(t, sched_ctx_id);
+	starpu_sched_tree_destroy(t);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 }
 

+ 4 - 4
src/sched_policies/node_worker.c

@@ -397,13 +397,13 @@ void starpu_sched_node_worker_destroy(struct starpu_sched_node *node)
 	_worker_nodes[id] = NULL;
 }
 
-void _starpu_sched_node_block_worker(int workerid)
+void _starpu_sched_node_lock_worker(int workerid)
 {
 	STARPU_ASSERT(0 <= workerid && workerid < (int) starpu_worker_get_count());
 	struct _starpu_worker_node_data * data = starpu_sched_node_worker_create(workerid)->data;
 	STARPU_PTHREAD_MUTEX_LOCK(&data->lock);
 }
-void _starpu_sched_node_unblock_worker(int workerid)
+void _starpu_sched_node_unlock_worker(int workerid)
 {
 	STARPU_ASSERT(0 <= workerid && workerid < (int)starpu_worker_get_count());
 	struct _starpu_worker_node_data * data = starpu_sched_node_worker_create(workerid)->data;
@@ -414,13 +414,13 @@ void _starpu_sched_node_lock_all_workers(void)
 {
 	unsigned i;
 	for(i = 0; i < starpu_worker_get_count(); i++)
-		_starpu_sched_node_block_worker(i);
+		_starpu_sched_node_lock_worker(i);
 }
 void _starpu_sched_node_unlock_all_workers(void)
 {
 	unsigned i;
 	for(i = 0; i < starpu_worker_get_count(); i++)
-		_starpu_sched_node_unblock_worker(i);
+		_starpu_sched_node_unlock_worker(i);
 }
 
 

+ 18 - 2
src/sched_policies/prio_deque.c

@@ -4,10 +4,12 @@
 
 void _starpu_prio_deque_init(struct _starpu_prio_deque * pdeque)
 {
+	STARPU_ASSERT(pdeque);
 	memset(pdeque,0,sizeof(*pdeque));
 }
 void _starpu_prio_deque_destroy(struct _starpu_prio_deque * pdeque)
 {
+	STARPU_ASSERT(pdeque);
 	int i;
 	for(i = 0; i < pdeque->size_array; i++)
 	{
@@ -18,11 +20,17 @@ void _starpu_prio_deque_destroy(struct _starpu_prio_deque * pdeque)
 
 int _starpu_prio_deque_is_empty(struct _starpu_prio_deque * pdeque)
 {
+	STARPU_ASSERT(pdeque);
 	return pdeque->ntasks == 0;
 }
 
+
+/* return the struct starpu_task_list * for priority prio,
+ * creating it first if none exists yet
+ */
 static struct starpu_task_list * get_prio(struct _starpu_prio_deque * pdeque, int prio)
 {
+	STARPU_ASSERT(pdeque);
 	int i;
 	for(i = 0; i < pdeque->size_array; i++)
 	{
@@ -48,6 +56,7 @@ static struct starpu_task_list * get_prio(struct _starpu_prio_deque * pdeque, in
 
 int _starpu_prio_deque_push_task(struct _starpu_prio_deque * pdeque, struct starpu_task * task)
 {
+	STARPU_ASSERT(pdeque && task);
 	struct starpu_task_list * list = get_prio(pdeque, task->priority);
 	starpu_task_list_push_back(list, task);
 	pdeque->ntasks++;
@@ -55,6 +64,9 @@ int _starpu_prio_deque_push_task(struct _starpu_prio_deque * pdeque, struct star
 }
 
 
+
+/* a little dirty code factorization */
+
 static inline int pred_true(struct starpu_task * t STARPU_ATTRIBUTE_UNUSED, void * v STARPU_ATTRIBUTE_UNUSED)
 {
 	return 1;
@@ -69,7 +81,6 @@ static inline int pred_can_execute(struct starpu_task * t, void * pworkerid)
 	return 0;
 }
 
-
 #define REMOVE_TASK(pdeque, first_task_field, next_task_field, predicate, parg)	\
 	{								\
 		int i;							\
@@ -95,16 +106,21 @@ struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque * pde
 }
 struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid)
 {
+	STARPU_ASSERT(pdeque);
+	STARPU_ASSERT(0 <= workerid && (unsigned) workerid < starpu_worker_get_count());
 	REMOVE_TASK(pdeque, head, prev, pred_can_execute, &workerid);
 }
 
-// deque a task of the higher priority available
+/* deque a task of the highest priority available */
 struct starpu_task * _starpu_prio_deque_deque_task(struct _starpu_prio_deque * pdeque)
 {
+	STARPU_ASSERT(pdeque);
 	REMOVE_TASK(pdeque, tail, next, pred_true, STARPU_POISON_PTR);
 }
 
 struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid)
 {
+	STARPU_ASSERT(pdeque);
+	STARPU_ASSERT(0 <= workerid && (unsigned) workerid < starpu_worker_get_count());
 	REMOVE_TASK(pdeque, tail, next, pred_can_execute, &workerid);
 }

+ 17 - 5
src/sched_policies/prio_deque.h

@@ -22,15 +22,27 @@ struct _starpu_prio_deque
 void _starpu_prio_deque_init(struct _starpu_prio_deque *);
 void _starpu_prio_deque_destroy(struct _starpu_prio_deque *);
 
+/* return 0 iff the struct _starpu_prio_deque is not empty */
 int _starpu_prio_deque_is_empty(struct _starpu_prio_deque *);
 
-int _starpu_prio_deque_push_task(struct _starpu_prio_deque *, struct starpu_task*);
+/* push a task in O(nb priorities) */
+int _starpu_prio_deque_push_task(struct _starpu_prio_deque *, struct starpu_task *);
 
-struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque*);
-struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque*, int workerid);
 
-// deque a task of the higher priority available
+/* all _starpu_prio_deque_pop/deque_task functions return a task, or a NULL pointer if none is available,
+ * in O(nb priorities)
+ */
+
+struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque *);
+
+/* return a task that can be executed by workerid
+ */
+struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque *, int workerid);
+
+/* deque a task of the highest priority available */
 struct starpu_task * _starpu_prio_deque_deque_task(struct _starpu_prio_deque *);
+/* return a task that can be executed by workerid
+ */
 struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque *, int workerid);
 
-#endif // __PRIO_DEQUE_H__
+#endif /* __PRIO_DEQUE_H__ */

+ 3 - 2
src/sched_policies/sched_node.h

@@ -3,8 +3,9 @@
 
 void _starpu_sched_node_lock_all_workers(void);
 void _starpu_sched_node_unlock_all_workers(void);
-void _starpu_sched_node_block_worker(int workerid);
-void _starpu_sched_node_unblock_worker(int workerid);
+void _starpu_sched_node_lock_worker(int workerid);
+void _starpu_sched_node_unlock_worker(int workerid);
+
 
 struct _starpu_worker * _starpu_sched_node_worker_get_worker(struct starpu_sched_node *);
 struct _starpu_combined_worker * _starpu_sched_node_combined_worker_get_combined_worker(struct starpu_sched_node * worker_node);