瀏覽代碼

Big change:
the set of workers available in the context is now memoized in starpu_sched_node
new function to get the expected end of a node
new additions to bitmaps
new heft node that uses the expected end function (does not work yet)

Simon Archipoff 12 年之前
父節點
當前提交
8fc2aa4d01

+ 32 - 46
include/starpu_sched_node.h

@@ -11,7 +11,7 @@
  * structure of them, some parts of scheduler can be shared by several contexes
  * to perform some local optimisations, so, for all nodes, a list of father is
  * defined indexed by sched_ctx_id
- * 
+ *
  * they embed there specialised method in a pseudo object-style, so calls are like node->push_task(node,task)
  *
  */
@@ -38,12 +38,12 @@ struct starpu_sched_node
 	/* this function notify underlying worker that a task as been pushed
 	 * and would be returned by a pop_task call
 	 * it should be called each time a node localy store a task
-	 * 
+
 	 * default implementation simply perform a recursive call on childrens
 	 * this function can be called by a worker as it doesn't try to wake up himself
 	 */
 	void (*available)(struct starpu_sched_node *);
-	
+
 	/* this function is an heuristic that compute load of subtree, basicaly
 	 * it compute
 	 * estimated_load(node) = sum(estimated_load(node_childs)) +
@@ -51,12 +51,7 @@ struct starpu_sched_node
 	 */
 	double (*estimated_load)(struct starpu_sched_node * node);
 
-	/* this function return a struct starpu_task_execute_preds defined lower
-	 * wich basicaly give predictions for a task execution a call on 
-	 * homogeneous (with all workers of the same arch) node is optimised
-	 */
-	struct starpu_task_execute_preds (*estimated_execute_preds)(struct starpu_sched_node * node,
-								     struct starpu_task * task);
+	double (*estimated_end)(struct starpu_sched_node * node);
 	/* the numbers of node's childs
 	 */
 	int nchilds;
@@ -70,6 +65,10 @@ struct starpu_sched_node
 	/* the set of workers in the node's subtree
 	 */
 	struct starpu_bitmap * workers;
+	/* the workers of the node's subtree that are available in the context
+	 */
+	struct starpu_bitmap * workers_in_ctx;
+	
 	/* is_homogeneous is 0 iff workers in the node's subtree are heterogeneous,
 	 * this field is set and updated automaticaly, you shouldn't write on it
 	 */
@@ -82,7 +81,7 @@ struct starpu_sched_node
 	 * workers member was filled, can be used to init data, or anything you want
 	 */
 	void (*init_data)(struct starpu_sched_node *);
-	/* this function is called to free data allocated by init_data 
+	/* this function is called to free data allocated by init_data
 	 * just before the call of starpu_sched_node_destroy(node)
 	 */
 	void (*deinit_data)(struct starpu_sched_node *);
@@ -96,32 +95,6 @@ struct starpu_sched_node
 #endif
 };
 
-/* this structure is only returned by estimated_execute_preds and give
- * predictions on task computations
- */
-struct starpu_task_execute_preds
-{
-	/* if several value are possible for state member,
-	 * in order of priority :
-	 * CALIBRATING, PERF_MODEL, NO_PERF_MODEL, CANNOT_EXECUTE
-	 */
-	enum {CANNOT_EXECUTE = 0, CALIBRATING , NO_PERF_MODEL, PERF_MODEL} state;
-
-	/* archtype and nimpl is set to
-	 * best values if state is PERF_MODEL
-	 * values that needs to be calibrated if state is CALIBRATING
-	 * suitable values if NO_PERF_MODEL
-	 * irrevelant if CANNOT_EXECUTE
-	 */
-	enum starpu_perfmodel_archtype archtype;
-	int impl;
-
-	double expected_finish_time;
-	double expected_length;
-	double expected_transfer_length;
-	double expected_power;
-};
-
 
 struct starpu_sched_tree
 {
@@ -136,6 +109,9 @@ struct starpu_sched_tree
 
 
 
+int STARPU_WARN_UNUSED_RESULT starpu_sched_node_execute_preds(struct starpu_sched_node * node, struct starpu_task * task, double * length);
+double starpu_sched_node_transfer_length(struct starpu_sched_node * node, struct starpu_task * task);
+
 struct starpu_sched_node * starpu_sched_node_create(void);
 
 void starpu_sched_node_destroy(struct starpu_sched_node * node);
@@ -166,12 +142,22 @@ int starpu_sched_node_is_work_stealing(struct starpu_sched_node * node);
 struct starpu_sched_node * starpu_sched_node_random_create(void * arg STARPU_ATTRIBUTE_UNUSED);
 int starpu_sched_node_is_random(struct starpu_sched_node *);
 
-struct starpu_sched_node * starpu_sched_node_heft_create(void * arg STARPU_ATTRIBUTE_UNUSED);
-/* this function is called to create the node wich will be used to push task when no perf model are available
- * by default, a random node is created
- */
-void starpu_sched_node_heft_set_no_model_node(struct starpu_sched_node * heft_node,
-					       struct starpu_sched_node * (*create_no_model_node)(void * arg), void * arg);
+
+struct starpu_heft_data
+{
+	double alpha;
+	double beta;
+	double gamma;
+	double idle_power;
+	struct starpu_sched_node * no_perf_model_node;
+	struct starpu_sched_node * calibrating_node;
+};
+
+/* create a node with heft_data parameters
+   a copy of the given struct starpu_heft_data is performed during the init_data call
+   the heft node doesn't do anything with no_perf_model_node and calibrating_node but push tasks on them
+*/
+struct starpu_sched_node * starpu_sched_node_heft_create(struct starpu_heft_data * heft_data);
 
 int starpu_sched_node_is_heft(struct starpu_sched_node * node);
 
@@ -212,9 +198,6 @@ void starpu_sched_tree_call_init_data(struct starpu_sched_tree * t);
  */
 int starpu_sched_node_push_tasks_to_firsts_suitable_parent(struct starpu_sched_node * node, struct starpu_task_list * list, int sched_ctx_id);
 
-struct starpu_task_execute_preds starpu_sched_node_average_estimated_execute_preds(struct starpu_sched_node * node, struct starpu_task * task);
-
-
 
 struct starpu_bitmap;
 
@@ -227,7 +210,10 @@ void starpu_bitmap_unset_all(struct starpu_bitmap *);
 
 int starpu_bitmap_get(struct starpu_bitmap *, int);
 
-//this is basically compute a |= b;
+/* basically computes starpu_bitmap_unset_all(a); a = b & c; */
+void starpu_bitmap_unset_and(struct starpu_bitmap * a, struct starpu_bitmap * b, struct starpu_bitmap * c);
+
+/* this basically computes a |= b; */
 void starpu_bitmap_or(struct starpu_bitmap * a,
 		       struct starpu_bitmap * b);
 

+ 24 - 0
src/sched_policies/bitmap.c

@@ -20,6 +20,16 @@ struct starpu_bitmap{
 static int check_bitmap(struct starpu_bitmap *b);
 #endif
 
+static int _count_bit(unsigned long e)
+{
+	int c = 0;
+	while(e)
+	{
+		c += e&1;
+		e >>= 1;
+	}
+	return c;
+}
 
 struct starpu_bitmap * starpu_bitmap_create(void)
 {
@@ -66,6 +76,20 @@ void starpu_bitmap_unset_all(struct starpu_bitmap * b)
 	b->size = 0;
 }
 
+void starpu_bitmap_unset_and(struct starpu_bitmap * a, struct starpu_bitmap * b, struct starpu_bitmap * c)
+{
+	int n = STARPU_MIN(b->size, c->size);
+	a->bits = realloc(a->bits, sizeof(unsigned long) * n);
+	a->size = n;
+	a->cardinal = 0;
+	int i;
+	for(i = 0; i < n; i++)
+	{
+		a->bits[i] = b->bits[i] & c->bits[i];
+		a->cardinal += _count_bit(a->bits[i]);
+	}
+}
+
 int starpu_bitmap_get(struct starpu_bitmap * b, int e)
 {
 	if(e / LONG_BIT >= b->size)

+ 0 - 7
src/sched_policies/node_composed.c

@@ -114,12 +114,6 @@ double composed_node_estimated_load(struct starpu_sched_node * node)
 	return c->top->estimated_load(c->top);
 }
 
-struct starpu_task_execute_preds composed_node_estimated_execute_preds(struct starpu_sched_node * node,
-									struct starpu_task * task)
-{
-	struct composed_node * c = node->data;
-	return c->top->estimated_execute_preds(c->top,task);
-}
 
 static void invalid_second_init_data(struct starpu_sched_node * node STARPU_ATTRIBUTE_UNUSED)
 {
@@ -172,7 +166,6 @@ struct starpu_sched_node * starpu_sched_node_composed_node_create(struct _starpu
 	node->pop_task = composed_node_pop_task;
 	node->available = composed_node_available;
 	node->estimated_load = composed_node_estimated_load;
-	node->estimated_execute_preds = composed_node_estimated_execute_preds;
 	node->init_data = composed_node_init_data;
 	node->deinit_data = composed_node_deinit_data;
 

+ 13 - 35
src/sched_policies/node_fifo.c

@@ -10,44 +10,19 @@ struct _starpu_fifo_data
 };
 
 
-static struct starpu_task_execute_preds estimated_execute_preds(struct starpu_sched_node * node,
-								 struct starpu_task * task)
+
+static double fifo_estimated_end(struct starpu_sched_node * node)
 {
 	struct _starpu_fifo_data * data = node->data;
 	struct _starpu_prio_deque * fifo = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
-	if(node->nchilds == 0)
-	{
-		struct starpu_task_execute_preds p = { CANNOT_EXECUTE };
-		return p;
-	}
-	
-	if(!node->is_homogeneous)
-	{
-		struct starpu_task_execute_preds preds = starpu_sched_node_average_estimated_execute_preds(node, task);
-		STARPU_PTHREAD_MUTEX_LOCK(mutex);
-		double fifo_len = fifo->exp_len / starpu_bitmap_cardinal(node->workers);
-		preds.expected_finish_time = starpu_sched_compute_expected_time(fifo->exp_start,
-									   preds.expected_finish_time + fifo_len,
-									   preds.state == PERF_MODEL ? preds.expected_length + fifo_len : fifo_len,
-									   preds.expected_transfer_length);
-		STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
-		return preds;
-	}
-	
-	struct starpu_task_execute_preds preds = node->childs[0]->estimated_execute_preds(node->childs[0],task);
+	int card = starpu_bitmap_cardinal(node->workers_in_ctx);
 
-	if(preds.state == PERF_MODEL)
-	{
-		double fifo_len = fifo->exp_len / starpu_bitmap_cardinal(node->workers);
-		STARPU_PTHREAD_MUTEX_LOCK(mutex);
-		preds.expected_finish_time = starpu_sched_compute_expected_time(fifo->exp_start,
-									   preds.expected_finish_time + fifo_len,
-									   preds.expected_length + fifo_len,
-									   preds.expected_transfer_length);
-		STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
-	}
-	return preds;
+	STARPU_PTHREAD_MUTEX_LOCK(mutex);
+	double estimated_end = fifo->exp_start + fifo->exp_len / card;
+	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
+
+	return estimated_end;
 }
 
 static double estimated_load(struct starpu_sched_node * node)
@@ -127,9 +102,12 @@ static struct starpu_task * pop_task(struct starpu_sched_node * node, unsigned s
 		_starpu_prio_deque_pop_task_for_worker(fifo, starpu_worker_get_id());
 	if(task)
 	{
-		fifo->exp_start = starpu_timing_now();
+
 		if(!isnan(task->predicted))
+		{
+			fifo->exp_start = starpu_timing_now() + task->predicted;
 			fifo->exp_len -= task->predicted;
+		}
 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
 		if(fifo->ntasks == 0)
 			fifo->exp_len = 0.0;
@@ -193,7 +171,7 @@ int starpu_sched_node_is_fifo(struct starpu_sched_node * node)
 struct starpu_sched_node * starpu_sched_node_fifo_create(void * arg STARPU_ATTRIBUTE_UNUSED)
 {
 	struct starpu_sched_node * node = starpu_sched_node_create();
-	node->estimated_execute_preds = estimated_execute_preds;
+	node->estimated_end = fifo_estimated_end;
 	node->estimated_load = estimated_load;
 	node->init_data = init_fifo_data;
 	node->deinit_data = deinit_fifo_data;

+ 68 - 151
src/sched_policies/node_heft.c

@@ -5,147 +5,73 @@
 #include <float.h>
 
 
-struct _starpu_dmda_data
-{
-	double alpha;
-	double beta;
-	double gamma;
-	double idle_power;
-	struct starpu_sched_node * no_model_node;
-};
 
-static double compute_fitness_calibration(struct starpu_sched_node * child,
-					  struct _starpu_dmda_data * data STARPU_ATTRIBUTE_UNUSED,
-					  struct starpu_task_execute_preds *pred,
-					  double best_exp_end STARPU_ATTRIBUTE_UNUSED,
-					  double max_exp_end STARPU_ATTRIBUTE_UNUSED)
+static double compute_fitness(struct starpu_heft_data * d, double exp_end, double best_exp_end, double max_exp_end, double transfer_len, double local_power)
 {
-	if(pred->state == CALIBRATING)
-		return child->estimated_load(child);
-	return DBL_MAX;
-}
-
-static double compute_fitness_perf_model(struct starpu_sched_node * child STARPU_ATTRIBUTE_UNUSED,
-					 struct _starpu_dmda_data * data,
-					 struct starpu_task_execute_preds * preds,
-					 double best_exp_end,
-					 double max_exp_end)
-{
-	double fitness;
-	switch(preds->state)
-	{
-	case CANNOT_EXECUTE:
-	case NO_PERF_MODEL:
-		return DBL_MAX;
-	case PERF_MODEL:
-		fitness = data->alpha * (preds->expected_finish_time - best_exp_end)
-			+ data->beta  * preds->expected_transfer_length
-			+ data->gamma * preds->expected_power
-			+ data->gamma * data->idle_power * (max_exp_end - best_exp_end) / 1000000.0;
-		return fitness;
-	case CALIBRATING:
-		STARPU_ASSERT_MSG(0,"we should have calibrate this task");
-	default:
-		STARPU_ABORT();
-		break;
-	}
+	return d->alpha * (exp_end - best_exp_end)
+		+ d->beta * transfer_len
+		+ d->gamma * local_power
+		+ d->gamma * d->idle_power * (exp_end - max_exp_end);
 }
 
 static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 {
-	struct starpu_task_execute_preds preds[node->nchilds];
+	struct starpu_heft_data * d = node->data;	
+	struct starpu_sched_node * best_node = NULL;
+	double estimated_ends[node->nchilds];
+	double estimated_ends_with_task[node->nchilds];
+	double best_exp_end_with_task = DBL_MAX;
+	double max_exp_end_with_task = 0.0;
+	double estimated_lengths[node->nchilds];
+	double estimated_transfer_length[node->nchilds];
+	int suitable_nodes[node->nchilds];
+	int nsuitable_nodes = 0;
+	double now = starpu_timing_now();
 	int i;
-	int calibrating = 0;
-	int perf_model = 0;
-	int can_execute = 0;
-	double best_exp_end = DBL_MAX;
-	double max_exp_end = DBL_MIN;
 	for(i = 0; i < node->nchilds; i++)
 	{
-		preds[i] = node->childs[i]->estimated_execute_preds(node->childs[i], task);
-		switch(preds[i].state)
+		struct starpu_sched_node * c = node->childs[i];
+		if(starpu_sched_node_execute_preds(c, task, estimated_lengths + i))
 		{
-		case PERF_MODEL:
-			STARPU_ASSERT(!isnan(preds[i].expected_finish_time));
-			perf_model = 1;
-			can_execute = 1;
- 			if(preds[i].expected_finish_time < best_exp_end)
-				best_exp_end = preds[i].expected_finish_time;
-			else if(preds[i].expected_finish_time > max_exp_end)
-				max_exp_end = preds[i].expected_finish_time;
-			break;
-		case CALIBRATING:
-			calibrating = 1;
-			can_execute = 1;
-			break;
-		case NO_PERF_MODEL:
-			can_execute = 1;
-			break;
-		case CANNOT_EXECUTE:
-			break;
+			if(isnan(estimated_lengths[i]))
+				return d->calibrating_node->push_task(d->calibrating_node, task);
+			if(_STARPU_IS_ZERO(estimated_lengths[i]))
+				return d->no_perf_model_node->push_task(d->no_perf_model_node, task);
+			estimated_transfer_length[i] = starpu_sched_node_transfer_length(c, task);
+			estimated_ends[i] = c->estimated_end(c);
+			estimated_ends_with_task[i] = starpu_sched_compute_expected_time(now,
+											 estimated_ends[i],
+											 estimated_lengths[i],
+											 estimated_transfer_length[i]);
+			if(estimated_ends_with_task[i] < best_exp_end_with_task)
+				best_exp_end_with_task = estimated_ends_with_task[i];
+			if(estimated_ends_with_task[i] > max_exp_end_with_task)
+				max_exp_end_with_task = estimated_ends_with_task[i];
+			suitable_nodes[nsuitable_nodes++] = i;
 		}
 	}
-	if(!can_execute)
-	{
-		return -ENODEV;
-	}
-
-	struct _starpu_dmda_data * data = node->data;
-
-	if(!calibrating && !perf_model)
-	{
-		int ret = data->no_model_node->push_task(data->no_model_node, task);
-		return ret;
-	}
-
-	double (*fitness_fun)(struct starpu_sched_node *,
-			      struct _starpu_dmda_data *,
-			      struct starpu_task_execute_preds*,
-			      double,
-			      double) = compute_fitness_perf_model;
-
-	if(calibrating)
-		fitness_fun = compute_fitness_calibration;
-
-
-
 	double best_fitness = DBL_MAX;
-	int index_best_fitness = -1;
-	for(i = 0; i < node->nchilds; i++)
+	int best_inode = -1;
+	for(i = 0; i < nsuitable_nodes; i++)
 	{
-		double tmp = fitness_fun(node->childs[i],
-					 node->data,
-					 preds + i,
-					 best_exp_end,
-					 max_exp_end);
-//		fprintf(stderr,"fitness for worker %d is %f\n",i,tmp == DBL_MAX ? -1 : tmp);
-		if(tmp < best_fitness)
+		int inode = suitable_nodes[i];
+		double tmp = compute_fitness(d,
+					     estimated_ends_with_task[inode],
+					     best_exp_end_with_task,
+					     max_exp_end_with_task,
+					     estimated_transfer_length[inode],
+					     0.0);
+		if(best_fitness > tmp)
 		{
 			best_fitness = tmp;
-			index_best_fitness = i;
+			best_inode = inode;
 		}
 	}
-//	fprintf(stderr,"push on worker %d\n",index_best_fitness);
-	STARPU_ASSERT(best_fitness != DBL_MAX);
-
-	struct starpu_sched_node * c = node->childs[index_best_fitness];
-	starpu_task_set_implementation(task, preds[index_best_fitness].impl);
-	task->predicted = preds[index_best_fitness].expected_length;
-	task->predicted_transfer = preds[index_best_fitness].expected_transfer_length;
-	return c->push_task(c, task);
-}
-/*
-static void update_helper_node(struct starpu_sched_node * heft_node)
-{
-	struct _starpu_dmda_data * data = heft_node->data;
-	struct starpu_sched_node * node = data->no_model_node;
-	node->nchilds = heft_node->nchilds;
-	node->childs = realloc(node->childs, sizeof(struct starpu_sched_node *) * node->nchilds);
-	memcpy(node->childs, heft_node->childs, sizeof(struct starpu_sched_node*) * node->nchilds);
-	node->nworkers = heft_node->nworkers;
-	memcpy(node->workerids, heft_node->workerids, sizeof(int) * node->nworkers);
+	fprintf(stderr,"%d best inode\n",best_inode);
+	best_node = node->childs[best_inode];
+	return best_node->push_task(best_node, task);
 }
-*/
+
 
 
 #define _STARPU_SCHED_ALPHA_DEFAULT 1.0
@@ -212,9 +138,9 @@ void init_heft_data(struct starpu_sched_node *node)
 					    idle_power_minimum, idle_power_maximum, param_modified);
 #endif /* !STARPU_USE_TOP */
 
-
-	struct _starpu_dmda_data * data = malloc(sizeof(*data));
-	memset(data, 0, sizeof(*data));
+	struct starpu_heft_data * old = node->data;
+	struct starpu_heft_data * data = malloc(sizeof(*data));
+	*data = *old;
 	data->alpha = alpha;
 	data->beta = beta;
 	data->gamma = _gamma;
@@ -222,45 +148,23 @@ void init_heft_data(struct starpu_sched_node *node)
 
 	node->data = data;
 
-	starpu_sched_node_heft_set_no_model_node(node, starpu_sched_node_random_create,NULL);
 }
 
-static void destroy_no_model_node(struct starpu_sched_node * heft_node)
-{
-	struct _starpu_dmda_data * data = heft_node->data;
-	if(data->no_model_node)
-	{
-		starpu_sched_node_destroy(data->no_model_node);
-	}
-}
 
 void deinit_heft_data(struct starpu_sched_node * node)
 {
-	destroy_no_model_node(node);
 	free(node->data);
 }
 
-void starpu_sched_node_heft_set_no_model_node(struct starpu_sched_node * heft_node,
-					       struct starpu_sched_node * (*create_no_model_node)(void *),void * arg)
-{
-	destroy_no_model_node(heft_node);
-	struct _starpu_dmda_data * data = heft_node->data;
-	struct starpu_sched_node * no_model_node = create_no_model_node(arg);
-	no_model_node->childs = malloc(heft_node->nchilds * sizeof(struct starpu_sched_node *));
-	memcpy(no_model_node->childs, heft_node->childs, heft_node->nchilds * sizeof(struct _strapu_sched_node *));
-
-	no_model_node->nchilds = heft_node->nchilds;
-	no_model_node->init_data(no_model_node);
-	data->no_model_node = no_model_node;
-}
 
-struct starpu_sched_node * starpu_sched_node_heft_create(void * arg STARPU_ATTRIBUTE_UNUSED)
+struct starpu_sched_node * starpu_sched_node_heft_create(struct starpu_heft_data * data)
 {
 	struct starpu_sched_node * node = starpu_sched_node_create();
 
 	node->push_task = push_task;
 	node->init_data = init_heft_data;
 	node->deinit_data = deinit_heft_data;
+	node->data = data;
 
 	return node;
 }
@@ -277,7 +181,17 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	
 	struct starpu_sched_tree * t = starpu_sched_tree_create();
-	t->root = starpu_sched_node_heft_create(NULL);
+	struct starpu_sched_node * random = starpu_sched_node_random_create(NULL);
+	struct starpu_heft_data data =
+		{
+			.alpha = 1.0,
+			.beta = 1.0,
+			.gamma = 1.0,
+			.idle_power = 200,
+			random,
+			random
+		};
+	t->root = starpu_sched_node_heft_create(&data);
 	
 	unsigned i;
 	for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++)
@@ -289,13 +203,16 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 		starpu_sched_node_add_child(fifo_node, worker_node);
 		starpu_sched_node_set_father(worker_node, fifo_node, sched_ctx_id);
 */
+		
 		starpu_sched_node_add_child(t->root, worker_node);
+		starpu_sched_node_add_child(random, worker_node);
 		starpu_sched_node_set_father(worker_node, t->root, sched_ctx_id);
 	}
 	
 	_starpu_set_workers_bitmaps();
 	starpu_sched_tree_call_init_data(t);
-
+	starpu_bitmap_destroy(random->workers_in_ctx);
+	random->workers_in_ctx = t->root->workers_in_ctx;
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t);
 }
 

+ 10 - 0
src/sched_policies/node_random.c

@@ -84,11 +84,20 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 }
 
 
+double random_estimated_end(struct starpu_sched_node * node)
+{
+	double sum = 0.0;
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+		sum += node->childs[i]->estimated_end(node->childs[i]);
+	return sum / node->nchilds;
+}
 struct starpu_sched_node * starpu_sched_node_random_create(void * arg STARPU_ATTRIBUTE_UNUSED)
 {
 	struct starpu_sched_node * node = starpu_sched_node_create();
 	node->data = NULL;
 	node->init_data = init_data_random;
+	node->estimated_end = random_estimated_end;
 	node->deinit_data = deinit_data_random;
 	node->push_task = push_task;
 	starpu_srand48(time(NULL));
@@ -100,6 +109,7 @@ int starpu_sched_node_is_random(struct starpu_sched_node *node)
 	return node->init_data == init_data_random;
 }
 
+
 static void initialize_random_center_policy(unsigned sched_ctx_id)
 {
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);

+ 104 - 41
src/sched_policies/node_sched.c

@@ -2,6 +2,8 @@
 #include <core/workers.h>
 #include <starpu_sched_node.h>
 #include <starpu_thread_util.h>
+#include <float.h>
+
 double starpu_sched_compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer)
 {
 
@@ -76,6 +78,17 @@ int push_task(struct starpu_task * task)
 	return ret;
 }
 
+void _update_worker_bits(struct starpu_sched_node * node, struct starpu_bitmap * workers_in_ctx)
+{
+	if(starpu_sched_node_is_worker(node))
+		return;
+	starpu_bitmap_unset_and(node->workers_in_ctx, node->workers, workers_in_ctx);
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+		_update_worker_bits(node->childs[i], workers_in_ctx);
+}
+
+
 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 {
 	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
@@ -83,6 +96,7 @@ void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsign
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
 		starpu_bitmap_set(t->workers, workerids[i]);
+	_update_worker_bits(t->root, t->workers);
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
 }
 
@@ -93,6 +107,7 @@ void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, uns
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
 		starpu_bitmap_unset(t->workers, workerids[i]);
+	_update_worker_bits(t->root, t->workers);
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&t->lock);
 }
 
@@ -237,51 +252,96 @@ static double estimated_load(struct starpu_sched_node * node)
 }
 
 
-struct starpu_task_execute_preds starpu_sched_node_average_estimated_execute_preds(struct starpu_sched_node * node, struct starpu_task * task)
+static double _starpu_sched_node_estimated_end_min(struct starpu_sched_node * node)
 {
-	if(node->is_homogeneous)
-		return node->childs[0]->estimated_execute_preds(node->childs[0], task);
-	struct starpu_task_execute_preds pred =
-		{ 
-			.state = CANNOT_EXECUTE,
-			.expected_length = 0.0,
-			.expected_finish_time = 0.0,
-			.expected_transfer_length = 0.0,
-			.expected_power = 0.0
-			
-		};
-	int nb = 0;
+	double min = DBL_MAX;
 	int i;
 	for(i = 0; i < node->nchilds; i++)
 	{
-		struct starpu_task_execute_preds tmp = node->childs[i]->estimated_execute_preds(node->childs[i], task);
-		switch(tmp.state)
+		double tmp = node->childs[i]->estimated_end(node->childs[i]);
+		if(tmp < min)
+			min = tmp;
+	}
+	return min;
+}
+
+int STARPU_WARN_UNUSED_RESULT starpu_sched_node_execute_preds(struct starpu_sched_node * node, struct starpu_task * task, double * length)
+{
+	int can_execute = 0;
+	starpu_task_bundle_t bundle = task->bundle;
+	double len = DBL_MAX;
+	
+
+	int workerid;
+	for(workerid = starpu_bitmap_first(node->workers_in_ctx);
+	    workerid != -1;
+	    workerid = starpu_bitmap_next(node->workers_in_ctx, workerid))
+	{
+		enum starpu_perfmodel_archtype archtype = starpu_worker_get_perf_archtype(workerid);
+		int nimpl;
+		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		{
-		case CALIBRATING:
-			return tmp;
-			break;
-		case NO_PERF_MODEL:
-			if(pred.state == CANNOT_EXECUTE)
-				pred = tmp;
-			break;
-		case PERF_MODEL:
-			nb++;
-			pred.expected_length += tmp.expected_length;
-			pred.expected_finish_time += tmp.expected_finish_time;
-			pred.expected_transfer_length += tmp.expected_transfer_length;
-			pred.expected_power += tmp.expected_power;
-			pred.state = PERF_MODEL;
-			break;
-		case CANNOT_EXECUTE:
+			if(starpu_worker_can_execute_task(workerid,task,nimpl)
+			   || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
+			{
+				double d;
+				can_execute = 1;
+				if(bundle)
+					d = starpu_task_bundle_expected_length(bundle, archtype, nimpl);
+				else
+					d = starpu_task_expected_length(task, archtype, nimpl);
+				if(isnan(d))
+				{
+					*length = d;
+					return can_execute;
+						
+				}
+				if(_STARPU_IS_ZERO(d) && !can_execute)
+				{
+					can_execute = 1;
+					continue;
+				}
+				if(d < len)
+				{
+					len = d;
+				}
+			}
+		}
+		if(node->is_homogeneous)
 			break;
+	}
+
+	if(len == DBL_MAX) /* we dont have perf model */
+		len = 0.0; 
+	if(length)
+		*length = len;
+	return can_execute;
+}
+
+double starpu_sched_node_transfer_length(struct starpu_sched_node * node, struct starpu_task * task)
+{
+	int nworkers = starpu_bitmap_cardinal(node->workers_in_ctx);
+	double sum = 0.0;
+	int worker;
+	for(worker = starpu_bitmap_first(node->workers_in_ctx);
+	    worker != -1;
+	    worker = starpu_bitmap_next(node->workers_in_ctx, worker))
+	{
+		unsigned memory_node  = starpu_worker_get_memory_node(worker);
+		if(task->bundle)
+		{
+			sum += starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node);
+		}
+		else
+		{
+			sum += starpu_task_expected_data_transfer_time(memory_node, task);
+			//sum += starpu_task_expected_conversion_time(task, starpu_worker_get_perf_archtype(worker), impl ?)
 		}
 	}
-	pred.expected_length /= nb;
-	pred.expected_finish_time /= nb;
-	pred.expected_transfer_length /= nb;
-	pred.expected_power /= nb;
-	return pred;
+	return sum / nworkers;
 }
+
+
 /*
 static double estimated_transfer_length(struct starpu_sched_node * node, struct starpu_task * task)
 {
@@ -329,13 +389,13 @@ struct starpu_sched_node * starpu_sched_node_create(void)
 	struct starpu_sched_node * node = malloc(sizeof(*node));
 	memset(node,0,sizeof(*node));
 	node->workers = starpu_bitmap_create();
+	node->workers_in_ctx = starpu_bitmap_create();
 	node->available = available;
 	node->init_data = take_node_and_does_nothing;
 	node->deinit_data = take_node_and_does_nothing;
 	node->pop_task = pop_task_node;
 	node->estimated_load = estimated_load;
-	node->estimated_execute_preds = starpu_sched_node_average_estimated_execute_preds;
-
+	node->estimated_end = _starpu_sched_node_estimated_end_min;
 	return node;
 }
 void starpu_sched_node_destroy(struct starpu_sched_node *node)
@@ -379,7 +439,10 @@ static void set_is_homogeneous(struct starpu_sched_node * node)
 }
 
 
-static void add_worker_bit(struct starpu_sched_node * node, int worker)
+
+
+
+static void _init_add_worker_bit(struct starpu_sched_node * node, int worker)
 {
 	STARPU_ASSERT(node);
 	starpu_bitmap_set(node->workers, worker);
@@ -387,7 +450,7 @@ static void add_worker_bit(struct starpu_sched_node * node, int worker)
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 		if(node->fathers[i])
 		{
-			add_worker_bit(node->fathers[i], worker);
+			_init_add_worker_bit(node->fathers[i], worker);
 			set_is_homogeneous(node->fathers[i]);
 		}
 }
@@ -398,7 +461,7 @@ void _starpu_set_workers_bitmaps(void)
 	for(worker = 0; worker < starpu_worker_get_count() + starpu_combined_worker_get_count(); worker++)
 	{
 		struct starpu_sched_node * worker_node = starpu_sched_node_worker_get(worker);
-		add_worker_bit(worker_node, worker);
+		_init_add_worker_bit(worker_node, worker);
 	}
 }
 

+ 5 - 78
src/sched_policies/node_worker.c

@@ -417,7 +417,7 @@ static double worker_estimated_finish_time(struct _starpu_worker * worker)
 	return sum + starpu_timing_now();
 }
 
-static double combined_worker_expected_finish_time(struct starpu_sched_node * node)
+static double combined_worker_estimated_end(struct starpu_sched_node * node)
 {
 	STARPU_ASSERT(starpu_sched_node_is_combined_worker(node));
 	struct _starpu_worker_node_data * data = node->data;
@@ -434,7 +434,7 @@ static double combined_worker_expected_finish_time(struct starpu_sched_node * no
 	}
 	return max;
 }
-static double simple_worker_expected_finish_time(struct starpu_sched_node * node)
+static double simple_worker_estimated_end(struct starpu_sched_node * node)
 {
 	struct _starpu_worker_node_data * data = node->data;
 	STARPU_PTHREAD_MUTEX_LOCK(&data->list->mutex);
@@ -443,80 +443,6 @@ static double simple_worker_expected_finish_time(struct starpu_sched_node * node
 	return tmp;
 }
 
-static struct starpu_task_execute_preds estimated_execute_preds(struct starpu_sched_node * node, struct starpu_task * task,
-								 double (*estimated_finish_time)(struct starpu_sched_node*))
-{
-	STARPU_ASSERT(starpu_sched_node_is_worker(node));
-	starpu_task_bundle_t bundle = task->bundle;
-	int workerid = starpu_sched_node_worker_get_workerid(node);
-
-	struct starpu_task_execute_preds preds =
-		{
-			.state = CANNOT_EXECUTE,
-			.archtype = starpu_sched_node_worker_get_perf_arch(node),
-			.expected_length = DBL_MAX,
-			.expected_finish_time = estimated_finish_time(node),
-			.expected_transfer_length = estimated_transfer_length(node, task),
-			.expected_power = 0.0
-		};
-
-	int nimpl;
-	for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
-	{
-		if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
-		{
-			double d;
-			if(bundle)
-				d = starpu_task_bundle_expected_length(bundle, preds.archtype, nimpl);
-			else
-				d = starpu_task_expected_length(task, preds.archtype, nimpl);
-			if(isnan(d))
-			{
-				preds.state = CALIBRATING;
-				preds.expected_length = d;
-				preds.impl = nimpl;
-				return preds;
-			}
-			if(_STARPU_IS_ZERO(d) && preds.state == CANNOT_EXECUTE)
-			{
-				preds.state = NO_PERF_MODEL;
-				preds.impl = nimpl;
-				continue;
-			}
-			if(d < preds.expected_length)
-			{
-				preds.state = PERF_MODEL;
-				preds.expected_length = d;
-				preds.impl = nimpl;
-			}
-		}
-	}
-
-	if(preds.state == PERF_MODEL)
-	{
-		preds.expected_finish_time = starpu_sched_compute_expected_time(starpu_timing_now(),
-									  preds.expected_finish_time,
-									  preds.expected_length,
-									  preds.expected_transfer_length);
-
-		if(bundle)
-			preds.expected_power = starpu_task_bundle_expected_power(bundle, preds.archtype, preds.impl);
-		else
-			preds.expected_power = starpu_task_expected_power(task, preds.archtype, preds.impl);
-	}
-
-	return preds;
-}
-
-static struct starpu_task_execute_preds combined_worker_estimated_execute_preds(struct starpu_sched_node * node, struct starpu_task * task)
-{
-	return estimated_execute_preds(node,task,combined_worker_expected_finish_time);
-}
-
-static struct starpu_task_execute_preds simple_worker_estimated_execute_preds(struct starpu_sched_node * node, struct starpu_task * task)
-{
-	return estimated_execute_preds(node,task,simple_worker_expected_finish_time);
-}
 
 
 static double simple_worker_estimated_load(struct starpu_sched_node * node)
@@ -656,12 +582,13 @@ static struct starpu_sched_node * starpu_sched_node_worker_create(int workerid)
 	node->data = data;
 	node->push_task = starpu_sched_node_worker_push_task;
 	node->pop_task = starpu_sched_node_worker_pop_task;
-	node->estimated_execute_preds = simple_worker_estimated_execute_preds;
+	node->estimated_end = simple_worker_estimated_end;
 	node->estimated_load = simple_worker_estimated_load;
 	node->available = simple_worker_available;
 	node->deinit_data = worker_deinit_data;
 	node->workers = starpu_bitmap_create();
 	starpu_bitmap_set(node->workers, workerid);
+	starpu_bitmap_or(node->workers_in_ctx, node->workers);
 	_worker_nodes[workerid] = node;
 
 #ifdef STARPU_HAVE_HWLOC
@@ -694,7 +621,7 @@ static struct starpu_sched_node  * starpu_sched_node_combined_worker_create(int
 	node->data = data;
 	node->push_task = starpu_sched_node_combined_worker_push_task;
 	node->pop_task = NULL;
-	node->estimated_execute_preds = combined_worker_estimated_execute_preds;
+	node->estimated_end = combined_worker_estimated_end;
 	node->estimated_load = combined_worker_estimated_load;
 	node->available = combined_worker_available;
 	node->deinit_data = worker_deinit_data;