Pārlūkot izejas kodu

less bugs
new nodes for setting best implementation and calibration
initialization problem do fix

Simon Archipoff 12 gadi atpakaļ
vecāks
revīzija
19b7332298

+ 12 - 1
include/starpu_sched_node.h

@@ -165,6 +165,15 @@ int starpu_sched_node_is_heft(struct starpu_sched_node * node);
  */
 double starpu_sched_compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer);
 
+/* This node selects the best implementation for the first worker in the context that can execute the task,
+ * and fills task->predicted and task->predicted_transfer.
+ * It must have exactly one child when push_task is called.
+ */
+struct starpu_sched_node * starpu_sched_node_best_implementation_create(void * arg STARPU_ATTRIBUTE_UNUSED);
+/* This node selects an implementation that needs to be calibrated.
+ * It must have exactly one child when push_task is called.
+ */
+struct starpu_sched_node * starpu_sched_node_calibration_create(void * arg STARPU_ATTRIBUTE_UNUSED);
 /*create an empty tree
  */
 struct starpu_sched_tree * starpu_sched_tree_create(void);
@@ -175,7 +184,6 @@ void starpu_sched_tree_destroy(struct starpu_sched_tree * tree, unsigned sched_c
  */
 void starpu_sched_node_destroy_rec(struct starpu_sched_node * node, unsigned sched_ctx_id);
 
-
 int starpu_sched_tree_push_task(struct starpu_task * task);
 struct starpu_task * starpu_sched_tree_pop_task(unsigned sched_ctx_id);
 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
@@ -187,6 +195,9 @@ void starpu_sched_node_worker_post_exec_hook(struct starpu_task * task);
  */
 struct starpu_bitmap * _starpu_get_worker_mask(struct starpu_task * task);
 
+/* this function is called to initialize a scheduler tree
+ */
+void starpu_sched_node_init_rec(struct starpu_sched_node * node);
 /* this function fill all the node->workers members
  */
 void _starpu_set_workers_bitmaps(void);

+ 3 - 1
src/Makefile.am

@@ -1,3 +1,4 @@
+
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2009-2013  Université de Bordeaux 1
@@ -249,7 +250,8 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	sched_policies/bitmap.c					\
 	sched_policies/node_random.c				\
 	sched_policies/node_heft.c				\
-	sched_policies/node_composed.c
+	sched_policies/node_composed.c				\
+	sched_policies/node_best_implementation.c
 #	sched_policies/hierarchical_heft.c			\
 	sched_policies/scheduler_maker.c			
 

+ 5 - 1
src/sched_policies/bitmap.c

@@ -104,7 +104,8 @@ void starpu_bitmap_or(struct starpu_bitmap * a, struct starpu_bitmap * b)
 	if(a->size < b->size)
 	{
 		a->bits = realloc(a->bits, b->size * sizeof(unsigned long));
-		memset(a->bits + a->size, 0, (b->size - a->size) * sizeof(unsigned long));		a->size = b->size;
+		memset(a->bits + a->size, 0, (b->size - a->size) * sizeof(unsigned long));
+		a->size = b->size;
 
 	}
 	int i;
@@ -112,6 +113,9 @@ void starpu_bitmap_or(struct starpu_bitmap * a, struct starpu_bitmap * b)
 	{
 		a->bits[i] |= b->bits[i];
 	}
+	a->cardinal = 0;
+	for(i = 0; i < a->size; i++)
+		a->cardinal += _count_bit(a->bits[i]);
 }
 
 

+ 101 - 0
src/sched_policies/node_best_implementation.c

@@ -0,0 +1,101 @@
+#include <starpu_sched_node.h>
+#include <starpu_scheduler.h>
+#include <float.h>
+/* Select an implementation for task and fill in its predictions.
+ *
+ * Workers are visited in the iteration order of the workers bitmap, and the
+ * search stops at the first worker that yields a candidate. For that worker,
+ * the implementation with the smallest starpu_task_expected_length() is kept.
+ * When calibrating is non-zero, the first implementation whose expected length
+ * is NaN is chosen immediately, with a predicted length of 0.0.
+ *
+ * On success, task->predicted, task->predicted_transfer and the implementation
+ * of task are updated. If no worker provides a candidate, task is left
+ * untouched.
+ */
+static void select_best_implementation_and_set_preds(struct starpu_bitmap * workers, struct starpu_task * task, int calibrating)
+{
+	int best_impl = -1;
+	double len = DBL_MAX;
+	int workerid;
+	for(workerid = starpu_bitmap_first(workers);
+	    -1 != workerid;
+	    workerid = starpu_bitmap_next(workers, workerid))
+	{
+		int impl;
+		for(impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
+		{
+			if(!starpu_worker_can_execute_task(workerid, task, impl))
+				continue;
+
+			enum starpu_perfmodel_archtype archtype = starpu_worker_get_perf_archtype(workerid);
+			double d = starpu_task_expected_length(task, archtype, impl);
+			/* NOTE(review): a NaN length presumably means that the
+			 * performance model is not calibrated yet -- confirm */
+			if(calibrating && isnan(d))
+			{
+				best_impl = impl;
+				len = 0.0;
+				break;
+			}
+			/* a comparison with NaN is false, so an implementation
+			 * with a NaN length is never selected here */
+			if(d < len)
+			{
+				len = d;
+				best_impl = impl;
+			}
+		}
+		if(best_impl != -1)
+			break;
+	}
+
+	/* When nothing was selected the outer loop ended with workerid == -1:
+	 * do not use -1 as a worker id nor as an implementation number. */
+	if(best_impl == -1)
+		return;
+
+	int memory_node = starpu_worker_get_memory_node(workerid);
+	task->predicted = len;
+	task->predicted_transfer = starpu_task_expected_data_transfer_time(memory_node, task);
+	starpu_task_set_implementation(task, best_impl);
+}
+
+
+
+
+/* push_task method of the best-implementation node: choose an implementation
+ * for task (not calibrating), then hand the task over to the only child.
+ */
+static int select_best_implementation_push_task(struct starpu_sched_node * node, struct starpu_task * task)
+{
+	struct starpu_sched_node * child;
+
+	STARPU_ASSERT(node->nchilds == 1);
+	select_best_implementation_and_set_preds(node->workers_in_ctx, task, 0);
+
+	child = node->childs[0];
+	return child->push_task(child, task);
+}
+
+/* pop_task method of the best-implementation node: pop a task from the father
+ * registered for sched_ctx_id and choose an implementation for it (not
+ * calibrating). Returns NULL when there is no such father or when the father
+ * has no task to give.
+ */
+static struct starpu_task * select_best_implementation_pop_task(struct starpu_sched_node * node, unsigned sched_ctx_id)
+{
+	struct starpu_sched_node * father = node->fathers[sched_ctx_id];
+	struct starpu_task * task;
+
+	if(father == NULL)
+		return NULL;
+
+	task = father->pop_task(father, sched_ctx_id);
+	if(task != NULL)
+		select_best_implementation_and_set_preds(node->workers_in_ctx, task, 0);
+	return task;
+}
+
+
+
+/* push_task method of the calibration node: choose an implementation for task
+ * in calibrating mode, then hand the task over to the only child.
+ */
+static int select_calibration_push_task(struct starpu_sched_node * node, struct starpu_task * task)
+{
+	struct starpu_sched_node * child;
+
+	STARPU_ASSERT(node->nchilds == 1);
+	select_best_implementation_and_set_preds(node->workers_in_ctx, task, 1);
+
+	child = node->childs[0];
+	return child->push_task(child, task);
+}
+
+/* pop_task method of the calibration node: pop a task from the father
+ * registered for sched_ctx_id and choose an implementation for it in
+ * calibrating mode. Returns NULL when there is no such father or when the
+ * father has no task to give.
+ */
+static struct starpu_task * select_calibration_pop_task(struct starpu_sched_node * node, unsigned sched_ctx_id)
+{
+	struct starpu_sched_node * father = node->fathers[sched_ctx_id];
+	struct starpu_task * task;
+
+	if(father == NULL)
+		return NULL;
+
+	task = father->pop_task(father, sched_ctx_id);
+	if(task != NULL)
+		select_best_implementation_and_set_preds(node->workers_in_ctx, task, 1);
+	return task;
+}
+
+/* Create a node whose push_task and pop_task methods select the best
+ * implementation of the task before passing it on.
+ * arg is unused; it is spelled like in the prototype and in
+ * starpu_sched_node_calibration_create().
+ */
+struct starpu_sched_node * starpu_sched_node_best_implementation_create(void * arg STARPU_ATTRIBUTE_UNUSED)
+{
+	struct starpu_sched_node * node = starpu_sched_node_create();
+	node->push_task = select_best_implementation_push_task;
+	node->pop_task = select_best_implementation_pop_task;
+	return node;
+}
+
+
+/* Create a node whose push_task and pop_task methods select, in calibrating
+ * mode, the implementation of the task before passing it on.
+ * arg is unused.
+ */
+struct starpu_sched_node * starpu_sched_node_calibration_create(void * arg STARPU_ATTRIBUTE_UNUSED)
+{
+	struct starpu_sched_node * calibration_node;
+
+	calibration_node = starpu_sched_node_create();
+	calibration_node->pop_task = select_calibration_pop_task;
+	calibration_node->push_task = select_calibration_push_task;
+
+	return calibration_node;
+}

+ 23 - 16
src/sched_policies/node_heft.c

@@ -61,13 +61,15 @@ static int push_task(struct starpu_sched_node * node, struct starpu_task * task)
 					     max_exp_end_with_task,
 					     estimated_transfer_length[inode],
 					     0.0);
-		if(best_fitness > tmp)
+//		fprintf(stderr,"%f %d\n", tmp, inode);
+		if(tmp < best_fitness)
 		{
 			best_fitness = tmp;
 			best_inode = inode;
 		}
 	}
-	fprintf(stderr,"%d best inode\n",best_inode);
+//	fprintf(stderr,"push %d\n",best_inode);
+	STARPU_ASSERT(best_inode != -1);
 	best_node = node->childs[best_inode];
 	return best_node->push_task(best_node, task);
 }
@@ -198,23 +200,28 @@ static void initialize_heft_center_policy(unsigned sched_ctx_id)
 	{
 		struct starpu_sched_node * worker_node = starpu_sched_node_worker_get(i);
 		STARPU_ASSERT(worker_node);
-/*
-		struct starpu_sched_node * fifo_node = starpu_sched_node_fifo_create(NULL);
-		starpu_sched_node_add_child(fifo_node, worker_node);
-		starpu_sched_node_set_father(worker_node, fifo_node, sched_ctx_id);
-*/
-		
-		starpu_sched_node_add_child(t->root, worker_node);
-		starpu_sched_node_add_child(random, worker_node);
-		starpu_sched_node_set_father(worker_node, t->root, sched_ctx_id);
+
+		struct starpu_sched_node * impl_node = starpu_sched_node_best_implementation_create(NULL);
+		starpu_sched_node_add_child(impl_node, worker_node);
+		starpu_sched_node_set_father(worker_node, impl_node, sched_ctx_id);
+
+		starpu_sched_node_add_child(t->root, impl_node);
+		starpu_sched_node_set_father(impl_node, t->root, sched_ctx_id);
+
+
+		struct starpu_sched_node * calibration_node = starpu_sched_node_calibration_create(NULL);
+		starpu_sched_node_add_child(calibration_node, worker_node);
+		starpu_sched_node_add_child(random, calibration_node);
+
+
 	}
-	
-	_starpu_set_workers_bitmaps();
-	starpu_sched_tree_call_init_data(t);
+
+	starpu_sched_node_init_rec(t->root);
+	starpu_sched_node_init_rec(random);
+//	_starpu_set_workers_bitmaps();
+//	starpu_sched_tree_call_init_data(t);
 	starpu_bitmap_destroy(random->workers_in_ctx);
 	random->workers_in_ctx = t->root->workers_in_ctx;
-	if(random->init_data)
-		random->init_data(random);
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t);
 }
 

+ 19 - 7
src/sched_policies/node_sched.c

@@ -364,17 +364,14 @@ int starpu_sched_node_can_execute_task(struct starpu_sched_node * node, struct s
 {
 	unsigned nimpl;
 	int worker;
-	struct starpu_bitmap * worker_mask = _starpu_get_worker_mask(task);
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(node);
 	for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
-		for(worker = starpu_bitmap_first(node->workers);
+		for(worker = starpu_bitmap_first(node->workers_in_ctx);
 		    -1 != worker;
-		    worker = starpu_bitmap_next(node->workers, worker))
-			if (starpu_bitmap_get(worker_mask, worker)
-			    &&
-			    (starpu_worker_can_execute_task(worker, task, nimpl)
-			     || starpu_combined_worker_can_execute_task(worker, task, nimpl)))
+		    worker = starpu_bitmap_next(node->workers_in_ctx, worker))
+			if (starpu_worker_can_execute_task(worker, task, nimpl)
+			     || starpu_combined_worker_can_execute_task(worker, task, nimpl))
 			    return 1;
 	return 0;
 }
@@ -441,6 +438,21 @@ static void set_is_homogeneous(struct starpu_sched_node * node)
 
 
 
+/* Recursively initialize the subtree rooted at node.
+ * Worker nodes are left as they are. For any other node, every child is
+ * initialized first, then the workers bitmap of each child is merged into
+ * node->workers, and finally node->init_data is called when it is set.
+ */
+void starpu_sched_node_init_rec(struct starpu_sched_node * node)
+{
+	int child;
+
+	if(starpu_sched_node_is_worker(node))
+		return;
+
+	/* initialize all the subtrees before reading their bitmaps */
+	child = 0;
+	while(child < node->nchilds)
+	{
+		starpu_sched_node_init_rec(node->childs[child]);
+		child++;
+	}
+
+	/* node->workers becomes the union of the workers of the children */
+	child = 0;
+	while(child < node->nchilds)
+	{
+		starpu_bitmap_or(node->workers, node->childs[child]->workers);
+		child++;
+	}
+
+	if(node->init_data)
+		node->init_data(node);
+}
+
 
 
 static void _init_add_worker_bit(struct starpu_sched_node * node, int worker)

+ 2 - 1
src/sched_policies/node_worker.c

@@ -96,6 +96,7 @@ static struct _starpu_task_grid * _starpu_task_grid_create(void)
 	return t;
 }
 static void _starpu_task_grid_destroy(struct _starpu_task_grid * t)
+
 {
 	free(t);
 }
@@ -706,7 +707,7 @@ void starpu_sched_node_worker_pre_exec_hook(struct starpu_task * task)
 		STARPU_ASSERT(list->ntasks != 0);
 		list->ntasks--;
 		if(!task->execute_on_a_specific_worker)
-			list->exp_len = STARPU_MIN(list->exp_len - task->predicted, 0.0);
+			list->exp_len = STARPU_MAX(list->exp_len - task->predicted, 0.0);
 
 		list->exp_start = starpu_timing_now() + task->predicted;
 		if(list->ntasks == 0)