浏览代码

optimised pop in node_best_implementation
more doc

Simon Archipoff 12 年之前
父节点
当前提交
1ffbe4d7bb

+ 4 - 0
doc/doxygen/chapters/api/modularized_scheduler.doxy

@@ -203,6 +203,10 @@ starpu_sched_node_heft_create parameters
 	 recursively destroy non shared parts of a \p node 's tree
 
 
+\fn starpu_sched_node_available(struct starpu_sched_node * node)
+\ingroup API_Modularized_Scheduler
+	 notify all node's underlying workers that a task is available to pop
+
 \fn int starpu_sched_tree_push_task(struct starpu_task * task)
 \ingroup API_Modularized_Scheduler
 	 compatibility with starpu_sched_policy interface

+ 4 - 8
doc/doxygen/chapters/modularized_scheduler.doxy

@@ -15,7 +15,7 @@ pop, in this case that node should call starpu_sched_node_available to wake work
 up. Push must be called on a child, and only if this child can execute
 the task.
 
-A pop call on a node can either return a localy stored task or perform
+A pop call on a node can either return a locally stored task or perform
 a recursive call on its father in its current context. Only workers
 should call pop.
 
@@ -24,10 +24,10 @@ should call pop.
 Scheduler node are created with the starpu_sched_node_foo_create() functions
 and then must be assembled using them starpu_sched_node::add_child and
 starpu_sched_node::remove_child functions.
-A father can be set to allow him to be reacheable by a starpu_sched_node::pop_task
+A father can be set to allow him to be reachable by a starpu_sched_node::pop_task
 call.
 
-Underlayings workers are memoized in starpu_sched_node::workers. Hence the
+Underlying workers are memoized in starpu_sched_node::workers. Hence the
 function starpu_sched_tree_update_workers should be called when the scheduler is
 finished, or modified.
 
@@ -36,13 +36,9 @@ The hypervisor can balance load between contexts by adding or removing workers f
 
 
 
-
-
-
 \section Push
 All scheduler node must define a starpu_sched_node::push_task
-function. The caller ensure that the node can realy execute the task.
-
+function. The caller ensures that the node can actually execute the task.
 
 \section Pop
 starpu_sched_node::push_task should either return a local task or

+ 40 - 33
src/sched_policies/node_best_implementation.c

@@ -1,45 +1,57 @@
 #include <starpu_sched_node.h>
 #include <starpu_scheduler.h>
 #include <float.h>
-/* set implementation, task->predicted and task->predicted_transfer with the first worker of workers that can execute that task
- * or have to be calibrated
+
+
+/* return true if workerid can execute task, and fill task->predicted and task->predicted_transfer
+ *  according to best implementation predictions
  */
-static void select_best_implementation_and_set_preds(struct starpu_bitmap * workers, struct starpu_task * task)
+static int find_best_impl(struct starpu_task * task, int workerid)
 {
-	int best_impl = -1;
 	double len = DBL_MAX;
-	int workerid;
-	for(workerid = starpu_bitmap_first(workers);
-	    -1 != workerid;
-	    workerid = starpu_bitmap_next(workers, workerid))
+	int best_impl = -1;
+	int impl;
+	for(impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
 	{
-		int impl;
-		for(impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
+		if(starpu_worker_can_execute_task(workerid, task, impl))
 		{
-			if(starpu_worker_can_execute_task(workerid, task, impl))
+			enum starpu_perfmodel_archtype archtype = starpu_worker_get_perf_archtype(workerid);
+			double d = starpu_task_expected_length(task, archtype, impl);
+			if(isnan(d))
+			{
+				best_impl = impl;
+				len = 0.0;
+				break;
+			}
+			if(d < len)
 			{
-				enum starpu_perfmodel_archtype archtype = starpu_worker_get_perf_archtype(workerid);
-				double d = starpu_task_expected_length(task, archtype, impl);
-				if(isnan(d))
-				{
-					best_impl = impl;
-					len = 0.0;
-					break;
-				}
-				if(d < len)
-				{
-					len = d;
-					best_impl = impl;
-				}
+				len = d;
+				best_impl = impl;
 			}
 		}
-		if(best_impl != -1)
-			break;
 	}
+	if(best_impl == -1)
+		return 0;
+
 	int memory_node = starpu_worker_get_memory_node(workerid);
 	task->predicted = len;
 	task->predicted_transfer = starpu_task_expected_data_transfer_time(memory_node, task);
 	starpu_task_set_implementation(task, best_impl);
+	return 1;
+}
+
+
+/* set implementation, task->predicted and task->predicted_transfer with the first worker of workers that can execute that task
+ * or have to be calibrated
+ */
+static void select_best_implementation_and_set_preds(struct starpu_bitmap * workers, struct starpu_task * task)
+{
+	int workerid;
+	for(workerid = starpu_bitmap_first(workers);
+	    -1 != workerid;
+	    workerid = starpu_bitmap_next(workers, workerid))
+		if(find_best_impl(task, workerid))
+			break;
 }
 
 
@@ -60,7 +72,8 @@ static struct starpu_task * select_best_implementation_pop_task(struct starpu_sc
 		return NULL;
 	t = node->fathers[sched_ctx_id]->pop_task(node->fathers[sched_ctx_id], sched_ctx_id);
 	if(t)
-		select_best_implementation_and_set_preds(node->workers_in_ctx, t);
+		/* this worker can execute this task as it was returned by a pop*/
+		(void)find_best_impl(t, starpu_worker_get_id());
 	return t;
 }
 
@@ -71,9 +84,3 @@ struct starpu_sched_node * starpu_sched_node_best_implementation_create(void * A
 	node->pop_task = select_best_implementation_pop_task;
 	return node;
 }
-
-
-
-
-
-