浏览代码

overload computation works for work stealing
several bugs fixed

Simon Archipoff 12 年之前
父节点
当前提交
f0c79e19dd

+ 3 - 3
src/core/sched_policy.c

@@ -33,6 +33,9 @@ int starpu_get_prefetch_flag(void)
 
 static struct starpu_sched_policy *predefined_policies[] =
 {
+	&_starpu_sched_tree_eager_policy,
+	&_starpu_sched_tree_random_policy,
+	&_starpu_sched_tree_ws_policy,
 	&_starpu_sched_eager_policy,
 	&_starpu_sched_prio_policy,
 	&_starpu_sched_random_policy,
@@ -43,9 +46,6 @@ static struct starpu_sched_policy *predefined_policies[] =
 	&_starpu_sched_dmda_sorted_policy,
 	&_starpu_sched_parallel_heft_policy,
 	&_starpu_sched_peager_policy,
-	&_starpu_sched_tree_eager_policy,
-	&_starpu_sched_tree_random_policy,
-	&_starpu_sched_tree_ws_policy,
 	NULL
 };
 

+ 1 - 1
src/sched_policies/node_sched.c

@@ -145,7 +145,7 @@ int _starpu_tree_push_task(struct starpu_task * task)
 	struct _starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	STARPU_PTHREAD_MUTEX_LOCK(&tree->mutex);
 	int ret_val = tree->root->push_task(tree->root,task); 
-	starpu_push_task_end(task);
+//	starpu_push_task_end(task);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&tree->mutex);
 	return ret_val;
 }

+ 1 - 1
src/sched_policies/node_sched.h

@@ -24,7 +24,7 @@ struct _starpu_sched_node
 	 */
 	struct _starpu_sched_node * fathers[STARPU_NMAX_SCHED_CTXS];
 	
-
+	
 
 	void (*add_child)(struct _starpu_sched_node *node,
 			  struct _starpu_sched_node *child,

+ 65 - 77
src/sched_policies/node_work_stealing.c

@@ -2,28 +2,30 @@
 #include "fifo_queues.h"
 #include <starpu_scheduler.h>
 
-struct _starpu_work_stealing_data
-{
-	/* keep track of the work performed from the beginning of the algorithm to make
-	 * better decisions about which queue to child when stealing or deferring work
-	 */
-	
-	unsigned performed_total;
-	unsigned last_pop_child;
-	unsigned last_push_child;
-};
-
-
 
+#define USE_OVERLOAD
 #ifdef USE_OVERLOAD
+#include <float.h>
+
 /**
  * Minimum number of task we wait for being processed before we start assuming
  * on which child the computation would be faster.
  */
-static int calibration_value = 0;
+static unsigned calibration_value = 0;
 
 #endif /* USE_OVERLOAD */
 
+struct _starpu_work_stealing_data
+{
+/* keep track of the work performed from the beginning of the algorithm to make
+ * better decisions about which queue to child when stealing or deferring work
+ */
+	
+	unsigned performed_total;
+	unsigned last_pop_child;
+	unsigned last_push_child;
+};
+
 
 /**
  * Return a child from which a task can be stolen.
@@ -31,29 +33,26 @@ static int calibration_value = 0;
  * the child previously selected doesn't own any task,
  * then we return the first non-empty worker.
  * and take his mutex
- *
- * if no child have task, return -1 and dont take any mutex
+ * if no child have tasks return -1 
  */
 static int select_victim_round_robin(struct _starpu_sched_node *node)
 {
 	struct _starpu_work_stealing_data *ws = node->data;
 	unsigned i = ws->last_pop_child;
-
-	starpu_pthread_mutex_t *victim_sched_mutex;
-
-	/* If the worker's queue is empty, let's try
-	 * the next ones */
+	
+	
+/* If the worker's queue is empty, let's try
+ * the next ones */
 	while (1)
 	{
 		unsigned ntasks;
 		struct _starpu_sched_node * child = node->childs[i];
 		struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(child);
-		victim_sched_mutex = &child->mutex;
-		STARPU_PTHREAD_MUTEX_LOCK(victim_sched_mutex);
+		STARPU_PTHREAD_MUTEX_LOCK(&child->mutex);
 		ntasks = fifo->ntasks;
 		if (ntasks)
 			break;
-		STARPU_PTHREAD_MUTEX_UNLOCK(victim_sched_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&child->mutex);
 		i = (i + 1) % node->nchilds;
 		if (i == ws->last_pop_child)
 		{
@@ -79,7 +78,7 @@ static unsigned select_worker_round_robin(struct _starpu_sched_node * node)
 	ws->last_push_child = i;
 	return i;
 }
-#undef USE_OVERLOAD
+
 #ifdef USE_OVERLOAD
 
 /**
@@ -90,23 +89,22 @@ static unsigned select_worker_round_robin(struct _starpu_sched_node * node)
  * 		a smaller value implies a faster worker with an relatively emptier queue : more suitable to put tasks in
  * 		a bigger value implies a slower worker with an reletively more replete queue : more suitable to steal tasks from
  */
-static float overload_metric(struct _starpu_sched_node * node, unsigned id)
+static float overload_metric(struct _starpu_sched_node * fifo_node, unsigned performed_total)
 {
-	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
 	float execution_ratio = 0.0f;
 	float current_ratio = 0.0f;
-	struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(node->childs[id]);
+	struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(fifo_node);
 	int nprocessed = fifo->nprocessed;
-	unsigned njobs = fifo->ntasks;
+	unsigned ntasks = fifo->ntasks;
 
 	/* Did we get enough information ? */
-	if (ws->performed_total > 0 && nprocessed > 0)
+	if (performed_total > 0 && nprocessed > 0)
 	{
-		/* How fast or slow is the worker compared to the other workers */
-		execution_ratio = (float) nprocessed / ws->performed_total;
-		/* How replete is its queue */
-		current_ratio = (float) njobs / nprocessed;
-	}
+/* How fast or slow is the worker compared to the other workers */
+execution_ratio = (float) nprocessed / performed_total;
+/* How replete is its queue */
+current_ratio = (float) ntasks / nprocessed;
+}
 	else
 	{
 		return 0.0f;
@@ -122,37 +120,31 @@ static float overload_metric(struct _starpu_sched_node * node, unsigned id)
  * by the tasks are taken into account to select the most suitable
  * worker to steal task from.
  */
-static unsigned select_victim_overload(unsigned sched_ctx_id)
+static int select_victim_overload(struct _starpu_sched_node * node)
 {
-	unsigned worker;
-	float  worker_ratio;
-	unsigned best_worker = 0;
+	float  child_ratio;
+	int best_child = -1;
 	float best_ratio = FLT_MIN;
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
+	unsigned performed_total = ws->performed_total;
 
 	/* Don't try to play smart until we get
 	 * enough informations. */
 	if (performed_total < calibration_value)
-		return select_victim_round_robin(sched_ctx_id);
+		return select_victim_round_robin(node);
 
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-
-	struct starpu_sched_ctx_iterator it;
-        if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-	while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
-		worker_ratio = overload_metric(sched_ctx_id, worker);
-
-		if (worker_ratio > best_ratio)
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+	{
+		child_ratio = overload_metric(node->childs[i],performed_total);
+		if(child_ratio > best_ratio)
 		{
-			best_worker = worker;
-			best_ratio = worker_ratio;
+			best_ratio = child_ratio;
+			best_child = i;
 		}
 	}
-
-	return best_worker;
+	
+	return best_child;
 }
 
 /**
@@ -162,38 +154,31 @@ static unsigned select_victim_overload(unsigned sched_ctx_id)
  * by the tasks are taken into account to select the most suitable
  * worker to add a task to.
  */
-static unsigned select_worker_overload(unsigned sched_ctx_id)
+static unsigned select_worker_overload(struct _starpu_sched_node * node)
 {
-	unsigned worker;
-	float  worker_ratio;
-	unsigned best_worker = 0;
+	float  child_ratio;
+	int best_child = -1;
 	float best_ratio = FLT_MAX;
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
+	unsigned performed_total = ws->performed_total;
 
 	/* Don't try to play smart until we get
 	 * enough informations. */
 	if (performed_total < calibration_value)
-		return select_worker_round_robin(sched_ctx_id);
-
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-
-	struct starpu_sched_ctx_iterator it;
-        if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-	while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
+		return select_victim_round_robin(node);
 
-		worker_ratio = overload_metric(sched_ctx_id, worker);
-
-		if (worker_ratio < best_ratio)
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+	{
+		child_ratio = overload_metric(node->childs[i],performed_total);
+		if(child_ratio < best_ratio)
 		{
-			best_worker = worker;
-			best_ratio = worker_ratio;
+			best_ratio = child_ratio;
+			best_child = i;
 		}
 	}
-
-	return best_worker;
+	
+	return best_child;
 }
 
 #endif /* USE_OVERLOAD */
@@ -244,6 +229,8 @@ static struct starpu_task * pop_task(struct _starpu_sched_node * node, unsigned
 							  starpu_worker_get_id());
 	fifo->nprocessed--;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&child->mutex);
+	if(task)
+		starpu_push_task_end(task);
 	return task;
 }
 
@@ -302,6 +289,7 @@ int _starpu_ws_push_task(struct starpu_task *task)
 		}
 	}
 	//there were a problem here, dont know what to do
+	STARPU_ASSERT(1);
 	return _starpu_tree_push_task(task);
 }
 

+ 5 - 6
src/sched_policies/node_worker.c

@@ -17,9 +17,12 @@ struct _starpu_sched_node * _starpu_sched_node_worker_get(int workerid)
 
 int _starpu_sched_node_worker_push_task(struct _starpu_sched_node * node, struct starpu_task *task)
 {
+	/*this function take the worker's mutex */
 	
-	return _starpu_push_local_task(node->data, task, task->priority);
+	int ret = _starpu_push_local_task(node->data, task, task->priority);
 
+
+	return ret;
 /*	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
 	int ret_val = _starpu_fifo_push_sorted_task(node->fifo, task);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
@@ -93,11 +96,7 @@ static struct _starpu_sched_node  * _starpu_sched_node_worker_create(int workeri
 
 int _starpu_sched_node_is_worker(struct _starpu_sched_node * node)
 {
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		if(_worker_nodes[i] == node)
-			return 1;
-	return 0;
+	return node->available == available;
 }
 
 #ifndef STARPU_NO_ASSERT