浏览代码

overload computation works for work stealing
several bugs fixed

Simon Archipoff 12 年之前
父节点
当前提交
f0c79e19dd

+ 3 - 3
src/core/sched_policy.c

@@ -33,6 +33,9 @@ int starpu_get_prefetch_flag(void)
 
 static struct starpu_sched_policy *predefined_policies[] =
 {
+	&_starpu_sched_tree_eager_policy,
+	&_starpu_sched_tree_random_policy,
+	&_starpu_sched_tree_ws_policy,
 	&_starpu_sched_eager_policy,
 	&_starpu_sched_prio_policy,
 	&_starpu_sched_random_policy,
@@ -43,9 +46,6 @@ static struct starpu_sched_policy *predefined_policies[] =
 	&_starpu_sched_dmda_sorted_policy,
 	&_starpu_sched_parallel_heft_policy,
 	&_starpu_sched_peager_policy,
-	&_starpu_sched_tree_eager_policy,
-	&_starpu_sched_tree_random_policy,
-	&_starpu_sched_tree_ws_policy,
 	NULL
 };
 

+ 1 - 1
src/sched_policies/node_sched.c

@@ -145,7 +145,7 @@ int _starpu_tree_push_task(struct starpu_task * task)
 	struct _starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	STARPU_PTHREAD_MUTEX_LOCK(&tree->mutex);
 	int ret_val = tree->root->push_task(tree->root,task); 
-	starpu_push_task_end(task);
+//	starpu_push_task_end(task);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&tree->mutex);
 	return ret_val;
 }

+ 1 - 1
src/sched_policies/node_sched.h

@@ -24,7 +24,7 @@ struct _starpu_sched_node
 	 */
 	struct _starpu_sched_node * fathers[STARPU_NMAX_SCHED_CTXS];
 	
-
+	
 
 	void (*add_child)(struct _starpu_sched_node *node,
 			  struct _starpu_sched_node *child,

+ 65 - 77
src/sched_policies/node_work_stealing.c

@@ -2,28 +2,30 @@
 #include "fifo_queues.h"
 #include <starpu_scheduler.h>
 
-struct _starpu_work_stealing_data
-{
-	/* keep track of the work performed from the beginning of the algorithm to make
-	 * better decisions about which queue to child when stealing or deferring work
-	 */
-	
-	unsigned performed_total;
-	unsigned last_pop_child;
-	unsigned last_push_child;
-};
-
-
 
+#define USE_OVERLOAD
 #ifdef USE_OVERLOAD
+#include <float.h>
+
 /**
  * Minimum number of task we wait for being processed before we start assuming
  * on which child the computation would be faster.
  */
-static int calibration_value = 0;
+static unsigned calibration_value = 0;
 
 #endif /* USE_OVERLOAD */
 
+struct _starpu_work_stealing_data
+{
+/* keep track of the work performed from the beginning of the algorithm to make
+ * better decisions about which queue to child when stealing or deferring work
+ */
+	
+	unsigned performed_total;
+	unsigned last_pop_child;
+	unsigned last_push_child;
+};
+
 
 /**
  * Return a child from which a task can be stolen.
@@ -31,29 +33,26 @@ static int calibration_value = 0;
  * the child previously selected doesn't own any task,
  * then we return the first non-empty worker.
  * and take his mutex
- *
- * if no child have task, return -1 and dont take any mutex
+ * if no child have tasks return -1 
  */
 static int select_victim_round_robin(struct _starpu_sched_node *node)
 {
 	struct _starpu_work_stealing_data *ws = node->data;
 	unsigned i = ws->last_pop_child;
-
-	starpu_pthread_mutex_t *victim_sched_mutex;
-
-	/* If the worker's queue is empty, let's try
-	 * the next ones */
+	
+	
+/* If the worker's queue is empty, let's try
+ * the next ones */
 	while (1)
 	{
 		unsigned ntasks;
 		struct _starpu_sched_node * child = node->childs[i];
 		struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(child);
-		victim_sched_mutex = &child->mutex;
-		STARPU_PTHREAD_MUTEX_LOCK(victim_sched_mutex);
+		STARPU_PTHREAD_MUTEX_LOCK(&child->mutex);
 		ntasks = fifo->ntasks;
 		if (ntasks)
 			break;
-		STARPU_PTHREAD_MUTEX_UNLOCK(victim_sched_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&child->mutex);
 		i = (i + 1) % node->nchilds;
 		if (i == ws->last_pop_child)
 		{
@@ -79,7 +78,7 @@ static unsigned select_worker_round_robin(struct _starpu_sched_node * node)
 	ws->last_push_child = i;
 	return i;
 }
-#undef USE_OVERLOAD
+
 #ifdef USE_OVERLOAD
 
 /**
@@ -90,23 +89,22 @@ static unsigned select_worker_round_robin(struct _starpu_sched_node * node)
  * 		a smaller value implies a faster worker with an relatively emptier queue : more suitable to put tasks in
  * 		a bigger value implies a slower worker with an reletively more replete queue : more suitable to steal tasks from
  */
-static float overload_metric(struct _starpu_sched_node * node, unsigned id)
+static float overload_metric(struct _starpu_sched_node * fifo_node, unsigned performed_total)
 {
-	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
 	float execution_ratio = 0.0f;
 	float current_ratio = 0.0f;
-	struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(node->childs[id]);
+	struct _starpu_fifo_taskq * fifo = _starpu_sched_node_fifo_get_fifo(fifo_node);
 	int nprocessed = fifo->nprocessed;
-	unsigned njobs = fifo->ntasks;
+	unsigned ntasks = fifo->ntasks;
 
 	/* Did we get enough information ? */
-	if (ws->performed_total > 0 && nprocessed > 0)
+	if (performed_total > 0 && nprocessed > 0)
 	{
-		/* How fast or slow is the worker compared to the other workers */
-		execution_ratio = (float) nprocessed / ws->performed_total;
-		/* How replete is its queue */
-		current_ratio = (float) njobs / nprocessed;
-	}
+/* How fast or slow is the worker compared to the other workers */
+execution_ratio = (float) nprocessed / performed_total;
+/* How replete is its queue */
+current_ratio = (float) ntasks / nprocessed;
+}
 	else
 	{
 		return 0.0f;
@@ -122,37 +120,31 @@ static float overload_metric(struct _starpu_sched_node * node, unsigned id)
  * by the tasks are taken into account to select the most suitable
  * worker to steal task from.
  */
-static unsigned select_victim_overload(unsigned sched_ctx_id)
+static int select_victim_overload(struct _starpu_sched_node * node)
 {
-	unsigned worker;
-	float  worker_ratio;
-	unsigned best_worker = 0;
+	float  child_ratio;
+	int best_child = -1;
 	float best_ratio = FLT_MIN;
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
+	unsigned performed_total = ws->performed_total;
 
 	/* Don't try to play smart until we get
 	 * enough informations. */
 	if (performed_total < calibration_value)
-		return select_victim_round_robin(sched_ctx_id);
+		return select_victim_round_robin(node);
 
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-
-	struct starpu_sched_ctx_iterator it;
-        if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-	while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
-		worker_ratio = overload_metric(sched_ctx_id, worker);
-
-		if (worker_ratio > best_ratio)
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+	{
+		child_ratio = overload_metric(node->childs[i],performed_total);
+		if(child_ratio > best_ratio)
 		{
-			best_worker = worker;
-			best_ratio = worker_ratio;
+			best_ratio = child_ratio;
+			best_child = i;
 		}
 	}
-
-	return best_worker;
+	
+	return best_child;
 }
 
 /**
@@ -162,38 +154,31 @@ static unsigned select_victim_overload(unsigned sched_ctx_id)
  * by the tasks are taken into account to select the most suitable
  * worker to add a task to.
  */
-static unsigned select_worker_overload(unsigned sched_ctx_id)
+static unsigned select_worker_overload(struct _starpu_sched_node * node)
 {
-	unsigned worker;
-	float  worker_ratio;
-	unsigned best_worker = 0;
+	float  child_ratio;
+	int best_child = -1;
 	float best_ratio = FLT_MAX;
+	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)node->data;
+	unsigned performed_total = ws->performed_total;
 
 	/* Don't try to play smart until we get
 	 * enough informations. */
 	if (performed_total < calibration_value)
-		return select_worker_round_robin(sched_ctx_id);
-
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-
-	struct starpu_sched_ctx_iterator it;
-        if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-	while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
+		return select_victim_round_robin(node);
 
-		worker_ratio = overload_metric(sched_ctx_id, worker);
-
-		if (worker_ratio < best_ratio)
+	int i;
+	for(i = 0; i < node->nchilds; i++)
+	{
+		child_ratio = overload_metric(node->childs[i],performed_total);
+		if(child_ratio < best_ratio)
 		{
-			best_worker = worker;
-			best_ratio = worker_ratio;
+			best_ratio = child_ratio;
+			best_child = i;
 		}
 	}
-
-	return best_worker;
+	
+	return best_child;
 }
 
 #endif /* USE_OVERLOAD */
@@ -244,6 +229,8 @@ static struct starpu_task * pop_task(struct _starpu_sched_node * node, unsigned
 							  starpu_worker_get_id());
 	fifo->nprocessed--;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&child->mutex);
+	if(task)
+		starpu_push_task_end(task);
 	return task;
 }
 
@@ -302,6 +289,7 @@ int _starpu_ws_push_task(struct starpu_task *task)
 		}
 	}
 	//there were a problem here, dont know what to do
+	STARPU_ASSERT(1);
 	return _starpu_tree_push_task(task);
 }
 

+ 5 - 6
src/sched_policies/node_worker.c

@@ -17,9 +17,12 @@ struct _starpu_sched_node * _starpu_sched_node_worker_get(int workerid)
 
 int _starpu_sched_node_worker_push_task(struct _starpu_sched_node * node, struct starpu_task *task)
 {
+	/*this function take the worker's mutex */
 	
-	return _starpu_push_local_task(node->data, task, task->priority);
+	int ret = _starpu_push_local_task(node->data, task, task->priority);
 
+
+	return ret;
 /*	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
 	int ret_val = _starpu_fifo_push_sorted_task(node->fifo, task);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
@@ -93,11 +96,7 @@ static struct _starpu_sched_node  * _starpu_sched_node_worker_create(int workeri
 
 int _starpu_sched_node_is_worker(struct _starpu_sched_node * node)
 {
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		if(_worker_nodes[i] == node)
-			return 1;
-	return 0;
+	return node->available == available;
 }
 
 #ifndef STARPU_NO_ASSERT