лет назад: 13 · 63a92fffd9
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -20,11 +20,21 @@
 
				 
			
 
				 #include <limits.h>
			
 
				 
			
 
				+#include <core/perfmodel/perfmodel.h>
			
 
				+#include <core/task_bundle.h>
			
 
				 #include <core/workers.h>
			
 
				 #include <sched_policies/fifo_queues.h>
			
 
				 #include <core/perfmodel/perfmodel.h>
			
 
				 #include <starpu_parameters.h>
			
 
				 
			
 
				+#ifndef DBL_MIN
			
 
				+#define DBL_MIN __DBL_MIN__
			
 
				+#endif
			
 
				+
			
 
				+#ifndef DBL_MAX
			
 
				+#define DBL_MAX __DBL_MAX__
			
 
				+#endif
			
 
				+
			
 
				 static unsigned nworkers;
			
 
				 static struct _starpu_fifo_taskq *queue_array[STARPU_NMAXWORKERS];
			
 
				 
			
@@ -328,76 +338,75 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 
				 	return push_task_on_best_worker(task, best, model_best, prio);
			
 
				 }
			
 
				 
			
 
				-static int _dmda_push_task(struct starpu_task *task, unsigned prio)
			
 
				+static void compute_all_performance_predictions(struct starpu_task *task,
			
 
				+					double local_task_length[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS],
			
 
				+					double exp_end[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS],
			
 
				+					double *max_exp_endp,
			
 
				+					double *best_exp_endp,
			
 
				+					double local_data_penalty[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS],
			
 
				+					double local_power[STARPU_NMAXWORKERS][STARPU_MAXIMPLEMENTATIONS],
			
 
				+					int *forced_worker, int *forced_impl)
			
 
				 {
			
 
				-	/* find the queue */
			
 
				-	struct _starpu_fifo_taskq *fifo;
			
 
				-	unsigned worker;
			
 
				-	int best = -1;
			
 
				-
			
 
				-	/* this flag is set if the corresponding worker is selected because
			
 
				-	   there is no performance prediction available yet */
			
 
				-	int forced_best = -1;
			
 
				-
			
 
				-	double local_task_length[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				-	double local_data_penalty[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				-	double local_power[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				-	double exp_end[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				-	double max_exp_end = 0.0;
			
 
				-
			
 
				-	double fitness[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				-
			
 
				-	double best_exp_end = 10e240;
			
 
				-	double model_best = 0.0;
			
 
				-	//double penality_best = 0.0;
			
 
				-
			
 
				+	int calibrating = 0;
			
 
				+	double max_exp_end = DBL_MIN;
			
 
				+	double best_exp_end = DBL_MAX;
			
 
				 	int ntasks_best = -1;
			
 
				+	int nimpl_best = 0;
			
 
				 	double ntasks_best_end = 0.0;
			
 
				-	int calibrating = 0;
			
 
				 
			
 
				 	/* A priori, we know all estimations */
			
 
				 	int unknown = 0;
			
 
				+	unsigned worker;
			
 
				 
			
 
				-	unsigned best_impl = 0;
			
 
				 	unsigned nimpl;
			
 
				 
			
 
				+	starpu_task_bundle_t bundle = task->bundle;
			
 
				+
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				+		struct _starpu_fifo_taskq *fifo = queue_array[worker];
			
 
				+
			
 
				 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 		{
			
 
				-			fifo = queue_array[worker];
			
 
				-
			
 
				-			/* Sometimes workers didn't take the tasks as early as we expected */
			
 
				-			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
			
 
				-			fifo->exp_end = fifo->exp_start + fifo->exp_len;
			
 
				-			if (fifo->exp_end > max_exp_end)
			
 
				-				max_exp_end = fifo->exp_end;
			
 
				-
			
 
				 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
			
 
				 			{
			
 
				 				/* no one on that queue may execute this task */
			
 
				 				continue;
			
 
				 			}
			
 
				 
			
 
				+			/* Sometimes workers didn't take the tasks as early as we expected */
			
 
				+			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
			
 
				+			exp_end[worker][nimpl] = fifo->exp_start + fifo->exp_len;
			
 
				+			if (exp_end[worker][nimpl] > max_exp_end)
			
 
				+				max_exp_end = exp_end[worker][nimpl];
			
 
				+
			
 
				 			enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
			
 
				-			local_task_length[worker][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				+			unsigned memory_node = starpu_worker_get_memory_node(worker);
			
 
				 
			
 
				 			//_STARPU_DEBUG("Scheduler dmda: task length (%lf) worker (%u) kernel (%u) \n", local_task_length[worker][nimpl],worker,nimpl);
			
 
				 
			
 
				-			unsigned memory_node = starpu_worker_get_memory_node(worker);
			
 
				-			local_data_penalty[worker][nimpl] = starpu_task_expected_data_transfer_time(memory_node, task);
			
 
				+			if (bundle)
			
 
				+			{
			
 
				+				STARPU_ABORT(); /* Not implemented yet. */
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				local_task_length[worker][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				+				local_data_penalty[worker][nimpl] = starpu_task_expected_data_transfer_time(memory_node, task);
			
 
				+				local_power[worker][nimpl] = starpu_task_expected_power(task, perf_arch,nimpl);
			
 
				+			}
			
 
				 
			
 
				 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
			
 
				 
			
 
				 			if (ntasks_best == -1
			
 
				-			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			
 
				+			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better worker */
			
 
				 			    || (!calibrating && isnan(local_task_length[worker][nimpl])) /* Not calibrating but this worker is being calibrated */
			
 
				 			    || (calibrating && isnan(local_task_length[worker][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			
 
				 				)
			
 
				 			{
			
 
				 				ntasks_best_end = ntasks_end;
			
 
				 				ntasks_best = worker;
			
 
				-				best_impl = nimpl;
			
 
				+				nimpl_best = nimpl;
			
 
				 			}
			
 
				 
			
 
				 			if (isnan(local_task_length[worker][nimpl]))
			
@@ -407,13 +416,13 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 				calibrating = 1;
			
 
				 
			
 
				 			if (isnan(local_task_length[worker][nimpl])
			
 
				-					|| _STARPU_IS_ZERO(local_task_length[worker][nimpl]))
			
 
				+				|| _STARPU_IS_ZERO(local_task_length[worker][nimpl]))
			
 
				 				/* there is no prediction available for that task
			
 
				-				 * with that arch yet, so switch to a greedy strategy */
			
 
				+				 * with that arch (yet or at all), so switch to a greedy strategy */
			
 
				 				unknown = 1;
			
 
				 
			
 
				 			if (unknown)
			
 
				-					continue;
			
 
				+				continue;
			
 
				 
			
 
				 			exp_end[worker][nimpl] = fifo->exp_start + fifo->exp_len + local_task_length[worker][nimpl];
			
 
				 
			
@@ -421,21 +430,57 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 			{
			
 
				 				/* a better solution was found */
			
 
				 				best_exp_end = exp_end[worker][nimpl];
			
 
				-				best_impl = nimpl;
			
 
				+				nimpl_best = nimpl;
			
 
				 			}
			
 
				 
			
 
				-			local_power[worker][nimpl] = starpu_task_expected_power(task, perf_arch, nimpl);
			
 
				 			if (isnan(local_power[worker][nimpl]))
			
 
				 				local_power[worker][nimpl] = 0.;
			
 
				 
			
 
				-		 }
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				-	if (unknown)
			
 
				-		forced_best = ntasks_best;
			
 
				+	*forced_worker = unknown?ntasks_best:-1;
			
 
				+	*forced_impl = unknown?nimpl_best:-1;
			
 
				+
			
 
				+	*best_exp_endp = best_exp_end;
			
 
				+	*max_exp_endp = max_exp_end;
			
 
				+}
			
 
				+
			
 
				+static int _dmda_push_task(struct starpu_task *task, unsigned prio)
			
 
				+{
			
 
				+	/* find the queue */
			
 
				+	unsigned worker;
			
 
				+	int best = -1;
			
 
				+	int selected_impl = 0;
			
 
				+	double model_best = 0.0;
			
 
				+
			
 
				+	/* this flag is set if the corresponding worker is selected because
			
 
				+	   there is no performance prediction available yet */
			
 
				+	int forced_best = -1;
			
 
				+	int forced_impl = -1;
			
 
				+
			
 
				+	double local_task_length[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				+	double local_data_penalty[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				+	double local_power[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				+	double exp_end[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				+	double max_exp_end = 0.0;
			
 
				+	double best_exp_end;
			
 
				+
			
 
				+	double fitness[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				+
			
 
				+	compute_all_performance_predictions(task,
			
 
				+		local_task_length,
			
 
				+		exp_end,
			
 
				+		&max_exp_end,
			
 
				+		&best_exp_end,
			
 
				+		local_data_penalty,
			
 
				+		local_power,
			
 
				+		&forced_best,
			
 
				+		&forced_impl);
			
 
				 
			
 
				 	double best_fitness = -1;
			
 
				 
			
 
				+	unsigned nimpl;
			
 
				 	if (forced_best == -1)
			
 
				 	{
			
 
				 		for (worker = 0; worker < nworkers; worker++)
			
@@ -464,7 +509,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 				/* we found a better solution */
			
 
				 				best_fitness = fitness[worker][nimpl];
			
 
				 				best = worker;
			
 
				-				best_impl = nimpl;
			
 
				+				selected_impl = nimpl;
			
 
				 
			
 
				 				//			_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) +gamma(%e)\n", worker, best_fitness, exp_end[worker][nimpl] - best_exp_end, local_data_penalty[worker][nimpl], local_power[worker][nimpl]);
			
 
				 			}
			
@@ -484,13 +529,13 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		model_best = local_task_length[best][best_impl];
			
 
				+		model_best = local_task_length[best][selected_impl];
			
 
				 		//penality_best = local_data_penalty[best][best_impl];
			
 
				 	}
			
 
				 
			
 
				 
			
 
				 	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
			
 
				-	 _starpu_get_job_associated_to_task(task)->nimpl = best_impl;
			
 
				+	 _starpu_get_job_associated_to_task(task)->nimpl = selected_impl;
			
 
				 
			
 
				 	/* we should now have the best worker in variable "best" */
			
 
				 	return push_task_on_best_worker(task, best, model_best, prio);