5 年之前 · f4f4c07ed4
--- a/src/sched_policies/component_heteroprio.c
+++ b/src/sched_policies/component_heteroprio.c
@@ -109,10 +109,10 @@ static int heteroprio_progress_accel(struct starpu_sched_component *component, s
 
				 	/* provided local energy */
			
 
				 	double local_energy[component->nchildren];
			
 
				 
			
 
				-	/* Minimum transfer+task termination on all children */
			
 
				-	double min_exp_end_with_task;
			
 
				-	/* Maximum transfer+task termination on all children */
			
 
				-	double max_exp_end_with_task;
			
 
				+	/* Minimum transfer+task termination of the task over all workers */
			
 
				+	double min_exp_end_of_task;
			
 
				+	/* Maximum termination of the already-scheduled tasks over all workers */
			
 
				+	double max_exp_end_of_workers;
			
 
				 
			
 
				 	unsigned suitable_components[component->nchildren];
			
 
				 	unsigned nsuitable_components;
			
@@ -158,7 +158,7 @@ static int heteroprio_progress_accel(struct starpu_sched_component *component, s
 
				 			estimated_lengths,
			
 
				 			estimated_transfer_length,
			
 
				 			estimated_ends_with_task,
			
 
				-			&min_exp_end_with_task, &max_exp_end_with_task,
			
 
				+			&min_exp_end_of_task, &max_exp_end_of_workers,
			
 
				 			suitable_components, nsuitable_components);
			
 
				 
			
 
				 	/* Compute the energy, if provided*/
			
@@ -172,7 +172,7 @@ static int heteroprio_progress_accel(struct starpu_sched_component *component, s
 
				 			estimated_transfer_length,
			
 
				 			estimated_ends_with_task,
			
 
				                         local_energy,
			
 
				-			min_exp_end_with_task, max_exp_end_with_task,
			
 
				+			min_exp_end_of_task, max_exp_end_of_workers,
			
 
				 			suitable_components, nsuitable_components);
			
 
				 
			
 
				 	if (best_icomponent == -1)
			
@@ -247,10 +247,10 @@ static int heteroprio_progress_noaccel(struct starpu_sched_component *component,
 
				 	/* estimated energy */
			
 
				 	double local_energy[component->nchildren];
			
 
				 
			
 
				-	/* Minimum transfer+task termination on all children */
			
 
				-	double min_exp_end_with_task;
			
 
				-	/* Maximum transfer+task termination on all children */
			
 
				-	double max_exp_end_with_task;
			
 
				+	/* Minimum transfer+task termination of the task over all workers */
			
 
				+	double min_exp_end_of_task;
			
 
				+	/* Maximum termination of the already-scheduled tasks over all workers */
			
 
				+	double max_exp_end_of_workers;
			
 
				 
			
 
				 	unsigned suitable_components[component->nchildren];
			
 
				 	unsigned nsuitable_components;
			
@@ -275,7 +275,7 @@ static int heteroprio_progress_noaccel(struct starpu_sched_component *component,
 
				 			estimated_lengths,
			
 
				 			estimated_transfer_length,
			
 
				 			estimated_ends_with_task,
			
 
				-			&min_exp_end_with_task, &max_exp_end_with_task,
			
 
				+			&min_exp_end_of_task, &max_exp_end_of_workers,
			
 
				 			suitable_components, nsuitable_components);
			
 
				 
			
 
				 	/* Compute the energy, if provided*/
			
@@ -289,7 +289,7 @@ static int heteroprio_progress_noaccel(struct starpu_sched_component *component,
 
				 			estimated_transfer_length,
			
 
				 			estimated_ends_with_task,
			
 
				                         local_energy,
			
 
				-			min_exp_end_with_task, max_exp_end_with_task,
			
 
				+			min_exp_end_of_task, max_exp_end_of_workers,
			
 
				 			suitable_components, nsuitable_components);
			
 
				 
			
 
				 	/* If no best component is found, it means that the perfmodel of
			
--- a/src/sched_policies/component_mct.c
+++ b/src/sched_policies/component_mct.c
@@ -38,10 +38,10 @@ static int mct_push_task(struct starpu_sched_component * component, struct starp
 
				 	/* estimated energy */
			
 
				 	double local_energy[component->nchildren];
			
 
				 
			
 
				-	/* Minimum transfer+task termination on all children */
			
 
				-	double min_exp_end_with_task;
			
 
				-	/* Maximum transfer+task termination on all children */
			
 
				-	double max_exp_end_with_task;
			
 
				+	/* Minimum transfer+task termination of the task over all workers */
			
 
				+	double min_exp_end_of_task;
			
 
				+	/* Maximum termination of the already-scheduled tasks over all workers */
			
 
				+	double max_exp_end_of_workers;
			
 
				 
			
 
				 	unsigned suitable_components[component->nchildren];
			
 
				 	unsigned nsuitable_components;
			
@@ -62,13 +62,13 @@ static int mct_push_task(struct starpu_sched_component * component, struct starp
 
				 	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
			
 
				 
			
 
				 	starpu_mct_compute_expected_times(component, task, estimated_lengths, estimated_transfer_length,
			
 
				-					  estimated_ends_with_task, &min_exp_end_with_task, &max_exp_end_with_task, suitable_components, nsuitable_components);
			
 
				+					  estimated_ends_with_task, &min_exp_end_of_task, &max_exp_end_of_workers, suitable_components, nsuitable_components);
			
 
				 
			
 
				 	/* Compute the energy, if provided*/
			
 
				 	starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components);
			
 
				 
			
 
				 	int best_icomponent = starpu_mct_get_best_component(d, task, estimated_lengths, estimated_transfer_length,
			
 
				-							    estimated_ends_with_task, local_energy, min_exp_end_with_task, max_exp_end_with_task, suitable_components, nsuitable_components);
			
 
				+							    estimated_ends_with_task, local_energy, min_exp_end_of_task, max_exp_end_of_workers, suitable_components, nsuitable_components);
			
 
				 
			
 
				 	/* If no best component is found, it means that the perfmodel of
			
 
				 	 * the task had been purged since it has been pushed on the mct component.
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -445,7 +445,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
				 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
			
 
				 	int best = -1;
			
 
				 
			
 
				-	double best_exp_end = 0.0;
			
 
				+	double best_exp_end_of_task = 0.0;
			
 
				 	double model_best = 0.0;
			
 
				 	double transfer_model_best = 0.0;
			
 
				 
			
@@ -552,10 +552,10 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
				 
			
 
				 			exp_end = exp_start + fifo->exp_len + local_length;
			
 
				 
			
 
				-			if (best == -1 || exp_end < best_exp_end)
			
 
				+			if (best == -1 || exp_end < best_exp_end_of_task)
			
 
				 			{
			
 
				 				/* a better solution was found */
			
 
				-				best_exp_end = exp_end;
			
 
				+				best_exp_end_of_task = exp_end;
			
 
				 				best = worker;
			
 
				 				model_best = local_length;
			
 
				 				transfer_model_best = local_penalty;
			
@@ -589,15 +589,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 						unsigned nworkers,
			
 
				 						double local_task_length[nworkers][STARPU_MAXIMPLEMENTATIONS],
			
 
				 						double exp_end[nworkers][STARPU_MAXIMPLEMENTATIONS],
			
 
				-						double *max_exp_endp,
			
 
				-						double *best_exp_endp,
			
 
				+						double *max_exp_endp_of_workers,
			
 
				+						double *min_exp_endp_of_task,
			
 
				 						double local_data_penalty[nworkers][STARPU_MAXIMPLEMENTATIONS],
			
 
				 						double local_energy[nworkers][STARPU_MAXIMPLEMENTATIONS],
			
 
				 						int *forced_worker, int *forced_impl, unsigned sched_ctx_id, unsigned sorted_decision)
			
 
				 {
			
 
				 	int calibrating = 0;
			
 
				-	double max_exp_end = DBL_MIN;
			
 
				-	double best_exp_end = DBL_MAX;
			
 
				+	double max_exp_end_of_workers = DBL_MIN;
			
 
				+	double best_exp_end_of_task = DBL_MAX;
			
 
				 	int ntasks_best = -1;
			
 
				 	int nimpl_best = 0;
			
 
				 	double ntasks_best_end = 0.0;
			
@@ -664,8 +664,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 			}
			
 
				 
			
 
				 			exp_end[worker_ctx][nimpl] = exp_start + prev_exp_len;
			
 
				-			if (exp_end[worker_ctx][nimpl] > max_exp_end)
			
 
				-				max_exp_end = exp_end[worker_ctx][nimpl];
			
 
				+			if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
			
 
				+				max_exp_end_of_workers = exp_end[worker_ctx][nimpl];
			
 
				 
			
 
				 			//_STARPU_DEBUG("Scheduler dmda: task length (%lf) workerid (%u) kernel (%u) \n", local_task_length[workerid][nimpl],workerid,nimpl);
			
 
				 
			
@@ -742,10 +742,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 
			
 
				 			exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl];
			
 
				 
			
 
				-			if (exp_end[worker_ctx][nimpl] < best_exp_end)
			
 
				+			if (exp_end[worker_ctx][nimpl] < best_exp_end_of_task)
			
 
				 			{
			
 
				 				/* a better solution was found */
			
 
				-				best_exp_end = exp_end[worker_ctx][nimpl];
			
 
				+				best_exp_end_of_task = exp_end[worker_ctx][nimpl];
			
 
				 				nimpl_best = nimpl;
			
 
				 			}
			
 
				 
			
@@ -766,8 +766,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				-	*best_exp_endp = best_exp_end;
			
 
				-	*max_exp_endp = max_exp_end;
			
 
				+	*min_exp_endp_of_task = best_exp_end_of_task;
			
 
				+	*max_exp_endp_of_workers = max_exp_end_of_workers;
			
 
				 }
			
 
				 
			
 
				 static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate, unsigned sorted_decision)
			
@@ -794,10 +794,10 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
				 	double exp_end[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	/* This is the minimum among the exp_end[] matrix */
			
 
				-	double best_exp_end;
			
 
				+	double min_exp_end_of_task;
			
 
				 
			
 
				 	/* This is the maximum termination time of already-scheduled tasks over all workers */
			
 
				-	double max_exp_end = 0.0;
			
 
				+	double max_exp_end_of_workers = 0.0;
			
 
				 
			
 
				 	double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
@@ -806,8 +806,8 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
				 					    nworkers_ctx,
			
 
				 					    local_task_length,
			
 
				 					    exp_end,
			
 
				-					    &max_exp_end,
			
 
				-					    &best_exp_end,
			
 
				+					    &max_exp_end_of_workers,
			
 
				+					    &min_exp_end_of_task,
			
 
				 					    local_data_penalty,
			
 
				 					    local_energy,
			
 
				 					    &forced_best,
			
@@ -836,17 +836,17 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
				 					/* no one on that queue may execute this task */
			
 
				 					continue;
			
 
				 				}
			
 
				-				fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - best_exp_end)
			
 
				+				fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task)
			
 
				 					+ dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl])
			
 
				 					+ dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]);
			
 
				 
			
 
				-				if (exp_end[worker_ctx][nimpl] > max_exp_end)
			
 
				+				if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
			
 
				 				{
			
 
				 					/* This placement will make the computation
			
 
				 					 * longer, take into account the idle
			
 
				 					 * consumption of other cpus */
			
 
				-					fitness[worker_ctx][nimpl] += dt->_gamma * __s_gamma__value * dt->idle_power * __s_idle_power__value * (exp_end[worker_ctx][nimpl] - max_exp_end) / 1000000.0; /* Since gamma is the cost in us of one Joules, 
			
 
				-																									  then  d->idle_power * (exp_end - max_exp_end) 
			
 
				+					fitness[worker_ctx][nimpl] += dt->_gamma * __s_gamma__value * dt->idle_power * __s_idle_power__value * (exp_end[worker_ctx][nimpl] - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, 
			
 
				+																									  then  d->idle_power * (exp_end - max_exp_end_of_workers) 
			
 
				 																									  must be in Joules, thus the / 1000000.0 */
			
 
				 				}
			
 
				 
			
@@ -858,7 +858,7 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
				 					best_in_ctx = worker_ctx;
			
 
				 					selected_impl = nimpl;
			
 
				 
			
 
				-					//_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) +gamma(%e)\n", worker, best_fitness, exp_end[worker][nimpl] - best_exp_end, local_data_penalty[worker][nimpl], local_energy[worker][nimpl]);
			
 
				+					//_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) +gamma(%e)\n", worker, best_fitness, exp_end[worker][nimpl] - min_exp_end_of_task, local_data_penalty[worker][nimpl], local_energy[worker][nimpl]);
			
 
				 
			
 
				 				}
			
 
				 			}
			
--- a/src/sched_policies/helper_mct.c
+++ b/src/sched_policies/helper_mct.c
@@ -81,17 +81,17 @@ static double compute_expected_time(double now, double predicted_end, double pre
 
				 	return predicted_end;
			
 
				 }
			
 
				 
			
 
				-double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end, double max_exp_end, double transfer_len, double local_energy)
			
 
				+double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
			
 
				 {
			
 
				 	/* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
			
 
				 	
			
 
				-	/* min_exp_end is the minimum end time of the task over all workers */
			
 
				-	double fitness = d->alpha * (exp_end - min_exp_end) + d->beta * transfer_len + d->_gamma * local_energy;
			
 
				+	/* min_exp_end_of_task is the minimum end time of the task over all workers */
			
 
				+	double fitness = d->alpha * (exp_end - min_exp_end_of_task) + d->beta * transfer_len + d->_gamma * local_energy;
			
 
				 	
			
 
				-	/* max_exp_end is the maximum end time of the workers. If the total execution time is increased, then an 
			
 
				+	/* max_exp_end_of_workers is the maximum end time of the workers. If the total execution time is increased, then an 
			
 
				           additional energy penalty must be considered*/
			
 
				-	if(exp_end > max_exp_end)
			
 
				-		fitness += d->_gamma * d->idle_power * (exp_end - max_exp_end) / 1000000.0; /* Since gamma is the cost in us of one Joules, 
			
 
				-											       then  d->idle_power * (exp_end - max_exp_end) 
			
 
				+	if(exp_end > max_exp_end_of_workers)
			
 
				+		fitness += d->_gamma * d->idle_power * (exp_end - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, 
			
 
				+											       then  d->idle_power * (exp_end - max_exp_end_of_workers) 
			
 
				 											       must be in Joules, thus the / 1000000.0 */
			
 
				 
			
@@ -129,12 +129,12 @@ unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *compo
 
				 
			
 
				 void starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED,
			
 
				 		double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task,
			
 
				-				       double *min_exp_end_with_task, double *max_exp_end_with_task, unsigned *suitable_components, unsigned nsuitable_components)
			
 
				+				       double *min_exp_end_of_task, double *max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components)
			
 
				 {
			
 
				 	unsigned i;
			
 
				 	double now = starpu_timing_now();
			
 
				-	*min_exp_end_with_task = DBL_MAX;
			
 
				-	*max_exp_end_with_task = 0.0;
			
 
				+	*min_exp_end_of_task = DBL_MAX;
			
 
				+	*max_exp_end_of_workers = 0.0;
			
 
				 	for(i = 0; i < nsuitable_components; i++)
			
 
				 	{
			
 
				 		unsigned icomponent = suitable_components[i];
			
@@ -150,13 +150,13 @@ void starpu_mct_compute_expected_times(struct starpu_sched_component *component,
 
				 		
			
 
				 		/* estimated_ends_with_task[icomponent]: estimated end of execution on the worker icomponent
			
 
				 		   estimated_end: estimatated end of the worker
			
 
				-		   min_exp_end_with_task: minimum estimated execution time of the task over all workers
			
 
				-		   max_exp_end_with_task: maximum estimated end of the whole run (not just the task) all workers 
			
 
				+		   min_exp_end_of_task: minimum estimated execution time of the task over all workers
			
 
				+		   max_exp_end_of_workers: maximum estimated end of the already-scheduled tasks over all workers
			
 
				 		*/
			
 
				-		if(estimated_ends_with_task[icomponent] < *min_exp_end_with_task)
			
 
				-			*min_exp_end_with_task = estimated_ends_with_task[icomponent];
			
 
				-		if(estimated_end > *max_exp_end_with_task)
			
 
				-			*max_exp_end_with_task = estimated_end;
			
 
				+		if(estimated_ends_with_task[icomponent] < *min_exp_end_of_task)
			
 
				+			*min_exp_end_of_task = estimated_ends_with_task[icomponent];
			
 
				+		if(estimated_end > *max_exp_end_of_workers)
			
 
				+			*max_exp_end_of_workers = estimated_end;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -179,7 +179,7 @@ void starpu_mct_compute_energy(struct starpu_sched_component *component, struct
 
				 	}
			
 
				 }
			
 
				 
			
 
				-int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_with_task, double max_exp_end_with_task, unsigned *suitable_components, unsigned nsuitable_components)
			
 
				+int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_of_task, double max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components)
			
 
				 {
			
 
				 	double best_fitness = DBL_MAX;
			
 
				 	int best_icomponent = -1;
			
@@ -190,8 +190,8 @@ int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task
 
				 		int icomponent = suitable_components[i];
			
 
				 		double tmp = starpu_mct_compute_fitness(d,
			
 
				 					     estimated_ends_with_task[icomponent],
			
 
				-					     min_exp_end_with_task,
			
 
				-					     max_exp_end_with_task,
			
 
				+					     min_exp_end_of_task,
			
 
				+					     max_exp_end_of_workers,
			
 
				 					     estimated_transfer_length[icomponent],
			
 
				 					     local_energy[icomponent]);
			
 
				 
			
--- a/src/sched_policies/helper_mct.h
+++ b/src/sched_policies/helper_mct.h
@@ -39,8 +39,8 @@ void starpu_mct_compute_expected_times(struct starpu_sched_component *component,
 
				 				       double *estimated_lengths,
			
 
				 				       double *estimated_transfer_length,
			
 
				 				       double *estimated_ends_with_task,
			
 
				-				       double *min_exp_end_with_task,
			
 
				-				       double *max_exp_end_with_task,
			
 
				+				       double *min_exp_end_of_task,
			
 
				+				       double *max_exp_end_of_workers,
			
 
				 				       unsigned *suitable_components,
			
 
				 				       unsigned nsuitable_components);
			
 
				 
			
@@ -57,8 +57,8 @@ int starpu_mct_get_best_component(struct _starpu_mct_data *d,
 
				 				  double *estimated_transfer_length,
			
 
				 				  double *estimated_ends_with_task,
			
 
				 				  double *local_energy,
			
 
				-				  double min_exp_end_with_task,
			
 
				-				  double max_exp_end_with_task,
			
 
				+				  double min_exp_end_of_task,
			
 
				+				  double max_exp_end_of_workers,
			
 
				 				  unsigned *suitable_components,
			
 
				 				  unsigned nsuitable_components);