|
@@ -3,6 +3,7 @@
|
|
|
#include <starpu_scheduler.h>
|
|
|
#include <float.h>
|
|
|
|
|
|
+
|
|
|
struct _starpu_dmda_data
|
|
|
{
|
|
|
double alpha;
|
|
@@ -13,169 +14,100 @@ struct _starpu_dmda_data
|
|
|
|
|
|
|
|
|
|
|
|
-static void compute_all_things(struct starpu_task * task,
|
|
|
- struct _starpu_sched_node ** nodes, int nnodes,
|
|
|
- double * execution_lengths, int * best_impls,//impl used for best execution length, -1 if no execution possible
|
|
|
- double * transfer_lengths,
|
|
|
- double * finish_times,
|
|
|
- int * is_not_calibrated, enum starpu_perf_archtype * arch_not_calibrated, int * impl_not_calibrated,
|
|
|
- int * is_no_model)
|
|
|
-{
|
|
|
- *is_not_calibrated = 0;
|
|
|
- *is_no_model = 1;
|
|
|
- int i = 0;
|
|
|
- for(i = 0; i < nnodes; i++)
|
|
|
- {
|
|
|
- execution_lengths[i] = DBL_MAX;
|
|
|
- best_impls[i] = -1;
|
|
|
- int j;
|
|
|
- for(j = 0; j < STARPU_MAXIMPLEMENTATIONS; j++)
|
|
|
- {
|
|
|
- if(_starpu_sched_node_can_execute_task_with_impl(nodes[i], task, j))
|
|
|
- {
|
|
|
- enum starpu_perf_archtype archtype = starpu_worker_get_perf_archtype(nodes[i]->workerids[0]);
|
|
|
- double d = starpu_task_expected_length(task, archtype, j);
|
|
|
- if(isnan(d))
|
|
|
- {
|
|
|
- *is_not_calibrated = 1;
|
|
|
- *arch_not_calibrated = archtype;
|
|
|
- *impl_not_calibrated = j;
|
|
|
- }
|
|
|
- if(!_STARPU_IS_ZERO(d))//we have a perf model
|
|
|
- {
|
|
|
- *is_no_model = 0;
|
|
|
- if(d < execution_lengths[i])
|
|
|
- {
|
|
|
- execution_lengths[i] = d;
|
|
|
- best_impls[i] = j;
|
|
|
- }
|
|
|
- }
|
|
|
- else//we dont have a perf model for this implementation but we may have one for an other
|
|
|
- if(*is_no_model)
|
|
|
- best_impls[i] = j;
|
|
|
- unsigned memory_node = starpu_worker_get_memory_node(nodes[i]->workerids[0]);
|
|
|
- transfer_lengths[i] = starpu_task_expected_data_transfer_time(memory_node, task);
|
|
|
- finish_times[i] = nodes[i]->estimated_finish_time(nodes[i]);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-static double compute_total_finish_time(double exp_end, double exp_len, double exp_trans)
|
|
|
-{
|
|
|
- if(exp_trans < exp_end)
|
|
|
- return exp_end + exp_len;
|
|
|
- else
|
|
|
- return exp_end + exp_trans;
|
|
|
-}
|
|
|
-
|
|
|
-static double fitness(double alpha, double beta, double gamma,
|
|
|
- double execution_length, double transfer_length, double finish_time, double now)
|
|
|
+static double compute_fitness_calibration(struct _starpu_sched_node * child,
|
|
|
+ struct _starpu_dmda_data * data STARPU_ATTRIBUTE_UNUSED,
|
|
|
+ struct starpu_task * task STARPU_ATTRIBUTE_UNUSED,
|
|
|
+ struct _starpu_execute_pred *pred)
|
|
|
{
|
|
|
- (void) gamma;
|
|
|
- double total_execution_time = compute_total_finish_time(finish_time - now, execution_length, transfer_length);
|
|
|
- return alpha * total_execution_time + transfer_length * beta;
|
|
|
+ if(pred->state == CALIBRATING)
|
|
|
+ return child->estimated_load(child);
|
|
|
+ return DBL_MAX;
|
|
|
}
|
|
|
-
|
|
|
-static double fitness_no_model(double alpha, double beta, double transfer_length, double finish_time, double now)
|
|
|
+static double compute_fitness_no_perf_model(struct _starpu_sched_node * child,
|
|
|
+ struct _starpu_dmda_data * data STARPU_ATTRIBUTE_UNUSED,
|
|
|
+ struct starpu_task * task STARPU_ATTRIBUTE_UNUSED,
|
|
|
+ struct _starpu_execute_pred *pred)
|
|
|
{
|
|
|
- (void) gamma;
|
|
|
- double exp_end = finish_time - now;
|
|
|
- return alpha * exp_end + beta * transfer_length;
|
|
|
+ if(pred->state == CANNOT_EXECUTE)
|
|
|
+ return DBL_MAX;
|
|
|
+ return child->estimated_load(child);
|
|
|
}
|
|
|
|
|
|
-static double estimated_transfert_time(struct _starpu_sched_node * node, struct starpu_task * task)
|
|
|
+static double compute_fitness_perf_model(struct _starpu_sched_node * child,
|
|
|
+ struct _starpu_dmda_data * data,
|
|
|
+ struct starpu_task * task,
|
|
|
+ struct _starpu_execute_pred * pred)
|
|
|
{
|
|
|
- STARPU_ASSERT(node->nworkers);
|
|
|
- unsigned memory_node = starpu_worker_get_memory_node(node->workerids[0]);
|
|
|
- return starpu_task_expected_data_transfer_time(memory_node, task);
|
|
|
+ if(pred->state == CANNOT_EXECUTE)
|
|
|
+ return DBL_MAX;
|
|
|
+ return data->alpha * pred->expected_length
|
|
|
+ + data->beta * child->estimated_transfer_length(child, task);
|
|
|
}
|
|
|
|
|
|
static int push_task(struct _starpu_sched_node * node, struct starpu_task * task)
|
|
|
{
|
|
|
- struct _starpu_dmda_data * dt = node->data;
|
|
|
- double execution_lengths[node->nchilds];
|
|
|
- double finish_times[node->nchilds];
|
|
|
- double transfer_lengths[node->nchilds];
|
|
|
- int best_impls[node->nchilds];// -1 mean cant execute
|
|
|
- //double power_consumptions[node->nchilds];
|
|
|
+ STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
|
|
|
+ struct _starpu_execute_pred preds[node->nchilds];
|
|
|
int i;
|
|
|
-
|
|
|
- int is_not_calibrated;
|
|
|
- enum starpu_perf_archtype arch_not_calibrated;
|
|
|
- int impl_not_calibrated;
|
|
|
-
|
|
|
- int is_no_model;
|
|
|
-
|
|
|
- compute_all_things(task,
|
|
|
- node->childs, node->nchilds,
|
|
|
- execution_lengths, best_impls,
|
|
|
- transfer_lengths,
|
|
|
- finish_times,
|
|
|
- &is_not_calibrated, &arch_not_calibrated, &impl_not_calibrated,
|
|
|
- &is_no_model);
|
|
|
-
|
|
|
- double max_fitness = DBL_MAX;
|
|
|
- int index_max = -1;
|
|
|
- double now = starpu_timing_now();
|
|
|
- if(is_not_calibrated)
|
|
|
+ int calibrating = 0;
|
|
|
+ int perf_model = 0;
|
|
|
+ int can_execute = 0;
|
|
|
+ for(i = 0; i < node->nchilds; i++)
|
|
|
{
|
|
|
- for(i = 0; i < node->nchilds; i++)
|
|
|
+ preds[i] = node->childs[i]->estimated_execute_length(node->childs[i], task);
|
|
|
+ switch(preds[i].state)
|
|
|
{
|
|
|
- if(best_impls[i] == -1)
|
|
|
- continue;
|
|
|
- enum starpu_perf_archtype archtype = starpu_worker_get_perf_archtype(node->childs[i]->workerids[0]);
|
|
|
- if(archtype != arch_not_calibrated)
|
|
|
- continue;
|
|
|
- double f = fitness_no_model(dt->alpha, dt->beta, transfer_lengths[i], finish_times[i], now);
|
|
|
- if(f < max_fitness)
|
|
|
- {
|
|
|
- max_fitness = f;
|
|
|
- index_max = i;
|
|
|
- }
|
|
|
+ case PERF_MODEL:
|
|
|
+ perf_model = 1;
|
|
|
+ can_execute = 1;
|
|
|
+ break;
|
|
|
+ case CALIBRATING:
|
|
|
+ calibrating = 1;
|
|
|
+ can_execute = 1;
|
|
|
+ break;
|
|
|
+ case NO_PERF_MODEL:
|
|
|
+ can_execute = 1;
|
|
|
+ case CANNOT_EXECUTE:
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
- else if(is_no_model)
|
|
|
+ if(!can_execute)
|
|
|
{
|
|
|
- for(i = 0; i < node->nchilds; i++)
|
|
|
- {
|
|
|
- if(best_impls[i] == -1)
|
|
|
- continue;
|
|
|
- double f = fitness_no_model(dt->alpha, dt->beta, transfer_lengths[i], finish_times[i], now);
|
|
|
- if(f < max_fitness)
|
|
|
- {
|
|
|
- max_fitness = f;
|
|
|
- index_max = i;
|
|
|
- }
|
|
|
- }
|
|
|
+ STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
|
|
|
+ return -ENODEV;
|
|
|
}
|
|
|
- else
|
|
|
+ double (*fitness_fun)(struct _starpu_sched_node *,
|
|
|
+ struct _starpu_dmda_data *,
|
|
|
+ struct starpu_task *,
|
|
|
+ struct _starpu_execute_pred*) = compute_fitness_no_perf_model;
|
|
|
+ if(perf_model)
|
|
|
+ fitness_fun = compute_fitness_perf_model;
|
|
|
+ if(calibrating)
|
|
|
+ fitness_fun = compute_fitness_calibration;
|
|
|
+ double best_fitness = DBL_MAX;
|
|
|
+ int index_best_fitness;
|
|
|
+ for(i = 0; i < node->nchilds; i++)
|
|
|
{
|
|
|
- for(i = 0; i < node->nchilds; i++)
|
|
|
+ double tmp = fitness_fun(node->childs[i],
|
|
|
+ node->data,
|
|
|
+ task,
|
|
|
+ preds + i);
|
|
|
+ if(tmp < best_fitness)
|
|
|
{
|
|
|
- if(best_impls[i] == -1)
|
|
|
- continue;
|
|
|
- double f = fitness(dt->alpha, dt->beta, dt->gamma,
|
|
|
- execution_lengths[i], transfer_lengths[i] , finish_times[i], now);
|
|
|
-
|
|
|
- if(f < max_fitness)
|
|
|
- {
|
|
|
- max_fitness = f;
|
|
|
- index_max = i;
|
|
|
- }
|
|
|
+ best_fitness = tmp;
|
|
|
+ index_best_fitness = i;
|
|
|
}
|
|
|
}
|
|
|
+ struct _starpu_sched_node * c = node->childs[index_best_fitness];
|
|
|
|
|
|
- STARPU_ASSERT(index_max != -1);
|
|
|
- task->predicted = execution_lengths[index_max];
|
|
|
- task->predicted_transfer = transfer_lengths[index_max];
|
|
|
- starpu_task_set_implementation(task, best_impls[index_max]);
|
|
|
- struct _starpu_sched_node * child = node->childs[index_max];
|
|
|
- return child->push_task(child, task);
|
|
|
+ starpu_task_set_implementation(task, preds[index_best_fitness].impl);
|
|
|
+ task->predicted = preds[index_best_fitness].expected_length;
|
|
|
+ task->predicted_transfer = c->estimated_transfer_length(c,task);
|
|
|
+ STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
|
|
|
+ return c->push_task(c, task);
|
|
|
}
|
|
|
|
|
|
|
|
|
-
|
|
|
struct _starpu_sched_node * _starpu_sched_node_heft_create(double alpha, double beta, double gamma, double idle_power)
|
|
|
{
|
|
|
struct _starpu_sched_node * node = _starpu_sched_node_create();
|
|
@@ -195,3 +127,4 @@ struct _starpu_sched_node * _starpu_sched_node_heft_create(double alpha, double
|
|
|
}
|
|
|
|
|
|
|
|
|
+
|