@@ -7,10 +7,11 @@ extern struct hypervisor_policy simple_policy;
 
 static void idle_time_cb(unsigned sched_ctx, int worker, double idle_time);
 static void pushed_task_cb(unsigned sched_ctx, int worker);
-static void poped_task_cb(unsigned sched_ctx, int worker);
+static void poped_task_cb(unsigned sched_ctx, int worker, double flops);
 static void post_exec_hook_cb(unsigned sched_ctx, int taskid);
 static void reset_idle_time_cb(unsigned sched_ctx, int worker);
 
+static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val);
 static void _load_hypervisor_policy(int type)
 {
 	switch(type)
@@ -25,6 +26,7 @@ static void _load_hypervisor_policy(int type)
 		hypervisor.policy.update_config = simple_policy.update_config;
 		hypervisor.policy.resize = simple_policy.resize;
 		hypervisor.policy.manage_task_flux = simple_policy.manage_task_flux;
+		hypervisor.policy.manage_gflops_rate = simple_policy.manage_gflops_rate;
 		break;
 	}
 }
@@ -44,6 +46,13 @@ struct starpu_sched_ctx_hypervisor_criteria** sched_ctx_hypervisor_init(int type
 		hypervisor.sched_ctx_w[i].config = NULL;
 		hypervisor.sched_ctx_w[i].temp_npushed_tasks = 0;
 		hypervisor.sched_ctx_w[i].temp_npoped_tasks = 0;
+		hypervisor.sched_ctx_w[i].total_flops = 0.0;
+		hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
+		hypervisor.sched_ctx_w[i].start_time = 0.0;
+		hypervisor.sched_ctx_w[i].bef_res_exp_end = 0.0;
+		hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
+		hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
+		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
 
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
@@ -51,6 +60,8 @@ struct starpu_sched_ctx_hypervisor_criteria** sched_ctx_hypervisor_init(int type
 			hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0;
 			hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0;
 			hypervisor.sched_ctx_w[i].poped_tasks[j] = 0;
+			hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
+			hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
 
 		}
 	}
@@ -102,7 +113,7 @@ void sched_ctx_hypervisor_shutdown(void)
 	pthread_mutex_destroy(&act_hypervisor_mutex);
 }
 
-void sched_ctx_hypervisor_handle_ctx(unsigned sched_ctx)
+void sched_ctx_hypervisor_handle_ctx(unsigned sched_ctx, double total_flops)
 {
 	hypervisor.configurations[sched_ctx] = (struct starpu_htbl32_node_s*)malloc(sizeof(struct starpu_htbl32_node_s));
 	hypervisor.steal_requests[sched_ctx] = (struct starpu_htbl32_node_s*)malloc(sizeof(struct starpu_htbl32_node_s));
@@ -111,6 +122,9 @@ void sched_ctx_hypervisor_handle_ctx(unsigned sched_ctx)
 	hypervisor.policy.add_sched_ctx(sched_ctx);
 	hypervisor.sched_ctx_w[sched_ctx].sched_ctx = sched_ctx;
 	hypervisor.sched_ctxs[hypervisor.nsched_ctxs++] = sched_ctx;
+
+	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
 }
 
 static int _get_first_free_sched_ctx(int *sched_ctxs, unsigned nsched_ctxs)
@@ -260,17 +274,63 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 			printf(" %d", workers_to_move[j]);
 		printf("\n");
 
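+		/* remember the expected end estimated just before the resize */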
+		hypervisor.sched_ctx_w[sender_sched_ctx].bef_res_exp_end = sched_ctx_hypervisor_get_exp_end(sender_sched_ctx);
+
 		starpu_remove_workers_from_sched_ctx(workers_to_move, nworkers_to_move, sender_sched_ctx);
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
 
+		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
+		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
+		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
+
+
 		int i;
 		for(i = 0; i < nworkers_to_move; i++)
+		{
 			hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];
+		}
+
+		hypervisor.resize[sender_sched_ctx] = 0;
 	}
 
 	return;
 }
 
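+/* a resize is acknowledged only once every moved worker has executed flops in
+   the receiver context; both contexts then restart their flops accounting and
+   resizing of the sender is re-enabled */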
+unsigned _check_for_resize_ack(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *moved_workers, int nmoved_workers)
+{
+	struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
+	struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
+	int i;
+	for(i = 0; i < nmoved_workers; i++)
+	{
+		int worker = moved_workers[i];
+		if(receiver_sc_w->elapsed_flops[worker] == 0.0)
+			return 0;
+	}
+
+	sender_sc_w->resize_ack.receiver_sched_ctx = -1;
+	sender_sc_w->resize_ack.nmoved_workers = 0;
+	free(sender_sc_w->resize_ack.moved_workers);
+	sender_sc_w->resize_ack.moved_workers = NULL;
+
+	double start_time = starpu_timing_now();
+	sender_sc_w->start_time = start_time;
+	sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - _get_elapsed_flops_per_sched_ctx(sender_sched_ctx);
+	_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
+
+	receiver_sc_w->start_time = start_time;
+	receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - _get_elapsed_flops_per_sched_ctx(receiver_sched_ctx);
+	_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
+
+	hypervisor.resize[sender_sched_ctx] = 1;
+	return 1;
+}
+
 unsigned sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
 {
 	if(task_tag == -1)
@@ -357,6 +417,16 @@ void sched_ctx_hypervisor_steal_workers(unsigned sched_ctx, int *workerids, int
 	return ;
 }
 
+double sched_ctx_hypervisor_get_idle_time(unsigned sched_ctx, int worker)
+{
+	return hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker];
+}
+
+double sched_ctx_hypervisor_get_bef_res_exp_end(unsigned sched_ctx)
+{
+	return hypervisor.sched_ctx_w[sched_ctx].bef_res_exp_end;
+}
+
 static void reset_idle_time_cb(unsigned sched_ctx, int worker)
 {
 	if(hypervisor.resize[sched_ctx])
@@ -407,32 +477,92 @@ double sched_ctx_hypervisor_get_debit(unsigned sched_ctx)
 	return 0.0;
 }
 
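+/* flops executed in the context since sched_ctx_hypervisor_handle_ctx();
+   unlike elapsed_flops, this counter is never reset on an acknowledged resize */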
+static double _get_total_elapsed_flops_per_sched_ctx(unsigned sched_ctx)
+{
+	double ret_val = 0.0;
+	int i;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+		ret_val += hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[i];
+	return ret_val;
+}
+
+static double _get_elapsed_flops_per_sched_ctx(unsigned sched_ctx)
+{
+	double ret_val = 0.0;
+	int i;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+		ret_val += hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i];
+	return ret_val;
+}
+
+static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
+{
+	int i;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+		hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i] = val;
+}
+
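+/* linear extrapolation of the flop rate observed since start_time:
+   exp_end = now + elapsed_time * remaining_flops / elapsed_flops;
+   returns -1.0 while no flops have been executed yet */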
+double sched_ctx_hypervisor_get_exp_end(unsigned sched_ctx)
+{
+	double elapsed_flops = _get_elapsed_flops_per_sched_ctx(sched_ctx);
+
+	if(elapsed_flops != 0.0)
+	{
+		double curr_time = starpu_timing_now();
+		double elapsed_time = curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time;
+		double exp_end = (elapsed_time * hypervisor.sched_ctx_w[sched_ctx].remaining_flops /
+					elapsed_flops) + curr_time;
+		return exp_end;
+	}
+	return -1.0;
+}
+
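+/* fraction of total_flops still to be executed, 0.0 once the context is done */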
+double sched_ctx_hypervisor_get_flops_left_pct(unsigned sched_ctx)
+{
+	struct sched_ctx_wrapper *wrapper = &hypervisor.sched_ctx_w[sched_ctx];
+	double total_elapsed_flops = _get_total_elapsed_flops_per_sched_ctx(sched_ctx);
+	if(wrapper->total_flops == total_elapsed_flops || total_elapsed_flops > wrapper->total_flops)
+		return 0.0;
+
+	return (wrapper->total_flops - total_elapsed_flops)/wrapper->total_flops;
+}
+
 static void pushed_task_cb(unsigned sched_ctx, int worker)
 {
 	hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++;
+	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0)
+		hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
 
 	int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 
-	if(!imposed_resize)
-		hypervisor.resize[sched_ctx] = (ntasks > hypervisor.min_tasks);
+	if(!imposed_resize && ntasks == hypervisor.min_tasks)
+		hypervisor.resize[sched_ctx] = 1;
 }
 
-static void poped_task_cb(unsigned sched_ctx, int worker)
+static void poped_task_cb(unsigned sched_ctx, int worker, double elapsed_flops)
 {
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
-
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += elapsed_flops;
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
+
 	if(hypervisor.nsched_ctxs > 1)
 	{
-		int npushed_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
-		int npoped_tasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].poped_tasks);
-		int old_npushed_tasks = hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks;
-		int old_npoped_tasks = hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks;
-		if((old_npushed_tasks + old_npushed_tasks * 0.2) < npushed_tasks ||
-		   (old_npoped_tasks + old_npoped_tasks * 0.2) < npoped_tasks)
-			hypervisor.policy.manage_task_flux(sched_ctx);
-
-		hypervisor.sched_ctx_w[sched_ctx].temp_npushed_tasks = npushed_tasks;
-		hypervisor.sched_ctx_w[sched_ctx].temp_npoped_tasks = npoped_tasks;
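+		/* let the policy react to the gflops rates, or poll a pending resize for its ack */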
+		struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
+		if(hypervisor.resize[sched_ctx] || sched_ctx_hypervisor_get_flops_left_pct(sched_ctx) == 0.0)
+			hypervisor.policy.manage_gflops_rate(sched_ctx);
+		else if(sc_w->resize_ack.receiver_sched_ctx != -1)
+		{
+			_check_for_resize_ack(sched_ctx, sc_w->resize_ack.receiver_sched_ctx,
+					      sc_w->resize_ack.moved_workers, sc_w->resize_ack.nmoved_workers);
+		}
+
 	}
 }
 