|
@@ -14,28 +14,70 @@
|
|
|
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
|
|
|
*/
|
|
|
|
|
|
-#include "policy_tools.h"
|
|
|
+#include "lp_tools.h"
|
|
|
#include <math.h>
|
|
|
|
|
|
-static struct bound_task_pool *task_pools, *last;
|
|
|
+static struct bound_task_pool *task_pools = NULL;
|
|
|
|
|
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
|
|
-static void lp2_handle_submitted_job(struct starpu_task *task, unsigned footprint)
|
|
|
+static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
|
|
|
{
|
|
|
- pthread_mutex_lock(&mutex);
|
|
|
- struct bound_task_pool *tp;
|
|
|
+ int ns = sched_ctxs == NULL ? sched_ctx_hypervisor_get_nsched_ctxs() : nsched_ctxs;
|
|
|
+ int nw = workers == NULL ? starpu_worker_get_count() : nworkers; /* Number of different workers */
|
|
|
+ int nt = 0; /* Number of different kinds of tasks */
|
|
|
+ struct bound_task_pool * tp;
|
|
|
+ for (tp = task_pools; tp; tp = tp->next)
|
|
|
+ nt++;
|
|
|
|
|
|
- if (last && last->cl == task->cl && last->footprint == footprint && last->sched_ctx_id == task->sched_ctx)
|
|
|
- tp = last;
|
|
|
- else
|
|
|
- for (tp = task_pools; tp; tp = tp->next)
|
|
|
- if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
|
|
|
- break;
|
|
|
+ double w_in_s[ns][nw];
|
|
|
|
|
|
+ unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, sched_ctxs, workers);
|
|
|
+ /* if we did find at least one solution redistribute the resources */
|
|
|
+ if(found_sol)
|
|
|
+ _redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 1, sched_ctxs, workers);
|
|
|
+}
|
|
|
+
|
|
|
+static void size_if_required()
|
|
|
+{
|
|
|
+ int nsched_ctxs, nworkers;
|
|
|
+ int *sched_ctxs, *workers;
|
|
|
+ unsigned has_req = sched_ctx_hypervisor_get_size_req(&sched_ctxs, &nsched_ctxs, &workers, &nworkers);
|
|
|
+
|
|
|
+ if(has_req)
|
|
|
+ {
|
|
|
+ struct sched_ctx_wrapper* sc_w = NULL;
|
|
|
+ unsigned ready_to_size = 1;
|
|
|
+ int s;
|
|
|
+ pthread_mutex_lock(&act_hypervisor_mutex);
|
|
|
+ for(s = 0; s < nsched_ctxs; s++)
|
|
|
+ {
|
|
|
+ sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
|
|
|
+ if(sc_w->submitted_flops < sc_w->total_flops)
|
|
|
+ ready_to_size = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ if(ready_to_size)
|
|
|
+ _size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
|
|
|
+ pthread_mutex_unlock(&act_hypervisor_mutex);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void lp2_handle_submitted_job(struct starpu_task *task, uint32_t footprint)
|
|
|
+{
|
|
|
+ /* count the tasks of the same type */
|
|
|
+ pthread_mutex_lock(&mutex);
|
|
|
+ struct bound_task_pool *tp = NULL;
|
|
|
+
|
|
|
+ for (tp = task_pools; tp; tp = tp->next)
|
|
|
+ {
|
|
|
+ if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
if (!tp)
|
|
|
{
|
|
|
- tp = (struct bound_task_pool *) malloc(sizeof(*tp));
|
|
|
+ tp = (struct bound_task_pool *) malloc(sizeof(struct bound_task_pool));
|
|
|
tp->cl = task->cl;
|
|
|
tp->footprint = footprint;
|
|
|
tp->sched_ctx_id = task->sched_ctx;
|
|
@@ -43,13 +85,15 @@ static void lp2_handle_submitted_job(struct starpu_task *task, unsigned footprin
|
|
|
tp->next = task_pools;
|
|
|
task_pools = tp;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/* One more task of this kind */
|
|
|
tp->n++;
|
|
|
pthread_mutex_unlock(&mutex);
|
|
|
+
|
|
|
+ size_if_required();
|
|
|
}
|
|
|
|
|
|
-static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt])
|
|
|
+static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers)
|
|
|
{
|
|
|
struct bound_task_pool *tp;
|
|
|
int w, t;
|
|
@@ -57,30 +101,16 @@ static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt])
|
|
|
{
|
|
|
for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
{
|
|
|
- enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
|
|
|
+ enum starpu_perf_archtype arch = workers == NULL ? starpu_worker_get_perf_archtype(w) :
|
|
|
+ starpu_worker_get_perf_archtype(workers[w]);
|
|
|
double length = starpu_history_based_job_expected_perf(tp->cl->model, arch, tp->footprint);
|
|
|
|
|
|
if (isnan(length))
|
|
|
times[w][t] = NAN;
|
|
|
else
|
|
|
- times[w][t] = length / 1000.;
|
|
|
-
|
|
|
-// printf("t%d on worker %d ctx %d: %lf \n", t, w, tp->sched_ctx_id, times[w][t]);
|
|
|
+ times[w][t] = length / 1000.;
|
|
|
}
|
|
|
-// printf("\n");
|
|
|
}
|
|
|
-// printf("\n");
|
|
|
-}
|
|
|
-
|
|
|
-int _get_idx_sched_ctx(int sched_ctx_id)
|
|
|
-{
|
|
|
- int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
|
|
|
- int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
|
|
|
- int i;
|
|
|
- for(i = 0; i < nsched_ctxs; i++)
|
|
|
- if(sched_ctxs[i] == sched_ctx_id)
|
|
|
- return i;
|
|
|
- return -1;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -88,7 +118,7 @@ int _get_idx_sched_ctx(int sched_ctx_id)
|
|
|
*/
|
|
|
#ifdef HAVE_GLPK_H
|
|
|
#include <glpk.h>
|
|
|
-static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
+static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
|
|
|
{
|
|
|
struct bound_task_pool * tp;
|
|
|
int t, w, s;
|
|
@@ -96,26 +126,27 @@ static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
|
|
|
lp = glp_create_prob();
|
|
|
glp_set_prob_name(lp, "StarPU theoretical bound");
|
|
|
- glp_set_obj_dir(lp, GLP_MIN);
|
|
|
+ glp_set_obj_dir(lp, GLP_MAX);
|
|
|
glp_set_obj_name(lp, "total execution time");
|
|
|
|
|
|
{
|
|
|
double times[nw][nt];
|
|
|
- int ne =
|
|
|
- nw * (nt+1) /* worker execution time */
|
|
|
- + nt * nw
|
|
|
- + nw * (nt+ns)
|
|
|
+ int ne = nt * nw /* worker execution time */
|
|
|
+ + nw * ns
|
|
|
+ + nw * (nt + ns)
|
|
|
+ 1; /* glp dumbness */
|
|
|
int n = 1;
|
|
|
int ia[ne], ja[ne];
|
|
|
double ar[ne];
|
|
|
|
|
|
- _starpu_get_tasks_times(nw, nt, times);
|
|
|
+ _starpu_get_tasks_times(nw, nt, times, workers);
|
|
|
|
|
|
/* Variables: number of tasks i assigned to worker j, and tmax */
|
|
|
- glp_add_cols(lp, nw*nt+1);
|
|
|
+ glp_add_cols(lp, nw*nt+ns*nw);
|
|
|
#define colnum(w, t) ((t)*nw+(w)+1)
|
|
|
- glp_set_obj_coef(lp, nw*nt+1, 1.);
|
|
|
+ for(s = 0; s < ns; s++)
|
|
|
+ for(w = 0; w < nw; w++)
|
|
|
+ glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);
|
|
|
|
|
|
for (w = 0; w < nw; w++)
|
|
|
for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
@@ -125,43 +156,18 @@ static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
glp_set_col_name(lp, colnum(w, t), name);
|
|
|
glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
|
|
|
}
|
|
|
- glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
|
|
|
-
|
|
|
- int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
|
|
|
-
|
|
|
- /* ntasks_per_worker*t_tasks < tmax */
|
|
|
- glp_add_rows(lp, nw*ns);
|
|
|
for(s = 0; s < ns; s++)
|
|
|
- {
|
|
|
- for (w = 0; w < nw; w++)
|
|
|
+ for(w = 0; w < nw; w++)
|
|
|
{
|
|
|
- char name[32], title[64];
|
|
|
- starpu_worker_get_name(w, name, sizeof(name));
|
|
|
- snprintf(title, sizeof(title), "worker %x ctx %x limit", w, s);
|
|
|
- glp_set_row_name(lp, w+(s*nw)+1, title);
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- {
|
|
|
- if(tp->sched_ctx_id == sched_ctxs[s])
|
|
|
- {
|
|
|
- ia[n] = w+(s*nw)+1;
|
|
|
- ja[n] = colnum(w, t);
|
|
|
- ar[n] = times[w][t];
|
|
|
-
|
|
|
- n++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /* tmax */
|
|
|
- ia[n] = w+(s*nw)+1;
|
|
|
- ja[n] = nw*nt+1;
|
|
|
- ar[n] = -1;
|
|
|
- n++;
|
|
|
-
|
|
|
- glp_set_row_bnds(lp, w+(s*nw)+1, GLP_UP, 0.0, 0.0);
|
|
|
+ char name[32];
|
|
|
+ snprintf(name, sizeof(name), "w%ds%dn", w, s);
|
|
|
+ glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
|
|
|
+ glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- int curr_row_idx = nw*ns;
|
|
|
+ int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
|
|
|
+
|
|
|
+ int curr_row_idx = 0;
|
|
|
/* Total worker execution time */
|
|
|
glp_add_rows(lp, nw*ns);
|
|
|
for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
@@ -174,37 +180,42 @@ static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
{
|
|
|
/* This task does not have any performance model at all, abort */
|
|
|
glp_delete_prob(lp);
|
|
|
- return NULL;
|
|
|
+ return 0.0;
|
|
|
}
|
|
|
}
|
|
|
- for (w = 0; w < nw; w++)
|
|
|
+ /*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
|
|
|
+ for(s = 0; s < ns; s++)
|
|
|
{
|
|
|
-
|
|
|
- char name[32], title[64];
|
|
|
- starpu_worker_get_name(w, name, sizeof(name));
|
|
|
- snprintf(title, sizeof(title), "worker %s", name);
|
|
|
- glp_set_row_name(lp, curr_row_idx+w+1, title);
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
+ for (w = 0; w < nw; w++)
|
|
|
{
|
|
|
- ia[n] = curr_row_idx+w+1;
|
|
|
- ja[n] = colnum(w, t);
|
|
|
- if (isnan(times[w][t]))
|
|
|
- ar[n] = 1000000000.;
|
|
|
- else
|
|
|
- ar[n] = times[w][t];
|
|
|
- if(starpu_worker_belongs_to_sched_ctx(w, tp->sched_ctx_id))
|
|
|
- ar[n] = 100000;
|
|
|
-
|
|
|
+ char name[32], title[64];
|
|
|
+ starpu_worker_get_name(w, name, sizeof(name));
|
|
|
+ snprintf(title, sizeof(title), "worker %s", name);
|
|
|
+ glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
|
|
|
+ for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
+ {
|
|
|
+ if(tp->sched_ctx_id == sched_ctxs[s])
|
|
|
+ {
|
|
|
+ ia[n] = curr_row_idx+s*nw+w+1;
|
|
|
+ ja[n] = colnum(w, t);
|
|
|
+ if (isnan(times[w][t]))
|
|
|
+ ar[n] = 1000000000.;
|
|
|
+ else
|
|
|
+ ar[n] = times[w][t];
|
|
|
+ n++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* x[s][w] = 1 | 0 */
|
|
|
+ ia[n] = curr_row_idx+s*nw+w+1;
|
|
|
+ ja[n] = nw*nt+s*nw+w+1;
|
|
|
+ ar[n] = (-1) * tmax;
|
|
|
n++;
|
|
|
+ glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
|
|
|
}
|
|
|
- /* tmax */
|
|
|
- ia[n] = curr_row_idx+w+1;
|
|
|
- ja[n] = nw*nt+1;
|
|
|
- ar[n] = -1;
|
|
|
- n++;
|
|
|
- glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_UP, 0, 0);
|
|
|
}
|
|
|
|
|
|
+ curr_row_idx += nw*ns;
|
|
|
+
|
|
|
/* Total task completion */
|
|
|
glp_add_rows(lp, nt);
|
|
|
for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
@@ -212,19 +223,39 @@ static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
char name[32], title[64];
|
|
|
starpu_worker_get_name(w, name, sizeof(name));
|
|
|
snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
|
|
|
- glp_set_row_name(lp, curr_row_idx+nw+t+1, title);
|
|
|
+ glp_set_row_name(lp, curr_row_idx+t+1, title);
|
|
|
for (w = 0; w < nw; w++)
|
|
|
{
|
|
|
- ia[n] = curr_row_idx+nw+t+1;
|
|
|
+ ia[n] = curr_row_idx+t+1;
|
|
|
ja[n] = colnum(w, t);
|
|
|
ar[n] = 1;
|
|
|
n++;
|
|
|
}
|
|
|
- glp_set_row_bnds(lp, curr_row_idx+nw+t+1, GLP_FX, tp->n, tp->n);
|
|
|
+ glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
|
|
|
}
|
|
|
|
|
|
+ curr_row_idx += nt;
|
|
|
+
|
|
|
+ /* sum(x[s][i]) = 1 */
|
|
|
+ glp_add_rows(lp, nw);
|
|
|
+ for (w = 0; w < nw; w++)
|
|
|
+ {
|
|
|
+ char name[32], title[64];
|
|
|
+ starpu_worker_get_name(w, name, sizeof(name));
|
|
|
+ snprintf(title, sizeof(title), "w%x", w);
|
|
|
+ glp_set_row_name(lp, curr_row_idx+w+1, title);
|
|
|
+ for(s = 0; s < ns; s++)
|
|
|
+ {
|
|
|
+ ia[n] = curr_row_idx+w+1;
|
|
|
+ ja[n] = nw*nt+s*nw+w+1;
|
|
|
+ ar[n] = 1;
|
|
|
+ n++;
|
|
|
+ }
|
|
|
|
|
|
-// printf("n = %d nw*ns = %d ne = %d\n", n, nw*ns, ne);
|
|
|
+ glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
|
|
|
+ }
|
|
|
+ if(n != ne)
|
|
|
+ printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
|
|
|
STARPU_ASSERT(n == ne);
|
|
|
|
|
|
glp_load_matrix(lp, ne-1, ia, ja, ar);
|
|
@@ -238,207 +269,235 @@ static void _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt])
|
|
|
{
|
|
|
glp_delete_prob(lp);
|
|
|
lp = NULL;
|
|
|
- return NULL;
|
|
|
+ return 0.0;
|
|
|
}
|
|
|
|
|
|
- double tmax = glp_get_obj_val(lp);
|
|
|
+ int stat = glp_get_prim_stat(lp);
|
|
|
+ /* if we don't have a solution return */
|
|
|
+ if(stat == GLP_NOFEAS)
|
|
|
+ {
|
|
|
+ glp_delete_prob(lp);
|
|
|
+ lp = NULL;
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
|
|
|
-// printf("Theoretical minimum execution time: %f ms\n", tmax);
|
|
|
+ double res = glp_get_obj_val(lp);
|
|
|
for (w = 0; w < nw; w++)
|
|
|
- {
|
|
|
for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- {
|
|
|
tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
|
|
|
-// printf("t%d worker %d ctx %d res %lf \n", t, w, tasks[w][t]);
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ for(s = 0; s < ns; s++)
|
|
|
+ for(w = 0; w < nw; w++)
|
|
|
+ w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
|
|
|
|
|
|
glp_delete_prob(lp);
|
|
|
+ return res;
|
|
|
}
|
|
|
|
|
|
-int _get_worker_having_tasks_of_this_ctx(int worker, int nw, int nt, double tasks[nw][nt], int sched_ctx)
|
|
|
-{
|
|
|
- int t, w;
|
|
|
- struct bound_task_pool * tp;
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- {
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- if(w != worker && tasks[w][t] >= 1.0 && tp->sched_ctx_id == sched_ctx)
|
|
|
- return w;
|
|
|
- }
|
|
|
- return -1;
|
|
|
-}
|
|
|
-int _get_worker_full_of_tasks_of_this_ctx(int worker, int nw, int nt, double tasks[nw][nt], int sched_ctx)
|
|
|
+static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], unsigned first_time, int *in_sched_ctxs, int *workers)
|
|
|
{
|
|
|
- int t, w;
|
|
|
- struct bound_task_pool * tp;
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- if(w != worker && tasks[w][t] > 0.3 * tp->n && tp->sched_ctx_id == sched_ctx)
|
|
|
- return w;
|
|
|
- return -1;
|
|
|
-}
|
|
|
+ int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
|
|
|
+ struct bound_task_pool * tp;
|
|
|
+ int s, s2, w, t;
|
|
|
|
|
|
-void _get_tasks_from_busiest_worker(int nw, int nt, double tasks[nw][nt], int worker)
|
|
|
-{
|
|
|
- int w, t;
|
|
|
- double tasks_per_worker[nw];
|
|
|
- double max_tasks = 0.0;
|
|
|
- int busiest_worker = -1;
|
|
|
- printf("got inside \n");
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
+ for(s = 0; s < ns; s++)
|
|
|
{
|
|
|
- if(w != worker)
|
|
|
+ int workers_to_add[nw], workers_to_remove[nw];
|
|
|
+ int destination_ctx[nw][ns];
|
|
|
+
|
|
|
+ for(w = 0; w < nw; w++)
|
|
|
+ {
|
|
|
+ workers_to_add[w] = -1;
|
|
|
+ workers_to_remove[w] = -1;
|
|
|
+ for(s2 = 0; s2 < ns; s2++)
|
|
|
+ destination_ctx[w][s2] = -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ int nadd = 0, nremove = 0;
|
|
|
+
|
|
|
+ for(w = 0; w < nw; w++)
|
|
|
{
|
|
|
- tasks_per_worker[w] = 0.0;
|
|
|
- for(t = 0; t < nt; t++)
|
|
|
+ enum starpu_perf_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
|
|
|
+ starpu_worker_get_type(workers[w]);
|
|
|
+
|
|
|
+ if(arch == STARPU_CPU_WORKER)
|
|
|
{
|
|
|
- tasks_per_worker[w] += tasks[w][t];
|
|
|
+ if(w_in_s[s][w] >= 0.5)
|
|
|
+ {
|
|
|
+ workers_to_add[nadd++] = workers == NULL ? w : workers[w];
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
|
|
|
+ for(s2 = 0; s2 < ns; s2++)
|
|
|
+ if(s2 != s && w_in_s[s2][w] >= 0.5)
|
|
|
+ destination_ctx[w][s2] = 1;
|
|
|
+ else
|
|
|
+ destination_ctx[w][s2] = 0;
|
|
|
+ }
|
|
|
}
|
|
|
- if(max_tasks < tasks_per_worker[w])
|
|
|
+ else
|
|
|
{
|
|
|
- max_tasks = tasks_per_worker[w];
|
|
|
- busiest_worker = w;
|
|
|
+ if(w_in_s[s][w] >= 0.3)
|
|
|
+ {
|
|
|
+ workers_to_add[nadd++] = workers == NULL ? w : workers[w];
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
|
|
|
+ for(s2 = 0; s2 < ns; s2++)
|
|
|
+ if(s2 != s && w_in_s[s2][w] >= 0.3)
|
|
|
+ destination_ctx[w][s2] = 1;
|
|
|
+ else
|
|
|
+ destination_ctx[w][s2] = 0;
|
|
|
+ }
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
- }
|
|
|
- for(t = 0; t < nt; t++)
|
|
|
- {
|
|
|
- if(tasks_per_worker[busiest_worker] > (max_tasks / 2))
|
|
|
- {
|
|
|
- tasks[worker][t] = tasks[busiest_worker][t];
|
|
|
- tasks_per_worker[busiest_worker] -= tasks[busiest_worker][t];
|
|
|
- tasks[busiest_worker][t] = 0.0;
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-void _recompute_resource_distrib(int nw, int nt, double tasks[nw][nt])
|
|
|
-{
|
|
|
- int w, s, t;
|
|
|
- struct bound_task_pool * tp;
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- {
|
|
|
- int no_ctxs = 0;
|
|
|
- int last_ctx = -1;
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
+
|
|
|
+ sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
|
|
|
+ struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
|
|
|
+ int i;
|
|
|
+ for(i = 0; i < nadd; i++)
|
|
|
+ new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;
|
|
|
+
|
|
|
+ if(!first_time)
|
|
|
{
|
|
|
- if(tasks[w][t] >= 1.0)
|
|
|
- {
|
|
|
- if(last_ctx != -1 && tp->sched_ctx_id != last_ctx)
|
|
|
+ /* do not remove workers if they can't go anywhere */
|
|
|
+ int w2;
|
|
|
+ unsigned found_one_dest[nremove];
|
|
|
+ unsigned all_have_dest = 1;
|
|
|
+ for(w2 = 0; w2 < nremove; w2++)
|
|
|
+ found_one_dest[w2] = 0;
|
|
|
+
|
|
|
+ for(w2 = 0; w2 < nremove; w2++)
|
|
|
+ for(s2 = 0; s2 < ns; s2++)
|
|
|
{
|
|
|
- enum starpu_archtype arch = starpu_worker_get_type(w);
|
|
|
- int w2 = -1;
|
|
|
- if(arch == STARPU_CPU_WORKER)
|
|
|
- w2 = _get_worker_having_tasks_of_this_ctx(w, nw, nt, tasks, tp->sched_ctx_id);
|
|
|
- else if(arch == STARPU_CUDA_WORKER && tasks[w][t] < 0.3*tp->n)
|
|
|
- w2 = _get_worker_full_of_tasks_of_this_ctx(w, nw, nt, tasks, tp->sched_ctx_id);
|
|
|
-
|
|
|
- printf("w=%d t=%d tasks=%lf w2=%d\n", w, t, tasks[w][t], w2);
|
|
|
- if(w2 != -1)
|
|
|
+ /* if the worker has to be removed we should find a destination
|
|
|
+ otherwise we are not interested */
|
|
|
+ if(destination_ctx[w2][s2] == -1)
|
|
|
+ found_one_dest[w2] = -1;
|
|
|
+ if(destination_ctx[w2][s2] == 1)// && sched_ctx_hypervisor_can_resize(sched_ctxs[s2]))
|
|
|
{
|
|
|
- tasks[w2][t] += tasks[w][t];
|
|
|
- tasks[w][t] = 0.0;
|
|
|
+ found_one_dest[w2] = 1;
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
- else
|
|
|
- last_ctx = tp->sched_ctx_id;
|
|
|
+ for(w2 = 0; w2 < nremove; w2++)
|
|
|
+ {
|
|
|
+ if(found_one_dest[w2] == 0)
|
|
|
+ {
|
|
|
+ all_have_dest = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
}
|
|
|
+ if(all_have_dest)
|
|
|
+ sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+}
|
|
|
+
|
|
|
/* Midpoint of [t1, t2], used to drive the dichotomy on tmax. */
static double _find_tmax(double t1, double t2)
{
	double half_gap = (t2 - t1) / 2;
	return t1 + half_gap;
}
|
|
|
+
|
|
|
/* Search, by dichotomy on the makespan tmax, for the best feasible
 * distribution of tasks over workers and of workers over contexts.
 * On success w_in_s holds the worker/context assignment of the best
 * (smallest-tmax) feasible LP solution found.
 *
 * Returns 1 if at least one feasible solution was found, 0 otherwise.
 */
static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], int *sched_ctxs, int *workers)
{
	double tasks[nw][nt];
	double draft_tasks[nw][nt];
	double draft_w_in_s[ns][nw];

	int w, t, s;
	for (w = 0; w < nw; w++)
		for (t = 0; t < nt; t++)
		{
			tasks[w][t] = 0.0;
			/* FIX: was 'draft_tasks[w][t] == 0.0;' — a no-op comparison
			   instead of an assignment, leaving draft_tasks uninitialized */
			draft_tasks[w][t] = 0.0;
		}

	for (s = 0; s < ns; s++)
		for (w = 0; w < nw; w++)
		{
			w_in_s[s][w] = 0.0;
			draft_w_in_s[s][w] = 0.0;
		}

	/* smallest possible tmax, difficult to obtain as we
	   compute the nr of flops and not the tasks */
	double smallest_tmax = _lp_get_tmax(nw, workers);
	double tmax = smallest_tmax * ns;

	double res = 1.0;
	unsigned has_sol = 0;
	double tmin = 0.0;
	double old_tmax = 0.0;
	unsigned found_sol = 0;
	/* we fix tmax and we do not treat it as an unknown
	   we just vary by dichotomy its values */
	while (tmax > 1.0)
	{
		/* find solution and save the values in draft tables
		   only if there is a solution for the system we save them
		   in the proper table */
		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers);
		if (res != 0.0)
		{
			/* feasible: keep this solution as the current best */
			for (w = 0; w < nw; w++)
				for (t = 0; t < nt; t++)
					tasks[w][t] = draft_tasks[w][t];
			for (s = 0; s < ns; s++)
				for (w = 0; w < nw; w++)
					w_in_s[s][w] = draft_w_in_s[s][w];
			has_sol = 1;
			found_sol = 1;
		}
		else
			has_sol = 0;

		/* if we have a solution with this tmax try a smaller value
		   bigger than the old min */
		if (has_sol)
		{
			/* stop when the interval is narrow enough */
			if (old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
				break;
			old_tmax = tmax;
		}
		else /* else try a bigger one but smaller than the old tmax */
		{
			tmin = tmax;
			if (old_tmax != 0.0)
				tmax = old_tmax;
		}
		if (tmin == tmax) break;
		tmax = _find_tmax(tmin, tmax);

		if (tmax < smallest_tmax)
		{
			tmax = old_tmax;
			tmin = smallest_tmax;
			tmax = _find_tmax(tmin, tmax);
		}
	}
	return found_sol;
}
|
|
|
-int redistrib = 0;
|
|
|
-int done = 0;
|
|
|
-void lp2_handle_poped_task(unsigned sched_ctx, int worker)
|
|
|
+
|
|
|
+static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
|
|
|
{
|
|
|
struct sched_ctx_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
|
|
|
|
|
|
int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
|
|
|
if(ret != EBUSY)
|
|
|
{
|
|
|
- if(sc_w->submitted_flops >= sc_w->total_flops && !done)
|
|
|
+ if(sc_w->submitted_flops < sc_w->total_flops)
|
|
|
{
|
|
|
- redistrib = 1;
|
|
|
- done = 1;
|
|
|
+ pthread_mutex_unlock(&act_hypervisor_mutex);
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
- if(_velocity_gap_btw_ctxs() && redistrib)
|
|
|
+ if(_velocity_gap_btw_ctxs())
|
|
|
{
|
|
|
- redistrib = 0;
|
|
|
int ns = sched_ctx_hypervisor_get_nsched_ctxs();
|
|
|
int nw = starpu_worker_get_count(); /* Number of different workers */
|
|
|
int nt = 0; /* Number of different kinds of tasks */
|
|
@@ -446,40 +505,27 @@ void lp2_handle_poped_task(unsigned sched_ctx, int worker)
|
|
|
for (tp = task_pools; tp; tp = tp->next)
|
|
|
nt++;
|
|
|
|
|
|
- double tasks[nw][nt];
|
|
|
- int w,t;
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- for(t = 0; t < nt; t++)
|
|
|
- tasks[w][t] = 0.0;
|
|
|
-
|
|
|
- printf("###################################start to resolve \n");
|
|
|
- _glp_resolve(ns, nw, nt, tasks);
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- {
|
|
|
- if(tasks[w][t] > 0.0)
|
|
|
- printf("ctx %d/worker %d/task type %d: res = %lf \n", tp->sched_ctx_id, w, t, tasks[w][t]);
|
|
|
- }
|
|
|
- printf("***************************\n");
|
|
|
-
|
|
|
- _recompute_resource_distrib(nw, nt, tasks);
|
|
|
-
|
|
|
- for(w = 0; w < nw; w++)
|
|
|
- for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
|
|
|
- {
|
|
|
- if(tasks[w][t] > 0.0)
|
|
|
- printf("ctx %d/worker %d/task type %d: res = %lf \n", tp->sched_ctx_id, w, t, tasks[w][t]);
|
|
|
- }
|
|
|
-
|
|
|
+ double w_in_s[ns][nw];
|
|
|
|
|
|
- _redistribute_resources_in_ctxs2(ns, nw, nt, tasks);
|
|
|
+ unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, NULL, NULL);
|
|
|
+ /* if we did find at least one solution redistribute the resources */
|
|
|
+ if(found_sol)
|
|
|
+ {
|
|
|
+ _redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 0, NULL, NULL);
|
|
|
+ }
|
|
|
}
|
|
|
pthread_mutex_unlock(&act_hypervisor_mutex);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
/* size_ctxs policy hook: do not resize immediately — just record the
 * request; it is honoured later from size_if_required() once every
 * involved context has submitted all of its flops. */
static void lp2_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	sched_ctx_hypervisor_save_size_req(sched_ctxs, nsched_ctxs, workers, nworkers);
}
|
|
|
+
|
|
|
struct hypervisor_policy lp2_policy = {
|
|
|
- .size_ctxs = NULL,
|
|
|
+ .size_ctxs = lp2_size_ctxs,
|
|
|
.handle_poped_task = lp2_handle_poped_task,
|
|
|
.handle_pushed_task = NULL,
|
|
|
.handle_idle_cycle = NULL,
|