
linear programming strategy for the hypervisor taking into account the type of tasks

Andra Hugo 13 years ago
parent
commit
5e367ff87f
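The optimization problem built by the new "lp2" policy (lp2_policy.c below) can be summarized as the following linear program; this is a reconstruction from the code, and the notation is ours, not StarPU's:

	\min\; t_{max} \quad \text{s.t.} \quad
	\sum_{t} time_{w,t}\, n_{w,t} \le t_{max} \;\; \forall w, \qquad
	\sum_{w} n_{w,t} = N_t \;\; \forall t, \qquad
	\sum_{t \in s} n_{w,t} \le 0.3\, t_{max} \;\; \forall w, s, \qquad
	n_{w,t} \ge 0

where n_{w,t} is the number of tasks of kind t (same codelet and footprint) assigned to worker w, time_{w,t} is the execution time predicted by the history-based performance model, N_t is the number of submitted tasks of kind t, and s ranges over the scheduling contexts. After solving, a context keeps a worker if at least one task of that context is assigned to it (res >= 1.0) and loses it otherwise.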

+ 2 - 1
include/starpu.h

@@ -123,7 +123,8 @@ enum starpu_archtype
 	STARPU_CPU_WORKER, /* CPU core */
 	STARPU_CUDA_WORKER, /* NVIDIA CUDA device */
 	STARPU_OPENCL_WORKER, /* OpenCL CUDA device */
-	STARPU_GORDON_WORKER /* Cell SPU */
+	STARPU_GORDON_WORKER, /* Cell SPU */
+	STARPU_ALL
 };
 
 /* This function returns the type of worker associated to an identifier (as

+ 1 - 1
include/starpu_perfmodel.h

@@ -208,7 +208,7 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl);
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen, unsigned nimpl);
 int starpu_list_models(FILE *output);
-
+double starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned footprint);
 void starpu_force_bus_sampling(void);
 void starpu_bus_print_bandwidth(FILE *f);
 

+ 1 - 0
include/starpu_scheduler.h

@@ -155,6 +155,7 @@ struct starpu_performance_counters {
 	void (*notify_pushed_task)(unsigned sched_ctx, int worker);
 	void (*notify_poped_task)(unsigned sched_ctx, int worker, double flops);
 	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid);
+	void (*notify_submitted_job)(struct starpu_task *task, unsigned footprint);
 };
 
 #ifdef STARPU_BUILD_SCHED_CTX_HYPERVISOR

+ 7 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -96,6 +96,7 @@ struct hypervisor_policy {
 	void (*handle_poped_task)(unsigned sched_ctx, int worker);
 	void (*handle_idle_end)(unsigned sched_ctx, int worker);
 	void (*handle_post_exec_hook)(unsigned sched_ctx, struct starpu_htbl32_node* resize_requests, int task_tag);
+	void (*handle_submitted_job)(struct starpu_task *task, unsigned footprint);
 };
 
 
@@ -125,8 +126,14 @@ int* sched_ctx_hypervisor_get_sched_ctxs();
 
 int sched_ctx_hypervisor_get_nsched_ctxs();
 
+int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch);
+
 struct sched_ctx_wrapper* sched_ctx_hypervisor_get_wrapper(unsigned sched_ctx);
 
 double sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(struct sched_ctx_wrapper* sc_w);
 
 char* sched_ctx_hypervisor_get_policy();
+
+void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
+
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx);

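To illustrate the extended interface, a custom policy can hook the new callback as sketched below; the counting_policy example and its name are hypothetical, only the struct layout and the callback signature come from this header:

	#include <sched_ctx_hypervisor.h>

	/* Hypothetical policy: just count submitted tasks. A real policy would
	   pool them by (codelet, footprint), as lp2_policy.c does below, and
	   would need a mutex, since tasks may be submitted concurrently. */
	static unsigned long nsubmitted = 0;

	static void counting_handle_submitted_job(struct starpu_task *task, unsigned footprint)
	{
		(void)task;
		(void)footprint;
		nsubmitted++;
	}

	struct hypervisor_policy counting_policy = {
		.handle_poped_task = NULL,
		.handle_pushed_task = NULL,
		.handle_idle_cycle = NULL,
		.handle_idle_end = NULL,
		.handle_post_exec_hook = NULL,
		.handle_submitted_job = counting_handle_submitted_job,
		.custom = 1,
		.name = "counting"
	};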
+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -30,7 +30,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/idle_policy.c		\
 	hypervisor_policies/app_driven_policy.c		\
 	hypervisor_policies/gflops_rate_policy.c	\
-	hypervisor_policies/lp_policy.c	
+	hypervisor_policies/lp_policy.c			\
+	hypervisor_policies/lp2_policy.c	
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_utils.h

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -33,6 +33,7 @@ struct hypervisor_policy app_driven_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = app_driven_handle_post_exec_hook,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "app_driven"
 };

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -294,6 +294,7 @@ struct hypervisor_policy gflops_rate_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "gflops_rate"
 };

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/idle_policy.c

@@ -30,6 +30,7 @@ struct hypervisor_policy idle_policy = {
 	.handle_idle_cycle = idle_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "idle"
 };

+ 368 - 0
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -0,0 +1,368 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "policy_utils.h"
+#include <math.h>
+struct bound_task_pool
+{
+	/* Which codelet has been executed */
+	struct starpu_codelet *cl;
+	/* Task footprint key */
+	uint32_t footprint;
+	/* Context the task belongs to */
+	unsigned sched_ctx_id;
+	/* Number of tasks of this kind */
+	unsigned long n;
+	/* Other task kinds */
+	struct bound_task_pool *next;
+};
+
+
+static struct bound_task_pool *task_pools, *last;
+
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void lp2_handle_submitted_job(struct starpu_task *task, unsigned footprint)
+{
+	pthread_mutex_lock(&mutex);
+	struct bound_task_pool *tp;
+	
+	if (last && last->cl == task->cl && last->footprint == footprint && last->sched_ctx_id == task->sched_ctx)
+		tp = last;
+	else
+		for (tp = task_pools; tp; tp = tp->next)
+			if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
+					break;
+	
+	if (!tp)
+	{
+		tp = (struct bound_task_pool *) malloc(sizeof(*tp));
+		tp->cl = task->cl;
+		tp->footprint = footprint;
+		tp->sched_ctx_id = task->sched_ctx;
+		tp->n = 0;
+		tp->next = task_pools;
+		task_pools = tp;
+	}
+	
+	/* One more task of this kind; remember it for the fast-path check above */
+	tp->n++;
+	last = tp;
+	pthread_mutex_unlock(&mutex);
+}
+
+static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt])
+{
+        struct bound_task_pool *tp;
+        int w, t;
+        for (w = 0; w < nw; w++)
+        {
+                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+                {
+                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
+                        double length = starpu_history_based_job_expected_perf(tp->cl->model, arch, tp->footprint);
+
+                        if (isnan(length))
+                                times[w][t] = NAN;
+                        else
+                                times[w][t] = length / 1000.;
+                }
+        }
+}
+
+int _get_idx_sched_ctx(int sched_ctx_id)
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+	int i;
+	for(i = 0; i < nsched_ctxs; i++)
+		if(sched_ctxs[i] == sched_ctx_id)
+			return i;
+	return -1;
+}
+
+/*
+ * GNU Linear Programming Kit backend
+ */
+#ifdef HAVE_GLPK_H
+#include <glpk.h>
+static void _glp_resolve(int ns, int nw, int nt, double res[ns][nw][nt], int integer)
+{
+	struct bound_task_pool * tp;
+	int t, w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MIN);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		double times[nw][nt];
+		int ne =
+			nw * (nt+1)	/* worker execution time */
+			+ nt * nw	/* task completion */
+			+ nw * (nt+ns)	/* per-context share of each worker */
+			+ 1; /* glp dumbness */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+		_starpu_get_tasks_times(nw, nt, times);
+
+		/* Variables: number of tasks i assigned to worker j, and tmax */
+		glp_add_cols(lp, nw*nt+1);
+#define colnum(w, t) ((t)*nw+(w)+1)
+		glp_set_obj_coef(lp, nw*nt+1, 1.);
+
+		for (w = 0; w < nw; w++)
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%dt%dn", w, t);
+				glp_set_col_name(lp, colnum(w, t), name);
+				if (integer)
+					glp_set_col_kind(lp, colnum(w, t), GLP_IV);
+				glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
+			}
+		glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
+
+		/* Total worker execution time */
+		glp_add_rows(lp, nw);
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
+			int someone = 0;
+			for (w = 0; w < nw; w++)
+				if (!isnan(times[w][t]))
+					someone = 1;
+			if (!someone)
+			{
+				/* This task does not have any performance model at all, abort */
+				glp_delete_prob(lp);
+				return;
+			}
+		}
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "worker %s", name);
+			glp_set_row_name(lp, w+1, title);
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				ia[n] = w+1;
+				ja[n] = colnum(w, t);
+				if (isnan(times[w][t]))
+					ar[n] = 1000000000.;
+				else
+					ar[n] = times[w][t];
+				n++;
+			}
+			/* tmax */
+			ia[n] = w+1;
+			ja[n] = nw*nt+1;
+			ar[n] = -1;
+			n++;
+			glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
+		}
+
+		/* Total task completion */
+		glp_add_rows(lp, nt);
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
+			char title[64];
+			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
+			glp_set_row_name(lp, nw+t+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = nw+t+1;
+				ja[n] = colnum(w, t);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
+		}
+
+		int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+		/* For each worker w and context s: (number of tasks of s assigned to w) - 0.3 * tmax <= 0 */
+		glp_add_rows(lp, nw*ns);
+		for (w = 0; w < nw; w++)
+		{
+			for(s = 0; s < ns; s++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %x ctx %x limit", w, s);
+				glp_set_row_name(lp, nw+nt+w+(s*nw)+1, title);
+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+				{
+					if(tp->sched_ctx_id == sched_ctxs[s])
+					{
+						ia[n] = nw+nt+w+(s*nw)+1;
+						ja[n] = colnum(w, t);
+						ar[n] = 1;
+						n++;
+					}
+				}
+
+				/* tmax */
+				ia[n] = nw+nt+w+(s*nw)+1;
+				ja[n] = nw*nt+1;
+				ar[n] = -0.3;
+				n++;
+
+				glp_set_row_bnds(lp, nw+nt+w+(s*nw)+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+	{
+		glp_delete_prob(lp);
+		return;
+	}
+
+        if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+        }
+
+
+	double tmax = glp_get_obj_val(lp);
+
+        printf("Theoretical minimum execution time: %f ms\n", tmax);
+
+	for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+	{
+		for (w = 0; w < nw; w++)
+		{
+			s = _get_idx_sched_ctx(tp->sched_ctx_id);
+			res[s][w][t] = glp_get_col_prim(lp, colnum(w, t));
+		}
+	}
+
+	glp_delete_prob(lp);
+}
+
+void _redistribute_resources_in_ctxs2(int ns, int nw, int nt, double res[ns][nw][nt])
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+        struct bound_task_pool * tp;
+	int s, s2, w, t;
+
+	for(s = 0; s < ns; s++)
+	{
+		int workers_to_add[nw], workers_to_remove[nw];
+		for(w = 0; w < nw; w++)
+		{
+			workers_to_add[w] = -1;
+			workers_to_remove[w] = -1;
+		}
+
+		int nadd = 0, nremove = 0;
+
+		for(w = 0; w < nw; w++)
+		{
+			int found = 0;
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				if(tp->sched_ctx_id == sched_ctxs[s])
+				{
+					if(res[s][w][t] >= 1.0)
+					{
+						workers_to_add[nadd++] = w;
+						found = 1;
+						break;
+					}
+				}
+			}
+			if(!found)
+				workers_to_remove[nremove++] = w;
+		}
+
+		
+		unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], STARPU_ALL);
+		if(nadd != nworkers_ctx)
+		{
+			printf("%d: add %d \n", sched_ctxs[s], nadd);
+			printf("%d: remove %d \n", sched_ctxs[s], nremove);
+			sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+
+			struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
+			int i;
+			for(i = 0; i < nadd; i++)
+				new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
+		}
+	}
+}
+
+void lp2_handle_poped_task(unsigned sched_ctx, int worker)
+{
+	if(_velocity_gap_btw_ctxs())
+	{
+		int ns = sched_ctx_hypervisor_get_nsched_ctxs();
+		int nw = starpu_worker_get_count(); /* Number of different workers */
+		int nt = 0; /* Number of different kinds of tasks */
+		struct bound_task_pool * tp;
+		for (tp = task_pools; tp; tp = tp->next)
+			nt++;
+		
+       		double res[ns][nw][nt];
+
+		int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
+		{
+			_glp_resolve(ns, nw, nt, res, 0);
+/* 			int i, j, k; */
+/* 			for( i = 0; i < ns; i++) */
+/* 				for(j = 0; j < nw; j++) */
+/* 					for(k = 0; k < nt; k++) */
+/* 					{ */
+/* 						printf("ctx %d/worker %d/task type %d: res = %lf \n", i, j, k, res[i][j][k]); */
+/* 					} */
+		
+			_redistribute_resources_in_ctxs2(ns, nw, nt, res);
+			pthread_mutex_unlock(&act_hypervisor_mutex);
+		}
+	}		
+}
+
+struct hypervisor_policy lp2_policy = {
+	.handle_poped_task = lp2_handle_poped_task,
+	.handle_pushed_task = NULL,
+	.handle_idle_cycle = NULL,
+	.handle_idle_end = NULL,
+	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = lp2_handle_submitted_job,
+	.custom = 0,
+	.name = "lp2"
+};
+	
+#endif /* HAVE_GLPK_H */
+
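The policy is only compiled and registered when GLPK is available (HAVE_GLPK_H, see sched_ctx_hypervisor.c below). Assuming the hypervisor resolves predefined policies by name when a non-custom policy is passed to sched_ctx_hypervisor_init (that lookup is an assumption; the commit only shows lp2_policy being added to predefined_policies), selecting it would look like:

	struct hypervisor_policy policy = { .name = "lp2", .custom = 0 };
	struct starpu_performance_counters *perf_counters = sched_ctx_hypervisor_init(&policy);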

+ 201 - 36
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -22,14 +22,13 @@
  */
 #ifdef HAVE_GLPK_H
 #include <glpk.h>
-static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
+static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw])
 {
 	int s, w;
 	glp_prob *lp;
 	
 	int ne =
-		ns * (nw+1)     /* worker execution time */
-		+ ns * nw
+		(ns*nw+1)*(ns+nw)
 		+ 1; /* glp dumbness */
 	int n = 1;
 	int ia[ne], ja[ne];
@@ -41,13 +40,9 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 	glp_set_obj_dir(lp, GLP_MAX);
         glp_set_obj_name(lp, "max speed");
 
-
-#define colnum(s, w) ((w)*ns+(s)+1)
 	/* we add nw*ns columns one for each type of worker in each context 
 	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
 	glp_add_cols(lp, nw*ns+1);
-	/* Z = 1/tmax -> 1/vmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
-	glp_set_obj_coef(lp, nw*ns+1, 1.);
 
 	for(s = 0; s < ns; s++)
 	{
@@ -55,13 +50,19 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		{
 			char name[32];
 			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
-			glp_set_col_name(lp, colnum(s,w), name);
-			glp_set_col_bnds(lp, colnum(s,w), GLP_LO, 0.0, 0.0);
+			glp_set_col_name(lp, n, name);
+			glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0);
+			n++;
 		}
 	}
-	glp_set_col_bnds(lp, nw*ns+1, GLP_DB, 0.0, 1.0);
 
+	/*1/tmax should belong to the interval [0.0;1.0]*/
+	glp_set_col_name(lp, n, "vmax");
+	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
+	glp_set_obj_coef(lp, n, 1.0);
 
+	n = 1;
 	/* one row corresponds to one ctx*/
 	glp_add_rows(lp, ns);
 
@@ -71,23 +72,38 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		snprintf(name, sizeof(name), "ctx%d", s);
 		glp_set_row_name(lp, s+1, name);
 		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
+
 		for(w = 0; w < nw; w++)
 		{
-			ia[n] = s+1;
-			ja[n] = colnum(s, w);
-			ar[n] = v[s][w];
-			printf("v[%d][%d] = %lf\n", s, w, v[s][w]);
-			n++;
+			int s2;
+			for(s2 = 0; s2 < ns; s2++)
+			{
+				if(s2 == s)
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = v[s][w];
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
 		}
 		/* 1/tmax */
 		ia[n] = s+1;
 		ja[n] = ns*nw+1;
 		ar[n] = (-1) * flops[s];
-		printf("%d: flops %lf\n", s, flops[s]);
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 	}
 	
-	/*we add another linear constraint : sum(all cpus) = 3 and sum(all gpus) = 9 */
+	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
 	glp_add_rows(lp, nw);
 
 	for(w = 0; w < nw; w++)
@@ -97,36 +113,64 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		glp_set_row_name(lp, ns+w+1, name);
 		for(s = 0; s < ns; s++)
 		{
-			ia[n] = ns+w+1;
-			ja[n] = colnum(s,w);
-			ar[n] = 1;
-			n++;
+			int w2;
+			for(w2 = 0; w2 < nw; w2++)
+			{
+				if(w2 == w)
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 1.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
 		}
-		if(w == 0) glp_set_row_bnds(lp, ns+w+1, GLP_FX, 3., 3.);
-		if(w == 1) glp_set_row_bnds(lp, ns+w+1, GLP_FX, 9., 9.);
-	}
+		/* 1/tmax */
+		ia[n] = ns+w+1;
+		ja[n] = ns*nw+1;
+		ar[n] = 0.0;
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, 3., 3.);
 
+		/*sum(all cpus) = 9*/
+		if(w == 1) 
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, 9., 9.);
+	}
 
 	STARPU_ASSERT(n == ne);
 
 	glp_load_matrix(lp, ne-1, ia, ja, ar);
 
-	glp_simplex(lp, NULL);
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	glp_simplex(lp, &parm);
+//	glp_simplex(lp, NULL);
+	
 	double vmax1 = glp_get_obj_val(lp);
-	printf("vmax1 = %lf \n", vmax1);
-	double res[ne];
+//	printf("vmax1 = %lf \n", vmax1);
+
 	n = 1;
-	for(w = 0; w < nw; w++)
+	for(s = 0; s < ns; s++)
 	{
-		for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
 		{
-			res[n] = glp_get_col_prim(lp, colnum(s,w));
-			printf("ctx %d/worker type %d: n = %lf \n", s, w, res[n]);
+			res[s][w] = glp_get_col_prim(lp, n);
 			n++;
 		}
 	}
-//	res[n] = glp_get_col_prim(lp, ns*nw+1);
-//	printf("vmax = %lf \n", res[n]);
 
 	glp_delete_prob(lp);
 	return;
@@ -152,7 +196,7 @@ int _velocity_gap_btw_ctxs()
 				other_sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[j]);
 				double other_ctx_v = _get_ctx_velocity(other_sc_w);
 				double gap = ctx_v < other_ctx_v ? ctx_v / other_ctx_v : other_ctx_v / ctx_v;
-				if(gap > 0.5)
+				if(gap > 0.7)
 					return 1;
 			}
 		}
@@ -161,15 +205,120 @@ int _velocity_gap_btw_ctxs()
 	return 0;
 }
 
+void _round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
+{
+	int s, w;
+	double left_res[nw];
+	for(w = 0; w < nw; w++)
+		left_res[w] = 0.0;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			int x = floor(res[s][w]);
+			double x_double = (double)x;
+			double diff = res[s][w] - x_double;
+			
+			if(diff != 0.0)
+			{
+				if(diff > 0.5)
+				{
+					if(left_res[w] != 0.0)
+					{
+						if((diff + left_res[w]) > 0.5)
+						{
+							res_rounded[s][w] = x + 1;
+							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
+						}
+						else
+						{
+							res_rounded[s][w] = x;
+							left_res[w] = (-1.0) * (diff + left_res[w]);
+						}
+					}
+					else
+					{
+						res_rounded[s][w] = x + 1;
+						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
+					}
+
+				}
+				else
+				{
+					if((diff + left_res[w]) > 0.5)
+					{
+						res_rounded[s][w] = x + 1;
+						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
+					}
+					else
+					{
+						res_rounded[s][w] = x;
+						left_res[w] = diff;
+					}
+				}
+			}
+		}
+	}		
+}
+
+void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw])
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int s, s2, w;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			enum starpu_archtype arch;
+			if(w == 0) arch = STARPU_CUDA_WORKER;
+			if(w == 1) arch = STARPU_CPU_WORKER;
+
+			unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch);
+			if(nworkers_ctx > res_rounded[s][w])
+			{
+				int nworkers_to_move = nworkers_ctx - res_rounded[s][w];
+				int receiving_s = -1;
+				
+				for(s2 = 0; s2 < ns; s2++)
+				{
+					if(sched_ctxs[s2] != sched_ctxs[s])
+					{
+						int nworkers_ctx2 = get_nworkers_ctx(sched_ctxs[s2], arch);
+						if((res_rounded[s2][w] - nworkers_ctx2) == nworkers_to_move)
+						{
+							receiving_s = sched_ctxs[s2];
+							break;
+						}
+					}
+				}
+				if(receiving_s != -1)
+				{
+					int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
+					sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move);
+					struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
+					int i;
+					for(i = 0; i < nworkers_to_move; i++)
+						new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] :  new_config->new_workers_max_idle;
+
+					free(workers_to_move);
+				}
+			}
+		}
+	}
+}
+
 void lp_handle_poped_task(unsigned sched_ctx, int worker)
 {
 	if(_velocity_gap_btw_ctxs())
 	{
 		int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
 		int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+//		int nsched_ctxs = 3;
 		
 		double v[nsched_ctxs][2];
 		double flops[nsched_ctxs];
+		double res[nsched_ctxs][2];
+
 		int i = 0;
 		struct sched_ctx_wrapper* sc_w;
 		for(i = 0; i < nsched_ctxs; i++)
@@ -177,13 +326,28 @@ void lp_handle_poped_task(unsigned sched_ctx, int worker)
 			sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
 			v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
 			v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
-			flops[i] = sc_w->total_flops;
+			flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
 		}
                 
 		int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-			_glp_resolve(nsched_ctxs, 2, v, flops);
+			_glp_resolve(nsched_ctxs, 2, v, flops, res);
+/* 			for( i = 0; i < nsched_ctxs; i++) */
+/* 			{ */
+/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 0, res[i][0]); */
+/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 1, res[i][1]); */
+/* 			} */
+			int res_rounded[nsched_ctxs][2];
+			_round_double_to_int(nsched_ctxs, 2, res, res_rounded);
+/* 			for( i = 0; i < nsched_ctxs; i++) */
+/* 			{ */
+/* 				printf("ctx %d/worker type %d: n = %d \n", i, 0, res_rounded[i][0]); */
+/* 				printf("ctx %d/worker type %d: n = %d \n", i, 1, res_rounded[i][1]); */
+/* 			} */
+			
+			_redistribute_resources_in_ctxs(nsched_ctxs, 2, res_rounded);
+			
 			pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 	}		
@@ -195,6 +359,7 @@ struct hypervisor_policy lp_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "lp"
 };

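The carry-based rounding in _round_double_to_int above preserves the per-worker-type totals where independent rounding would not. A worked example with two contexts and one worker type, supposing the LP returned res = {2.4, 1.4} (3.8 workers in total): context 0 has fraction 0.4 <= 0.5, so it is rounded down to 2 and the 0.4 is carried over in left_res; context 1 then sees 0.4 + 0.4 = 0.8 > 0.5 and is rounded up to 2. The result {2, 2} uses all 4 available workers, whereas rounding each entry separately would give {2, 1} and leave one worker unassigned.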
+ 2 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_utils.c

@@ -1,7 +1,7 @@
 #include <sched_ctx_hypervisor.h>
 #include <pthread.h>
 
-enum starpu_archtype STARPU_ALL;
+//enum starpu_archtype STARPU_ALL;
 
 static int _compute_priority(unsigned sched_ctx)
 {
@@ -300,6 +300,7 @@ double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w)
                 double elapsed_time = curr_time - sc_w->start_time;
                 return elapsed_flops/elapsed_time;
         }
+	return 0.0;
 }
 
 /* compute an average value of the cpu velocity */

+ 98 - 2
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -24,13 +24,14 @@ static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_poped_task(unsigned sched_ctx, int worker, double flops);
 static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
-
+static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
 
 extern struct hypervisor_policy idle_policy;
 extern struct hypervisor_policy app_driven_policy;
 extern struct hypervisor_policy gflops_rate_policy;
 #ifdef HAVE_GLPK_H
 extern struct hypervisor_policy lp_policy;
+extern struct hypervisor_policy lp2_policy;
 #endif
 
 static struct hypervisor_policy *predefined_policies[] = {
@@ -38,6 +39,7 @@ static struct hypervisor_policy *predefined_policies[] = {
 	&app_driven_policy,
 #ifdef HAVE_GLPK_H
 	&lp_policy,
+	&lp2_policy,
 #endif
 	&gflops_rate_policy
 };
@@ -58,6 +60,7 @@ static void _load_hypervisor_policy(struct hypervisor_policy *policy)
 	hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
 	hypervisor.policy.handle_idle_end = policy->handle_idle_end;
 	hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook;
+	hypervisor.policy.handle_submitted_job = policy->handle_submitted_job;
 }
 
 
@@ -162,6 +165,7 @@ struct starpu_performance_counters* sched_ctx_hypervisor_init(struct hypervisor_
 	perf_counters->notify_poped_task = notify_poped_task;
 	perf_counters->notify_post_exec_hook = notify_post_exec_hook;
 	perf_counters->notify_idle_end = notify_idle_end;
+	perf_counters->notify_submitted_job = notify_submitted_job;
 
 	starpu_notify_hypervisor_exists();
 
@@ -307,10 +311,30 @@ static void _get_cpus(int *workers, int nworkers, int *cpus, int *ncpus)
 	}
 }
 
+int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch)
+{
+	int nworkers_ctx = 0;
+	struct worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
+	int worker;
+
+	if(workers->init_cursor)
+		workers->init_cursor(workers);
+
+	while(workers->has_next(workers))
+	{
+		worker = workers->get_next(workers);
+		enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
+		if( curr_arch == arch)
+			nworkers_ctx++;
+	}
+	return nworkers_ctx;
+}
+
 /* actually move the workers: the cpus are moved, gpus are only shared  */
 /* forbids another resize request before this one is take into account */
 void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move)
 {
+	printf("nworkers to move %d resize_sender %d resize_receiver %d\n", nworkers_to_move,  hypervisor.resize[sender_sched_ctx], hypervisor.resize[receiver_sched_ctx]);
 	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx] && hypervisor.resize[receiver_sched_ctx])
 	{
 		int j;
@@ -324,7 +348,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 
 		_get_cpus(workers_to_move, nworkers_to_move, cpus, &ncpus);
 
-		if(ncpus != 0)
+//		if(ncpus != 0)
 			starpu_remove_workers_from_sched_ctx(cpus, ncpus, sender_sched_ctx);
 
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
@@ -350,6 +374,70 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 	return;
 }
 
+void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx)
+{
+	if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx])
+	{
+		int j;
+		printf("resize ctx %d with", sched_ctx);
+		for(j = 0; j < nworkers_to_add; j++)
+			printf(" %d", workers_to_add[j]);
+		printf("\n");
+
+		starpu_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, sched_ctx);
+
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_add * sizeof(int));
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_add;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_add * sizeof(int));
+
+
+		int i;
+		for(i = 0; i < nworkers_to_add; i++)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_add[i]] = 0.0;
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_add[i];	
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+		}
+
+		hypervisor.resize[sched_ctx] = 0;
+	}
+
+	return;
+}
+
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx)
+{
+	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
+	{
+		int j;
+		printf("resize ctx %d with", sched_ctx);
+		for(j = 0; j < nworkers_to_remove; j++)
+			printf(" %d", workers_to_remove[j]);
+		printf("\n");
+
+		starpu_remove_workers_from_sched_ctx(workers_to_remove, nworkers_to_remove, sched_ctx);
+
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_remove;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+
+
+		int i;
+		for(i = 0; i < nworkers_to_remove; i++)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_remove[i]] = 0.0;
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_remove[i];	
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+		}
+
+		hypervisor.resize[sched_ctx] = 0;
+	}
+
+	return;
+}
+
 static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
 {
 	int i;
@@ -514,6 +602,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += elapsed_flops;
 	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
 
 	if(hypervisor.nsched_ctxs > 1)
 	{
@@ -561,6 +650,13 @@ static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
 		}
 	}
 }
+
+static void notify_submitted_job(struct starpu_task *task, unsigned footprint)
+{
+	if(hypervisor.policy.handle_submitted_job)
+		hypervisor.policy.handle_submitted_job(task, footprint);
+}
+
 struct sched_ctx_wrapper* sched_ctx_hypervisor_get_wrapper(unsigned sched_ctx)
 {
 	return &hypervisor.sched_ctx_w[sched_ctx];

+ 10 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1026,6 +1026,16 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 	return exp;
 }
 
+double starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned footprint)
+{
+	struct _starpu_job j =
+		{
+			.footprint = footprint,
+			.footprint_is_computed = 1,
+		};
+	return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl);
+}
+
 void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
 {
 	if (model)

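The new entry point lets the hypervisor query the history-based model from a footprint alone, without a live job. A usage sketch mirroring _starpu_get_tasks_times in lp2_policy.c above (workerid, cl and footprint are placeholders, not names from this commit):

	#include <math.h>

	enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(workerid);
	double length = starpu_history_based_job_expected_perf(cl->model, arch, footprint);
	if (!isnan(length))          /* NaN means no calibrated model for this footprint */
		length /= 1000.;     /* lp2 feeds the LP in milliseconds */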
+ 11 - 0
src/core/task.c

@@ -219,6 +219,17 @@ int _starpu_submit_job(struct _starpu_job *j)
 
 	_starpu_increment_nsubmitted_tasks();
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
+	
+#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
+	if(sched_ctx != NULL && j->task->sched_ctx != 0 && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS
+	   && sched_ctx->perf_counters != NULL)
+	{
+		_starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
+		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint);
+	}
+#endif
+
 	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 
 	/* Need to atomically set submitted to 1 and check dependencies, since
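Taken together, these hunks wire the following path from task submission down to the resizing policy (all symbols are from this commit):

	_starpu_submit_job (src/core/task.c)
	  -> _starpu_compute_buffers_footprint        /* footprint computed eagerly at submit time */
	  -> perf_counters->notify_submitted_job      /* counter installed by sched_ctx_hypervisor_init */
	     -> notify_submitted_job (sched_ctx_hypervisor.c)
	        -> hypervisor.policy.handle_submitted_job  /* e.g. lp2_handle_submitted_job pools the task by codelet and footprint */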