
bug fixing in resizing strategies

Andra Hugo 12 years ago
parent
commit
f21b5879bf

+ 1 - 2
include/starpu_sched_ctx.h

@@ -64,7 +64,7 @@ struct starpu_sched_ctx_performance_counters
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
 	void (*notify_idle_end)(unsigned sched_ctx_id, int worker);
 	void (*notify_pushed_task)(unsigned sched_ctx_id, int worker);
-	void (*notify_poped_task)(unsigned sched_ctx_id, int worker, double flops, size_t data_size);
+	void (*notify_poped_task)(unsigned sched_ctx_id, int worker, struct starpu_task *task, size_t data_size, uint32_t footprint);
 	void (*notify_post_exec_hook)(unsigned sched_ctx_id, int taskid);
 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint);
 	void (*notify_delete_context)(unsigned sched_ctx);
@@ -72,7 +72,6 @@ struct starpu_sched_ctx_performance_counters
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, struct starpu_sched_ctx_performance_counters *perf_counters);
-void starpu_sched_ctx_call_poped_task_cb(int workerid, unsigned sched_ctx_id, double flops, size_t data_size);
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR
 

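Note on the API break above: implementers of struct starpu_sched_ctx_performance_counters now receive the popped task itself and its footprint instead of a pre-extracted flops value. Below is a minimal sketch of a callback written against the new signature; the helper name my_notify_poped_task and its body are purely illustrative, while the parameter list and the task->flops field come from the headers in this commit:

#include <stdio.h>
#include <stdint.h>
#include <starpu.h>

/* Illustrative counter callback matching the new notify_poped_task
 * prototype: the flops value is now read from the task instead of
 * being passed in separately, and the footprint identifies the kind
 * of task that just finished. */
static void my_notify_poped_task(unsigned sched_ctx_id, int worker,
                                 struct starpu_task *task,
                                 size_t data_size, uint32_t footprint)
{
	fprintf(stderr, "ctx %u, worker %d: %lf flops, %zu bytes, footprint %u\n",
	        sched_ctx_id, worker, task->flops, data_size, footprint);
}

/* wired in through the struct shown above, e.g.:
 *   counters->notify_poped_task = my_notify_poped_task; */
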
+ 3 - 1
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -167,7 +167,7 @@ struct sched_ctx_hypervisor_policy
 	void (*size_ctxs)(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
 	void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
 	void (*handle_pushed_task)(unsigned sched_ctx, int worker);
-	void (*handle_poped_task)(unsigned sched_ctx, int worker);
+	void (*handle_poped_task)(unsigned sched_ctx, int worker,struct starpu_task *task, uint32_t footprint);
 	void (*handle_idle_end)(unsigned sched_ctx, int worker);
 
 	void (*handle_post_exec_hook)(unsigned sched_ctx, int task_tag);
@@ -230,6 +230,8 @@ unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx);
 /* compute an average value of the cpu/cuda velocity */
 double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
 
+double sched_ctx_hypervisor_get_velocity(struct sched_ctx_hypervisor_wrapper *sc_w, enum starpu_archtype arch);
+
 #ifdef __cplusplus
 }
 #endif

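The sched_ctx_hypervisor_get_velocity() helper declared above centralizes the velocity fallback chain (measured velocity per worker type, then reference velocity, then an arch-dependent default) that the LP policies previously duplicated inline; its definition is added to sched_ctx_hypervisor.c at the end of this commit. A usage sketch mirroring the updated call sites, assuming sc_w and worker come from the surrounding policy code:

	/* per (context, worker) pair inside a resizing policy */
	enum starpu_archtype arch = starpu_worker_get_type(worker);
	velocity[s][w] = sched_ctx_hypervisor_get_velocity(sc_w, arch);
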
+ 3 - 11
sched_ctx_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -38,16 +38,8 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
 			w_in_s[s][w] = 0.0;
 			int worker = workers == NULL ? w : workers[w];
 
-			velocity[s][w] = _get_velocity_per_worker(sc_w, worker);
-			if(velocity[s][w] == -1.0)
-			{
-				enum starpu_archtype arch = starpu_worker_get_type(worker);
-				velocity[s][w] = _get_velocity_per_worker_type(sc_w, arch);
-				if(velocity[s][w] == -1.0)
-					velocity[s][w] = sc_w->ref_velocity[worker];
-				if(velocity[s][w] < 1.0)
-					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
-			}
+			enum starpu_archtype arch = starpu_worker_get_type(worker);
+			velocity[s][w] = sched_ctx_hypervisor_get_velocity(sc_w, arch);
 		}
 	}
 	
@@ -231,7 +223,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 }
 
 
-static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker)
+static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	struct sched_ctx_hypervisor_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
 	_get_velocity_per_worker(sc_w, worker);

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -289,7 +289,7 @@ static void gflops_rate_resize(unsigned sched_ctx)
 	}
 }
 
-void gflops_rate_handle_poped_task(unsigned sched_ctx, int worker)
+static void gflops_rate_handle_poped_task(unsigned sched_ctx, int worker)
 {
 	gflops_rate_resize(sched_ctx);
 }

+ 2 - 14
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -19,8 +19,6 @@
 #include <math.h>
 
 static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer);
-static double _find_tmax(double t1, double t2);
-
 
 static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double flops_on_w[ns][nw], int *in_sched_ctxs, int *workers)
 {
@@ -50,11 +48,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 			if(velocity[s][w] == -1.0)
 			{
 				enum starpu_archtype arch = starpu_worker_get_type(worker);
-				velocity[s][w] = _get_velocity_per_worker_type(sc_w, arch);
-				if(velocity[s][w] == -1.0)
-					velocity[s][w] = sc_w->ref_velocity[worker];
-				if(velocity[s][w] == -1.0)
-					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
+				velocity[s][w] = sched_ctx_hypervisor_get_velocity(sc_w, arch);
 			}
 			
 //			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
@@ -348,13 +342,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 }
 
 
-static double _find_tmax(double t1, double t2)
-{
-	return t1 + ((t2 - t1)/2);
-}
-
-
-static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
+static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	struct sched_ctx_hypervisor_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
 	_get_velocity_per_worker(sc_w, worker);

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -141,7 +141,7 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 	return curr_workers;
 }			
 
-static void ispeed_handle_poped_task(unsigned sched_ctx, int worker)
+static void ispeed_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
 	if(ret != EBUSY)

+ 69 - 216
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -22,7 +22,6 @@ static struct bound_task_pool *task_pools = NULL;
 
 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned interger);
-static double _find_tmax(double t1, double t2);
 static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], int *sched_ctxs, int *workers)
 {
 	double draft_tasks[nw][nt];
@@ -45,12 +44,12 @@ static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, dou
 
 	/* smallest possible tmax, difficult to obtain as we
 	   compute the nr of flops and not the tasks */
-	double smallest_tmax = _lp_get_tmax(nw, workers);
-	double tmax = smallest_tmax * ns * 2;
-
+	double possible_tmax = _lp_get_tmax(nw, workers);
+	double smallest_tmax = possible_tmax / 2;
+	double tmax = possible_tmax * ns;
 	double res = 1.0;
 	unsigned has_sol = 0;
-	double tmin = 0.0;
+	double tmin = smallest_tmax;
 	double old_tmax = 0.0;
 	unsigned found_sol = 0;
 
@@ -114,118 +113,16 @@ static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, dou
 	float timing = (float)(diff_s*1000000 + diff_us)/1000;
 
 //        fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);
-
 	return found_sol;
 }
 
-static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], unsigned first_time, int *in_sched_ctxs, int *workers)
-{
-	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
-	int s, s2, w;
-
-	for(s = 0; s < ns; s++)
-	{
-		int workers_to_add[nw], workers_to_remove[nw];
-		int destination_ctx[nw][ns];
-
-		for(w = 0; w < nw; w++)
-		{
-			workers_to_add[w] = -1;
-			workers_to_remove[w] = -1;
-			for(s2 = 0; s2 < ns; s2++)
-				destination_ctx[w][s2] = -1;
-		}
-
-		int nadd = 0, nremove = 0;
-
-		for(w = 0; w < nw; w++)
-		{
-			enum starpu_perf_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
-				starpu_worker_get_type(workers[w]);
-
-			if(arch == STARPU_CPU_WORKER)
-			{
-				if(w_in_s[s][w] >= 0.5)
-				{
-					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
-				}
-				else
-				{
-					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
-					for(s2 = 0; s2 < ns; s2++)
-						if(s2 != s && w_in_s[s2][w] >= 0.5)
-							destination_ctx[w][s2] = 1;
-						else
-							destination_ctx[w][s2] = 0;
-				}
-			}
-			else
-			{
-				if(w_in_s[s][w] >= 0.3)
-				{
-					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
-				}
-				else
-				{
-					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
-					for(s2 = 0; s2 < ns; s2++)
-						if(s2 != s && w_in_s[s2][w] >= 0.3)
-							destination_ctx[w][s2] = 1;
-						else
-							destination_ctx[w][s2] = 0;
-				}
-			}
-
-		}
-
-		sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
-		struct sched_ctx_hypervisor_policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
-		int i;
-		for(i = 0; i < nadd; i++)
-			new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
-
-		if(!first_time)
-		{
-			/* do not remove workers if they can't go anywhere */
-			int w2;
-			unsigned found_one_dest[nremove];
-			unsigned all_have_dest = 1;
-			for(w2 = 0; w2 < nremove; w2++)
-				found_one_dest[w2] = 0;
-
-			for(w2 = 0; w2 < nremove; w2++)
-				for(s2 = 0; s2 < ns; s2++)
-				{
-					/* if the worker has to be removed we should find a destination
-					   otherwise we are not interested */
-					if(destination_ctx[w2][s2] == -1)
-						found_one_dest[w2] = -1;
-					if(destination_ctx[w2][s2] == 1)// && sched_ctx_hypervisor_can_resize(sched_ctxs[s2]))
-					{
-						found_one_dest[w2] = 1;
-						break;
-					}
-				}
-			for(w2 = 0; w2 < nremove; w2++)
-			{
-				if(found_one_dest[w2] == 0)
-				{
-					all_have_dest = 0;
-					break;
-				}
-			}
-			if(all_have_dest)
-				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
-		}
-	}
-
-}
 
 static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
 	int ns = sched_ctxs == NULL ? sched_ctx_hypervisor_get_nsched_ctxs() : nsched_ctxs;
-	int nw = workers == NULL ? starpu_worker_get_count() : nworkers; /* Number of different workers */
+	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nt = 0; /* Number of different kinds of tasks */
+	pthread_mutex_lock(&mutex);
 	struct bound_task_pool * tp;
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
@@ -233,63 +130,10 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 	double w_in_s[ns][nw];
 	double tasks[nw][nt];
 	unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, tasks, sched_ctxs, workers);
+	pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
-	{
-		int w, s;
-		double nworkers[ns][2];
-		int nworkers_rounded[ns][2];
-		for(s = 0; s < ns; s++)
-		{
-			nworkers[s][0] = 0.0;
-			nworkers[s][1] = 0.0;
-			nworkers_rounded[s][0] = 0;
-			nworkers_rounded[s][1] = 0;
-			
-		}
-		
-		for(s = 0; s < ns; s++)
-		{
-			for(w = 0; w < nw; w++)
-			{
-				enum starpu_perf_archtype arch = starpu_worker_get_type(w);
-				
-				if(arch == STARPU_CUDA_WORKER)
-				{
-					nworkers[s][0] += w_in_s[s][w];
-					if(w_in_s[s][w] >= 0.3)
-						nworkers_rounded[s][0]++;
-				}
-				else
-				{
-					nworkers[s][1] += w_in_s[s][w];
-					if(w_in_s[s][w] > 0.5)
-						nworkers_rounded[s][1]++;
-				}
-			}
-		}
-
-		int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : 
-			sched_ctxs;
-
-		unsigned has_workers = 0;
-		for(s = 0; s < ns; s++)
-		{
-			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
-									     STARPU_ANY_WORKER);
-			if(nworkers_ctx != 0)
-			{
-				has_workers = 1;
-				break;
-			}
-		}
-		if(has_workers)
-			_lp_redistribute_resources_in_ctxs(nsched_ctxs, 2, nworkers_rounded, nworkers);
-		else
-			_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, 2, nworkers_rounded, nworkers, workers, nworkers);
-	
-//		_redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 1, sched_ctxs, workers);
-	}
+		_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 1);
 }
 
 static void size_if_required()
@@ -325,7 +169,7 @@ static void lp2_handle_submitted_job(struct starpu_task *task, uint32_t footprin
 
 	for (tp = task_pools; tp; tp = tp->next)
 	{
-		if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
+		if (tp && tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
 			break;
 	}
 
@@ -347,7 +191,38 @@ static void lp2_handle_submitted_job(struct starpu_task *task, uint32_t footprin
 	size_if_required();
 }
 
-static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers)
+static void _remove_task_from_pool(struct starpu_task *task, uint32_t footprint)
+{
+	/* count the tasks of the same type */
+	pthread_mutex_lock(&mutex);
+	struct bound_task_pool *tp = NULL;
+
+	for (tp = task_pools; tp; tp = tp->next)
+	{
+		if (tp && tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
+			break;
+	}
+
+	if (tp)
+	{
+		if(tp->n > 1)
+			tp->n--;
+		else
+		{
+			struct bound_task_pool *prev_tp = NULL;
+			for (prev_tp = task_pools; prev_tp; prev_tp = prev_tp->next)
+			{
+				if (prev_tp->next == tp)
+					prev_tp->next = tp->next;
+			}
+
+			free(tp);
+		}
+	}
+	pthread_mutex_unlock(&mutex);
+}
+
+static void _get_tasks_times(int nw, int nt, double times[nw][nt], int *workers)
 {
         struct bound_task_pool *tp;
         int w, t;
@@ -374,6 +249,8 @@ static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *w
 #include <glpk.h>
 static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned integer)
 {
+	if(task_pools == NULL)
+		return 0.0;
 	struct bound_task_pool * tp;
 	int t, w, s;
 	glp_prob *lp;
@@ -393,7 +270,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 		int ia[ne], ja[ne];
 		double ar[ne];
 
-		_starpu_get_tasks_times(nw, nt, times, workers);
+		_get_tasks_times(nw, nt, times, workers);
 
 		/* Variables: number of tasks i assigned to worker j, and tmax */
 		glp_add_cols(lp, nw*nt+ns*nw);
@@ -408,7 +285,13 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				char name[32];
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				glp_set_col_name(lp, colnum(w, t), name);
-				glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
+/* 				if (integer) */
+/*                                 { */
+/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
+/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
+/*                                 } */
+/* 				else */
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
 			}
 		for(s = 0; s < ns; s++)
 			for(w = 0; w < nw; w++)
@@ -439,6 +322,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 			if (!someone)
 			{
 				/* This task does not have any performance model at all, abort */
+				printf("NO PERF MODELS\n");
 				glp_delete_prob(lp);
 				return 0.0;
 			}
@@ -454,7 +338,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
 				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 				{
-					if(tp->sched_ctx_id == sched_ctxs[s])
+					if((int)tp->sched_ctx_id == sched_ctxs[s])
 					{
 						ia[n] = curr_row_idx+s*nw+w+1;
 						ja[n] = colnum(w, t);
@@ -529,6 +413,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 	int ret = glp_simplex(lp, &parm);
 	if (ret)
 	{
+		printf("error in simplex\n");
 		glp_delete_prob(lp);
 		lp = NULL;
 		return 0.0;
@@ -539,6 +424,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 	if(stat == GLP_NOFEAS)
 	{
 		glp_delete_prob(lp);
+//		printf("no_sol in tmax = %lf\n", tmax);
 		lp = NULL;
 		return 0.0;
 	}
@@ -554,6 +440,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 		/* if we don't have a solution return */
 		if(stat == GLP_NOFEAS)
 		{
+//			printf("no int sol in tmax = %lf\n", tmax);
 			glp_delete_prob(lp);
 			lp = NULL;
 			return 0.0;
@@ -563,8 +450,12 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 	double res = glp_get_obj_val(lp);
 	for (w = 0; w < nw; w++)
 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
-			tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
-
+/* 			if (integer) */
+/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
+/*                         else */
+				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
+	
+//	printf("for tmax %lf\n", tmax);
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
 		{
@@ -572,21 +463,18 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
                         else
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+//			printf("w_in_s[%d][%d]=%lf\n", s, w, w_in_s[s][w]);
 		}
+//	printf("\n");
 
 	glp_delete_prob(lp);
 	return res;
 }
 
 
-static double _find_tmax(double t1, double t2)
-{
-	return t1 + ((t2 - t1)/2);
-}
-
-
-static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
+static void lp2_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
+	_remove_task_from_pool(task, footprint);
 	struct sched_ctx_hypervisor_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
 
 	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
@@ -603,6 +491,7 @@ static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
 			int ns = sched_ctx_hypervisor_get_nsched_ctxs();
 			int nw = starpu_worker_get_count(); /* Number of different workers */
 			int nt = 0; /* Number of different kinds of tasks */
+			pthread_mutex_lock(&mutex);
 			struct bound_task_pool * tp;
 			for (tp = task_pools; tp; tp = tp->next)
 				nt++;
@@ -611,48 +500,12 @@ static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
 			double tasks_per_worker[nw][nt];
 
 			unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, tasks_per_worker, NULL, NULL);
+			pthread_mutex_unlock(&mutex);
 			/* if we did find at least one solution redistribute the resources */
 			if(found_sol)
-			{
-				int w, s;
-				double nworkers[ns][2];
-				int nworkers_rounded[ns][2];
-				for(s = 0; s < ns; s++)
-				{
-					nworkers[s][0] = 0.0;
-					nworkers[s][1] = 0.0;
-					nworkers_rounded[s][0] = 0;
-					nworkers_rounded[s][1] = 0;
-
-				}
+				_lp_place_resources_in_ctx(ns, nw, w_in_s, NULL, NULL, 0);
 
-				for(s = 0; s < ns; s++)
-				{
-					for(w = 0; w < nw; w++)
-					{
-						enum starpu_perf_archtype arch = starpu_worker_get_type(w);
-
-						if(arch == STARPU_CUDA_WORKER)
-						{
-							nworkers[s][0] += w_in_s[s][w];
-							if(w_in_s[s][w] >= 0.3)
-								nworkers_rounded[s][0]++;
-						}
-						else
-						{
-							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.5)
-								nworkers_rounded[s][1]++;
-						}
-					}
-				}
-/* 				for(s = 0; s < ns; s++) */
-/* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
-/* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
-
-				_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
 
-			}
 		}
 		pthread_mutex_unlock(&act_hypervisor_mutex);
 	}

+ 13 - 13
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -19,7 +19,7 @@
 
 
 #ifdef STARPU_HAVE_GLPK_H
-static void lp_handle_poped_task(unsigned sched_ctx, int worker)
+static void lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	if(_velocity_gap_btw_ctxs())
 	{
@@ -67,20 +67,20 @@ static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 	double vmax = _lp_get_nworkers_per_ctx(nsched_ctxs, 2, nworkers_per_type, total_nw);
 	if(vmax != 0.0)
 	{
-		printf("********size\n");
-		int i;
-		for( i = 0; i < nsched_ctxs; i++)
-		{
-			printf("ctx %d/worker type %d: n = %lf \n", i, 0, nworkers_per_type[i][0]);
-			printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]);
-		}
+/*  		printf("********size\n"); */
+/* 		int i; */
+/* 		for( i = 0; i < nsched_ctxs; i++) */
+/* 		{ */
+/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 0, nworkers_per_type[i][0]); */
+/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]); */
+/* 		} */
 		int nworkers_per_type_rounded[nsched_ctxs][2];
 		_lp_round_double_to_int(nsched_ctxs, 2, nworkers_per_type, nworkers_per_type_rounded);
-      		for( i = 0; i < nsched_ctxs; i++)
-		{
-			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]);
-			printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]);
-		}
+/*       		for( i = 0; i < nsched_ctxs; i++) */
+/* 		{ */
+/* 			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]); */
+/* 			printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]); */
+/* 		} */
 		int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : 
 			sched_ctxs;
 

+ 134 - 34
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -22,6 +22,7 @@
 
 double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
 {
+	int integer = 1;
 	int s, w;
 	glp_prob *lp;
 
@@ -49,7 +50,13 @@ double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flo
 			char name[32];
 			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
 			glp_set_col_name(lp, n, name);
-			glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0);
+			if (integer)
+			{
+				glp_set_col_kind(lp, n, GLP_IV);
+				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
+			}
+			else
+				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
 			n++;
 		}
 	}
@@ -154,7 +161,42 @@ double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flo
 	glp_smcp parm;
 	glp_init_smcp(&parm);
 	parm.msg_lev = GLP_MSG_OFF;
-	glp_simplex(lp, &parm);
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+        {
+                printf("error in simplex\n");
+		glp_delete_prob(lp);
+                lp = NULL;
+                return 0.0;
+        }
+
+	int stat = glp_get_prim_stat(lp);
+        /* if we don't have a solution return */
+        if(stat == GLP_NOFEAS)
+        {
+                glp_delete_prob(lp);
+//              printf("no_sol in tmax = %lf\n", tmax);                                                                                                                                                             
+                lp = NULL;
+                return 0.0;
+        }
+
+
+	if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+                int stat = glp_mip_status(lp);
+                /* if we don't have a solution return */
+                if(stat == GLP_NOFEAS)
+                {
+//                      printf("no int sol in tmax = %lf\n", tmax);                                                                                                                                                 
+                        glp_delete_prob(lp);
+                        lp = NULL;
+                        return 0.0;
+                }
+        }
 
 	double vmax = glp_get_obj_val(lp);
 
@@ -163,7 +205,11 @@ double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flo
 	{
 		for(w = 0; w < nw; w++)
 		{
-			res[s][w] = glp_get_col_prim(lp, n);
+			if (integer)
+                                res[s][w] = (double)glp_mip_col_val(lp, n);
+			else
+				res[s][w] = glp_get_col_prim(lp, n);
+//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
 			n++;
 		}
 	}
@@ -186,20 +232,11 @@ double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double r
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
-		v[i][0] = _get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
-		if(v[i][0] == -1.0)
-			v[i][0] = _get_ref_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
-		if(v[i][0] == -1.0)
-			v[i][0] = 20.0;
-		v[i][1] = _get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
-
-		if(v[i][1] == -1.0)
-			v[i][0] = _get_ref_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
-		if(v[i][1] == -1.0)
-			v[i][1] = 200.0;
+		v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
+		v[i][1] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
 
 		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
-//			printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
+//		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
 	}
 
 	return 1/_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
@@ -272,6 +309,8 @@ void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded
 					}
 				}
 			}
+			else 
+				res_rounded[s][w] = x;
 		}
 	}
 }
@@ -509,9 +548,7 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
 void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
 {
-	int current_nworkers = workers == NULL ? starpu_worker_get_count() : nworkers;
-	int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs;
-
+	unsigned current_nworkers = workers == NULL ? starpu_worker_get_count() : (unsigned)nworkers;
 	int s, w;
 	int start[nw];
 	for(w = 0; w < nw; w++)
@@ -546,29 +583,23 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 				if(diff == 0.0)
 				{
 					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
-					if(x > 0)
-					{
-						int i;
-						for(i = 0; i < x; i++)
-							workers_add[nw_add++] = workers_to_add[i];
-					}
+					int i;
+					for(i = 0; i < x; i++)
+						workers_add[nw_add++] = workers_to_add[i];
 					free(workers_to_add);
 				}
 				else
 				{
 					x+=1;
 					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
-					if(x > 0)
-					{
-						int i;
-						if(diff >= 0.3)
-							for(i = 0; i < x; i++)
-								workers_add[nw_add++] = workers_to_add[i];
-						else
-							for(i = 0; i < x-1; i++)
-								workers_add[nw_add++] = workers_to_add[i];
+					int i;
+					if(diff >= 0.3)
+						for(i = 0; i < x; i++)
+							workers_add[nw_add++] = workers_to_add[i];
+					else
+						for(i = 0; i < x-1; i++)
+							workers_add[nw_add++] = workers_to_add[i];
 
-					}
 					free(workers_to_add);
 				}
 			}
@@ -582,3 +613,72 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 //		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
 	}
 }
+
+/* nw = all the workers (either in a list or on all machine) */
+void _lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], int *sched_ctxs_input, int *workers_input, unsigned do_size)
+{
+	int w, s;
+	double nworkers[ns][2];
+	int nworkers_rounded[ns][2];
+	for(s = 0; s < ns; s++)
+	{
+		nworkers[s][0] = 0.0;
+		nworkers[s][1] = 0.0;
+		nworkers_rounded[s][0] = 0;
+		nworkers_rounded[s][1] = 0;
+		
+	}
+	
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			enum starpu_archtype arch = starpu_worker_get_type(w);
+			
+			if(arch == STARPU_CUDA_WORKER)
+			{
+				nworkers[s][0] += w_in_s[s][w];
+				if(w_in_s[s][w] >= 0.3)
+					nworkers_rounded[s][0]++;
+			}
+			else
+			{
+				nworkers[s][1] += w_in_s[s][w];
+				if(w_in_s[s][w] > 0.5)
+					nworkers_rounded[s][1]++;
+			}
+		}
+	}
+	
+/* 	for(s = 0; s < ns; s++) */
+/* 		printf("%d: cpus = %d gpus = %d \n", s, nworkers_rounded[s][1], nworkers_rounded[s][0]); */
+
+	if(!do_size)
+		_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+	else
+	{
+		int *current_sched_ctxs = sched_ctxs_input == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs_input;
+
+		unsigned has_workers = 0;
+		for(s = 0; s < ns; s++)
+		{
+			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
+										 STARPU_ANY_WORKER);
+			if(nworkers_ctx != 0)
+			{
+				has_workers = 1;
+				break;
+			}
+		}
+		if(has_workers)
+			_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		else
+			_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, 2, nworkers_rounded, nworkers, workers_input, nw);
+	}
+	return;
+}
+
+double _find_tmax(double t1, double t2)
+{
+	return t1 + ((t2 - t1)/2);
+}

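_find_tmax(), moved here from the individual policies, is the midpoint step of the dichotomy the LP policies run over tmax: _find_tmax(t1, t2) = t1 + (t2 - t1)/2, so for instance _find_tmax(2.0, 6.0) = 4.0. The search loop itself is elided from this diff; the following is only a plausible sketch of how the bounds set up in lp2_policy.c (tmin = smallest_tmax, tmax = possible_tmax * ns) would drive it, with the stopping tolerance an assumption:

	/* Sketch only: bisect between an infeasible lower bound and a
	 * feasible upper bound on the makespan tmax; the 0.5 tolerance
	 * is assumed, not taken from this commit. */
	double tmin = smallest_tmax;
	double tmax = possible_tmax * ns;
	unsigned found_sol = 0;
	while (tmax - tmin > 0.5)
	{
		double tmid = _find_tmax(tmin, tmax);
		/* _glp_resolve returns 0.0 when the LP has no solution */
		double res = _glp_resolve(ns, nw, nt, tasks, tmid, w_in_s,
					  sched_ctxs, workers, 1);
		if (res != 0.0)
		{
			found_sol = 1;
			tmax = tmid;	/* feasible: try a smaller makespan */
		}
		else
			tmin = tmid;	/* infeasible: raise the lower bound */
	}
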
+ 6 - 0
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.h

@@ -42,3 +42,9 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
 /* make the first distribution of ressource in contexts by assigning the first x available ressources to each one */
 void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers);
+
+/* place resources in contexts dependig on whether they already have workers or not */
+void _lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], int *sched_ctxs, int *workers, unsigned do_size);
+
+/* dichotomy btw t1 & t2 */
+double _find_tmax(double t1, double t2);

+ 4 - 30
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -75,7 +75,7 @@ unsigned _find_poor_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move)
 	return sched_ctx;
 }
 
-int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch)
+int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  int *nworkers, enum starpu_archtype arch)
 {
 	int *curr_workers = (int*)malloc((*nworkers)*sizeof(int));
 
@@ -83,6 +83,9 @@ int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  uns
 	int nfound_workers = 0;
 	for(w = 0; w < nall_workers; w++)
 	{
+		if(nfound_workers >= *nworkers)
+			break;
+
 		worker = workers == NULL ? w : workers[w];
 		enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
 		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
@@ -93,8 +96,6 @@ int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  uns
 				*start = w+1;
 			}
 		}
-		if(nfound_workers == *nworkers)
-			break;
 	}
 	if(nfound_workers < *nworkers)
 		*nworkers = nfound_workers;
@@ -519,33 +520,6 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
         return -1.0;
 }
 
-/* compute an average value of the cpu/cuda old velocity */
-double _get_ref_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
-{
-	double ref_velocity = 0.0;
-	unsigned nw = 0;
-
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
-	int worker;
-
-	struct starpu_sched_ctx_iterator it;
-	if(workers->init_iterator)
-		workers->init_iterator(workers, &it);
-
-	while(workers->has_next(workers, &it))
-	{
-		worker = workers->get_next(workers, &it);
-		if(sc_w->ref_velocity[worker] > 1.0)
-		{
-			ref_velocity += sc_w->ref_velocity[worker];
-			nw++;
-		}
-	}
-	
-	if(nw > 0)
-		return ref_velocity / nw;
-	return -1.0;
-}
 
 /* check if there is a big velocity gap between the contexts */
 int _velocity_gap_btw_ctxs()

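With the signature change below (nworkers becomes int* in policy_tools.h as well), _get_first_workers_in_list() treats *nworkers as an in/out parameter: the caller asks for up to *nworkers workers of a given arch, the loop now stops as soon as that many are found, and the count actually found is written back. A hedged usage sketch matching the call sites in lp_tools.c; the printf body is illustrative:

	int start = 0;
	int x = 3;	/* ask for up to 3 CPU workers */
	int *ws = _get_first_workers_in_list(&start, NULL,
					     (int)starpu_worker_get_count(),
					     &x, STARPU_CPU_WORKER);
	/* on return, x holds how many workers were actually found,
	 * possibly fewer than requested */
	int i;
	for (i = 0; i < x; i++)
		printf("picked worker %d\n", ws[i]);
	free(ws);
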
+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -38,7 +38,7 @@ unsigned _find_poor_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move);
 
 int* _get_first_workers(unsigned sched_ctx, int *nworkers, enum starpu_archtype arch);
 
-int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch);
+int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  int *nworkers, enum starpu_archtype arch);
 
 unsigned _get_potential_nworkers(struct sched_ctx_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_archtype arch);
 

+ 84 - 46
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -23,7 +23,7 @@ struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
 
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
-static void notify_poped_task(unsigned sched_ctx, int worker, double flops, size_t data_size);
+static void notify_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, size_t data_size, uint32_t footprint);
 static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
 static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
@@ -209,25 +209,23 @@ void sched_ctx_hypervisor_start_resize(unsigned sched_ctx)
 
 static void _print_current_time()
 {
-/* 	double curr_time = starpu_timing_now(); */
-/* 	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /\* in seconds *\/ */
-/* 	fprintf(stdout, "Time: %lf\n", elapsed_time); */
-/* 	int i; */
-/* 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) */
-/* 	{ */
-/* 		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) */
-/* 		{ */
-/* 			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; */
-
-/* 			double cpu_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER); */
-/* 			double cuda_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER); */
-/* 			int ncpus = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER); */
-/* 			int ncuda = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER); */
-/* 			cpu_speed = cpu_speed == -1.0 ? 0.0 : cpu_speed; */
-/* 			cuda_speed = cuda_speed == -1.0 ? 0.0 : cuda_speed; */
-/* 			fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda); */
-/* 		} */
-/* 	} */
+	double curr_time = starpu_timing_now();
+	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
+	fprintf(stdout, "Time: %lf\n", elapsed_time);
+	int i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
+		{
+			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
+
+			double cpu_speed = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+			double cuda_speed = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
+			int ncpus = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
+			int ncuda = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
+			fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
+		}
+	}
 	return;
 }
 
@@ -277,7 +275,7 @@ void sched_ctx_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
-static int _get_first_free_sched_ctx(int *sched_ctxs, unsigned nsched_ctxs)
+static int _get_first_free_sched_ctx(int *sched_ctxs, int nsched_ctxs)
 {
 	int i;
 	for(i = 0; i < nsched_ctxs; i++)
@@ -318,7 +316,7 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 	unsigned i;
 	for(i = 0; i < hypervisor.nsched_ctxs; i++)
 	{
-		if(hypervisor.sched_ctxs[i] == sched_ctx)
+		if(hypervisor.sched_ctxs[i] == (int)sched_ctx)
 		{
 			hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
 			break;
@@ -384,6 +382,34 @@ double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hyperv
         return -1.0;
 }
 
+/* compute an average value of the cpu/cuda old velocity */
+double _get_ref_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
+{
+	double ref_velocity = 0.0;
+	unsigned nw = 0;
+
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		if(sc_w->ref_velocity[worker] > 1.0)
+		{
+			ref_velocity += sc_w->ref_velocity[worker];
+			nw++;
+		}
+	}
+	
+	if(nw > 0)
+		return ref_velocity / nw;
+	return -1.0;
+}
+
 static int get_ntasks( int *tasks)
 {
 	int ntasks = 0;
@@ -471,11 +497,11 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 	
 	double start_time =  starpu_timing_now();
 	sender_sc_w->start_time = start_time;
-	sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
+//	sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
 	_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
 
 	receiver_sc_w->start_time = start_time;
-	receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
+//	receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
 	_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
 }
 
@@ -486,7 +512,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])// && hypervisor.resize[receiver_sched_ctx])
 	{
 		_print_current_time();
-		int j;
+		unsigned j;
 		printf("resize ctx %d with %d workers", sender_sched_ctx, nworkers_to_move);
 		for(j = 0; j < nworkers_to_move; j++)
 			printf(" %d", workers_to_move[j]);
@@ -497,7 +523,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 
 		if(now)
 		{
-			int j;
+			unsigned j;
 			printf("remove now from ctx %d:", sender_sched_ctx);
 			for(j = 0; j < nworkers_to_move; j++)
 				printf(" %d", workers_to_move[j]);
@@ -518,7 +544,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 				hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
 
 
-				int i;
+				unsigned i;
 				for(i = 0; i < nworkers_to_move; i++)
 				{
 					hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
@@ -533,7 +559,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 			}
 		}
 		struct sched_ctx_hypervisor_policy_config *new_config = sched_ctx_hypervisor_get_config(receiver_sched_ctx);
-		int i;
+		unsigned i;
 		for(i = 0; i < nworkers_to_move; i++)
 			new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] :  new_config->new_workers_max_idle;
 
@@ -546,14 +572,14 @@ void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned
 	if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx])
 	{
 		_print_current_time();
-		int j;
+		unsigned j;
 		printf("add to ctx %d:", sched_ctx);
 		for(j = 0; j < nworkers_to_add; j++)
 			printf(" %d", workers_to_add[j]);
 		printf("\n");
 		starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx);
 		struct sched_ctx_hypervisor_policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctx);
-		int i;
+		unsigned i;
 		for(i = 0; i < nworkers_to_add; i++)
 			new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
 
@@ -571,12 +597,12 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx])
 	{
 		_print_current_time();
-		int nworkers=0;
+		unsigned nworkers = 0;
 		int workers[nworkers_to_remove];
 
 		if(now)
 		{
-			int j;
+			unsigned j;
 			printf("remove explicitley now from ctx %d:", sched_ctx);
 			for(j = 0; j < nworkers_to_remove; j++)
 				printf(" %d", workers_to_remove[j]);
@@ -587,7 +613,7 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 		else
 		{
 			printf("try to remove from ctx %d: ", sched_ctx);
-			int j;
+			unsigned j;
 			for(j = 0; j < nworkers_to_remove; j++)
 				printf(" %d", workers_to_remove[j]);
 			printf("\n");
@@ -596,14 +622,14 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 			if(ret != EBUSY)
 			{
 
-				int i;
+				unsigned i;
 				for(i = 0; i < nworkers_to_remove; i++)
 					if(starpu_sched_ctx_contains_worker(workers_to_remove[i], sched_ctx))
 						workers[nworkers++] = workers_to_remove[i];
 
 				hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
 				hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
-				hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = (int)nworkers;
 				hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
 
 
@@ -638,7 +664,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
 			pthread_mutex_lock(&sc_w->mutex);
 			unsigned only_remove = 0;
-			if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx &&
+			if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != (int)sched_ctx &&
 			   sc_w->resize_ack.nmoved_workers > 0 && starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[i]))
 			{
 				int j;
@@ -650,7 +676,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 					}
 			}
 			if(only_remove ||
-			   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
+			   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == (int)sched_ctx))
 			{
 				resize_ack = &sc_w->resize_ack;
 				sender_sched_ctx = hypervisor.sched_ctxs[i];
@@ -795,22 +821,22 @@ static void notify_pushed_task(unsigned sched_ctx, int worker)
 }
 
 /* notifies the hypervisor that a task was poped from the queue of the worker */
-static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flops, size_t data_size)
+static void notify_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, size_t data_size, uint32_t footprint)
 {
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
-	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += elapsed_flops;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += task->flops;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
-	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
-	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= elapsed_flops; //sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += task->flops;
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops; //sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
 
 	if(hypervisor.resize[sched_ctx])
 	{	
 		if(hypervisor.policy.handle_poped_task)
-			hypervisor.policy.handle_poped_task(sched_ctx, worker);
+			hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
 	}
 	_ack_resize_completed(sched_ctx, worker);
-	if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 100 == 0)
+	if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 200 == 0)
 		_print_current_time();
 }
 
@@ -820,7 +846,7 @@ static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
 	STARPU_ASSERT(task_tag > 0);
 
 	unsigned conf_sched_ctx;
-	int i;
+	unsigned i;
 	pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned ns = hypervisor.nsched_ctxs;
 	pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -887,10 +913,10 @@ static void notify_delete_context(unsigned sched_ctx)
 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	pthread_mutex_lock(&act_hypervisor_mutex);
-	int curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : nsched_ctxs;
+	unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : nsched_ctxs;
 	int *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
 	pthread_mutex_unlock(&act_hypervisor_mutex);
-	int s;
+	unsigned s;
 	for(s = 0; s < curr_nsched_ctxs; s++)
 		hypervisor.resize[curr_sched_ctxs[s]] = 1;
 
@@ -945,3 +971,15 @@ void sched_ctx_hypervisor_free_size_req(void)
 		hypervisor.sr = NULL;
 	}
 }
+
+double sched_ctx_hypervisor_get_velocity(struct sched_ctx_hypervisor_wrapper *sc_w, enum starpu_archtype arch)
+{
+
+	double velocity = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, arch);
+	if(velocity == -1.0)
+		velocity = _get_ref_velocity_per_worker_type(sc_w, arch);
+	if(velocity == -1.0)
+		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
+       
+	return velocity;
+}

+ 1 - 1
src/core/jobs.c

@@ -219,7 +219,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	{
 		_starpu_sched_post_exec_hook(task);
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
-		starpu_sched_ctx_call_poped_task_cb(workerid, task->sched_ctx, task->flops, data_size);
+		_starpu_sched_ctx_call_poped_task_cb(workerid, task, data_size, j->footprint);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR
 	}
 

+ 4 - 4
src/core/sched_ctx.c

@@ -1037,12 +1037,12 @@ void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id)
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 
-void starpu_sched_ctx_call_poped_task_cb(int workerid, unsigned sched_ctx_id, double flops, size_t data_size)
+void _starpu_sched_ctx_call_poped_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint)
 {
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	if(sched_ctx != NULL && sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
+	if(sched_ctx != NULL && task->sched_ctx != 0 && task->sched_ctx != STARPU_NMAX_SCHED_CTXS
 	   && sched_ctx->perf_counters != NULL)
-		sched_ctx->perf_counters->notify_poped_task(sched_ctx_id, workerid, flops, data_size);
+		sched_ctx->perf_counters->notify_poped_task(task->sched_ctx, workerid, task, data_size, footprint);
 }
 
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)

+ 5 - 0
src/core/sched_ctx.h

@@ -141,6 +141,11 @@ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker
 /* Check if the worker belongs to another sched_ctx */
 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
 
+#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+/* Notifies the hypervisor that a tasks was poped from the workers' list */
+void _starpu_sched_ctx_call_poped_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);
+#endif //STARPU_USE_SCHED_CTX_HYPERVISOR
+
 #if defined(_MSC_VER) || defined(STARPU_SIMGRID)
 _starpu_pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
 #endif