
hierarchical contexts - bug fixing and DEBUG macro

Andra Hugo, 11 years ago
parent
Commit
5f388d2a95

+ 14 - 0
configure.ac

@@ -330,6 +330,20 @@ fi
 AM_CONDITIONAL([STARPU_BUILD_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"])
 AM_CONDITIONAL([STARPU_USE_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"])
 
+AC_ARG_ENABLE([sc_hypervisor_debug],
+  [AS_HELP_STRING([--enable-sc-hypervisor-debug],
+    [enable debug for resizing contexts (experimental)])],
+  [enable_sc_hypervisor_debug="$enableval"],
+  [enable_sc_hypervisor_debug="no"])
+
+
+AC_SUBST(STARPU_SC_HYPERVISOR_DEBUG, $enable_sc_hypervisor_debug)
+AM_CONDITIONAL([STARPU_SC_HYPERVISOR_DEBUG], [test "x$enable_sc_hypervisor_debug" = "xyes"])
+
+if test "x$enable_sc_hypervisor_debug" = "xyes"; then
+  AC_DEFINE(STARPU_SC_HYPERVISOR_DEBUG, [1], [enable debug sc_hypervisor])
+fi
+
 ###############################################################################
 #                                                                             #
 #                                 CPUs settings                               #
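
The new --enable-sc-hypervisor-debug switch defines the STARPU_SC_HYPERVISOR_DEBUG macro, which the rest of this commit uses to guard its diagnostic printf calls. A minimal sketch of the intended pattern, assuming a build configured with ./configure --enable-sc-hypervisor-debug (the helper below is illustrative, not part of the patch):

#include <stdio.h>
#include <starpu_config.h>	/* provides STARPU_SC_HYPERVISOR_DEBUG when enabled */

static void report_resize(unsigned sched_ctx, int nworkers)
{
#ifdef STARPU_SC_HYPERVISOR_DEBUG
	/* emitted only in debug builds of the hypervisor */
	printf("ctx %u resized to %d workers\n", sched_ctx, nworkers);
#else
	(void)sched_ctx;	/* silence unused-parameter warnings otherwise */
	(void)nworkers;
#endif
}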

+ 1 - 0
include/starpu_config.h.in

@@ -80,6 +80,7 @@
 #undef STARPU_MAXIMPLEMENTATIONS
 #undef STARPU_MAXMPKERNELS
 #undef STARPU_USE_SC_HYPERVISOR
+#undef STARPU_SC_HYPERVISOR_DEBUG
 #undef STARPU_HAVE_GLPK_H
 
 #undef STARPU_HAVE_LIBNUMA

+ 3 - 0
include/starpu_sched_ctx.h

@@ -113,6 +113,9 @@ double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id);
 
 void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority);
 
+void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority);
+
+unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
 #ifdef STARPU_USE_SC_HYPERVISOR
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
 #endif //STARPU_USE_SC_HYPERVISOR
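
Two public entry points are added here: starpu_sched_ctx_set_priority_on_level() and starpu_sched_ctx_get_priority(). A hedged usage sketch; the 0 = low / 1 = high convention is an assumption carried over from the existing starpu_sched_ctx_set_priority(), and the worker list is a placeholder:

#include <starpu.h>
#include <starpu_sched_ctx.h>

void priority_example(int *workers, unsigned nworkers, unsigned sched_ctx)
{
	/* lower the priority of these workers in the contexts that sit on
	   the same hierarchy level as sched_ctx */
	starpu_sched_ctx_set_priority_on_level(workers, nworkers, sched_ctx, 0);

	/* read back the priority of the first worker (assumes nworkers >= 1);
	   the new getter returns 1 when the context is not in the worker's list */
	unsigned prio = starpu_sched_ctx_get_priority(workers[0], sched_ctx);
	(void)prio;
}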

+ 1 - 1
include/starpu_worker.h

@@ -57,7 +57,7 @@ struct starpu_worker_collection
 {
 	void *workerids;
 	unsigned nworkers;
-	unsigned present[STARPU_NMAXWORKERS];
+	int present[STARPU_NMAXWORKERS];
 	enum starpu_worker_collection_type type;
 	unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
 	int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);

+ 6 - 1
sc_hypervisor/include/sc_hypervisor.h

@@ -129,7 +129,7 @@ void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total
 void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops);
 
 /* updates the min and max workers needed by each context */
-void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs);
+void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_nworkers);
 
 /* returns a list of contexts that are on the same level in the hierarchy of contexts */
 void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id);
@@ -137,8 +137,13 @@ void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, un
 /* returns the number of levels of ctxs registered to the hyp */
 unsigned sc_hypervisor_get_nhierarchy_levels(void);
 
+/* returns the leaf contexts from the given list of contexts */
+void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves);
+
 /* returns the nready flops of all ctxs below sched_ctx in the hierarchy */
 double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
+
+void sc_hypervisor_print_overhead();
 #ifdef __cplusplus
 }
 #endif
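
sc_hypervisor_get_leaves() factors out the leaf-detection loop that feft_lp_policy.c used to carry inline (see the removal further down) and that sc_hypervisor.c now needs as well. A minimal usage sketch, assuming the caller already holds the list of registered contexts:

#include <starpu.h>
#include <sc_hypervisor.h>

void resize_only_leaves(unsigned *sched_ctxs, int nsched_ctxs)
{
	unsigned leaves[STARPU_NMAX_SCHED_CTXS];
	int nleaves = 0;

	/* keep only the contexts that are nobody's inheritor (father) */
	sc_hypervisor_get_leaves(sched_ctxs, nsched_ctxs, leaves, &nleaves);

	int s;
	for(s = 0; s < nleaves; s++)
	{
		/* hypothetical per-leaf action, e.g. trigger a resize check on leaves[s] */
	}
}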

+ 3 - 0
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -50,6 +50,9 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 /* make the first distribution of resources in contexts by assigning the first x available resources to each one */
 void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw);
 
+/* make the first distribution of resources in contexts by assigning the first x available resources to each one, sharing a non-integer number of workers */
+void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw);
+
 /* place resources in contexts depending on whether they already have workers or not */
 void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs, int *workers, unsigned do_size, struct types_of_workers *tw);
 

+ 4 - 0
sc_hypervisor/include/sc_hypervisor_monitoring.h

@@ -132,6 +132,10 @@ struct sc_hypervisor_wrapper
 	   worker to the idle of the context or just half*/
 	unsigned compute_partial_idle[STARPU_NMAXWORKERS];
 
+	/* whether to consider the max_nworkers bound in the lp */
+	unsigned consider_max;
+
+
 };
 
 /* return the wrapper of context that saves its monitoring information */

+ 6 - 0
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -99,6 +99,12 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 /* compute the speed of a type of worker in a context depending on its history */ 
 double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
 
+/* compute the average speed of a type of worker over all ctxs since the beginning of the application */
+double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch);
+
+/* check whether the max_nworkers bound needs to be considered in the lp */
+void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw);
+
 /* get the list of workers grouped by type */
 void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw);
 

+ 18 - 22
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -26,8 +26,9 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 {
 	/* for vite */
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 	printf("resize_no = %d %d ctxs\n", resize_no, ns);
-
+#endif
 	if(ns <= 0) return;
 
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
@@ -54,14 +55,20 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	
 	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000;
 	
-	if(vmax != 0.0)
+	if(vmax != -1.0)
 	{
-		int nworkers_per_ctx_rounded[ns][nw];
-		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded);
-//		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
-		sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw);
+/* 		int nworkers_per_ctx_rounded[ns][nw]; */
+/* 		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded); */
+/* //		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw); */
+/* 		sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw); */
+		sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx, workers, curr_nworkers, tw);
+
 		sc_hypervisor_lp_share_remaining_resources(ns, curr_sched_ctxs, curr_nworkers, workers);
 	}
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("*****finished resize \n");
+#endif
+	return;
 }
 
 static void _try_resizing_hierarchically(unsigned levels, unsigned current_level, unsigned *sched_ctxs, unsigned nsched_ctxs, int *pus, int npus)
@@ -117,7 +124,9 @@ static int _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *fir
 
 static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
+#ifdef STARPU_USE_FXT
 	starpu_fxt_trace_user_event(resize_no);
+#endif
 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 	if(nhierarchy_levels > 1)
 	{
@@ -270,7 +279,9 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 	}
 
 	_resize(sched_ctxs, nsched_ctxs, workers, nworkers);
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 	printf("finished size ctxs\n");
+#endif
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -296,22 +307,7 @@ static void _resize_leaves(int worker)
 
 	unsigned leaves[nsched_ctxs];
 	unsigned nleaves = 0;
-	for(s = 0; s < nworkers_sched_ctxs; s++)
-	{
-		unsigned is_someones_father = 0;
-		for(s2 = 0; s2 < nworkers_sched_ctxs; s2++)
-		{
-			unsigned father = starpu_sched_ctx_get_inheritor(workers_sched_ctxs[s2]);
-			if(workers_sched_ctxs[s] == father)
-			{
-				is_someones_father = 1;
-				break;
-			}
-		}
-		if(!is_someones_father)
-			leaves[nleaves++] = workers_sched_ctxs[s];
-	}
-
+	sc_hypervisor_get_leaves(workers_sched_ctxs, nworkers_sched_ctxs, leaves, &nleaves);
 	for(s = 0; s < nleaves; s++)
 		_resize_if_speed_diff(leaves[s], worker);
 }

+ 0 - 1
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -110,7 +110,6 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
-	starpu_fxt_trace_user_event(2);
         int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = nworkers == -1 ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
         unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;

+ 0 - 1
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -166,7 +166,6 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
-	starpu_fxt_trace_user_event(2);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 

+ 43 - 10
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -252,7 +252,7 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], 
 					       int  total_nw[nw], unsigned sched_ctxs[ns], double last_vmax)
 {
-	int integer = 1;
+	int integer = 0;
 	int s, w;
 	glp_prob *lp;
 
@@ -272,11 +272,13 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
 	glp_add_cols(lp, nw*ns+1);
 
+	struct sc_hypervisor_wrapper *sc_w = NULL;
 	for(s = 0; s < ns; s++)
 	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
 		for(w = 0; w < nw; w++)
 		{
-			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
 			char name[32];
 			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
 			glp_set_col_name(lp, n, name);
@@ -284,22 +286,50 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
-				if(config->max_nworkers == 0)
-					glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
+				if(sc_w->consider_max)
+				{
+					if(config->max_nworkers == 0)
+						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
+					else
+						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers);
+				}
 				else
-					glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers);
+				{
+					if(total_nw[w] == 0)
+						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]);
+					else
+						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, total_nw[w]);
+				}
 			}
 			else
 			{
-				if(config->max_nworkers == 0)
-					glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0);
+				if(sc_w->consider_max)
+				{
+					if(config->max_nworkers == 0)
+						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0);
+					else
+						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0);
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0);
+#endif
+				}
 				else
-					glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0);
+				{
+					if(total_nw[w] == 0)
+						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0);
+					else
+						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, total_nw[w]*1.0);
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+					printf("%d****************don't consider max %d but total %d in lp\n", sched_ctxs[s], config->max_nworkers, total_nw[w]);
+#endif
+				}
 			}
 			n++;
 		}
 	}
-
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("ns = %d nw = %d\n", ns, nw);
+#endif
 	/*1/tmax should belong to the interval [0.0;1.0]*/
 	glp_set_col_name(lp, n, "vmax");
 //	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
@@ -456,8 +486,9 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
         }
 
 	double vmax = glp_get_obj_val(lp);
-
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 	printf("vmax = %lf \n", vmax);
+#endif
 	n = 1;
 	for(s = 0; s < ns; s++)
 	{
@@ -467,7 +498,9 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
                                 res[s][w] = (double)glp_mip_col_val(lp, n);
 			else
 				res[s][w] = glp_get_col_prim(lp, n);
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
   			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
+#endif
 			n++;
 		}
 	}
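
Besides switching the LP to its continuous relaxation (integer = 0), the hunk above picks the upper bound of each (context, worker type) column from either the per-context max_nworkers or the machine-wide total, depending on the new consider_max flag. The decision boils down to the helper below (illustrative only, not code from the patch):

/* upper bound of one LP column: trust the per-context max_nworkers only
   when the context's observed speed made consider_max true, otherwise let
   the LP use up to all workers of that type */
static double column_upper_bound(unsigned consider_max, int max_nworkers, int total_nw)
{
	return consider_max ? (double)max_nworkers : (double)total_nw;
}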

+ 186 - 30
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -28,9 +28,9 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 	double v[nsched_ctxs][ntypes_of_workers];
 	double flops[nsched_ctxs];
 	
-	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
-	if(nhierarchy_levels <= 1)
-		sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs);
+/* 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); */
+/* 	if(nhierarchy_levels <= 1) */
+	sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs, total_nw[0]);
 
 	int nw = tw->nw;
 	int i = 0;
@@ -41,7 +41,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 		int w;
 		for(w = 0; w < nw; w++)
-			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+			v[i][w] = 5.0;//sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
 		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
@@ -72,10 +72,57 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		}
 		if(flops[i] < 0.0)
 			flops[i] = 0.0;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",
 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks);
+#endif
 
 	}
+	sc_hypervisor_check_if_consider_max(tw);
+	int w;
+	for(w = 0; w < nw; w++)
+	{
+		double avg_speed = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw));
+		if(avg_speed != -1.0)
+		{
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+			printf("avg_speed for cpus is %lf \n", avg_speed);
+#endif
+			unsigned consider_max_for_all = 0;
+			for(i = 0; i < nsched_ctxs; i++)
+			{
+				sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+				
+				if(!sc_w->consider_max)
+				{
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+					printf("ctx %d: current speed is %lf, compared against min %lf max %lf\n", sched_ctxs[i], v[i][w], (0.1*avg_speed), (2*avg_speed));
+#endif
+					if(v[i][w] < 0.1*avg_speed || v[i][w] > 2*avg_speed)
+					{
+						sc_w->consider_max = 1;
+						consider_max_for_all = 1;
+					}
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+					printf("ctx %d consider max %d \n", sched_ctxs[i], sc_w->consider_max);
+#endif
+				}
+
+			}
+			if(consider_max_for_all)
+			{
+				for(i = 0; i < nsched_ctxs; i++)
+				{
+					sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+					sc_w->consider_max = 1;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+					printf("ctx %d consider max %d anyway \n", sched_ctxs[i], sc_w->consider_max);
+#endif
+				}
+			}
+
+		}
+	}
 
 	if(nsched_ctxs == 1)
 	{
@@ -101,7 +148,8 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
-		if(config->max_nworkers != 0)
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+		if(config->max_nworkers != 0 || !sc_w->consider_max)
 		{
 			tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i];
 			tmp_flops[tmp_nsched_ctxs] = flops[i];
@@ -111,7 +159,8 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 			tmp_nsched_ctxs++;
 		}
 	}
-	
+	if(tmp_nsched_ctxs == 0)
+		return -1.0;
 	double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0);
 
 	int j;
@@ -203,31 +252,47 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 
 	}
 
+	/* if the lp could not give any workers to any context,
+	   just split the workers between the contexts */
+	if(ret == 0.0)
+	{
+		double rand_res[nw];
+		int w;
+		for(w = 0; w < nw; w++)
+			rand_res[w] = total_nw[w]/nsched_ctxs;
+		int s;
+		for(s = 0; s < nsched_ctxs; s++)
+			for(w = 0; w < nw; w++)
+				res[s][w] = rand_res[w];
+	}
+
+	else
 	/* keep the first speed */
-	if(ret != 0.0)
+//	if(ret != 0.0)
 	{
 		vmax = 1 / ret;
-		double optimal_v = 0.0;
-		for(i = 0; i < nsched_ctxs; i++)
-		{
+	}
+	double optimal_v = 0.0;
+	for(i = 0; i < nsched_ctxs; i++)
+	{
 #ifdef STARPU_USE_CUDA
-			optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
+		optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
 #else
-			optimal_v = res[i][0] * v[i][0];
+		optimal_v = res[i][0] * v[i][0];
 #endif //STARPU_USE_CUDA
-			int w;
-			unsigned no_workers = 1;
-			for(w = 0; w < nw; w++)
+		int w;
+		unsigned no_workers = 1;
+		for(w = 0; w < nw; w++)
+		{
+			if(res[i][w] != 0.0)
 			{
-				if(res[i][w] != 0.0)
-				{
-					no_workers = 0;
-					break;
-				}
+				no_workers = 0;
+				break;
 			}
-			
-			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
-			
+		}
+		
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+		
 /* if the hypervisor gave 0 workers to a context but the context still 
    has some last flops or a ready task that does not even have any flops
    we give a worker (in shared mode) to the context in order to leave him
@@ -235,15 +300,14 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
    the distribution function we take this into account and revert the variable
    to its 0.0 value */ 
 //		if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
-			if(no_workers)
-			{
-				for(w = 0; w < nw; w++)
-					res[i][w] = -1.0;
-			}
+		if(no_workers)
+		{
+			for(w = 0; w < nw; w++)
+				res[i][w] = -1.0;
+		}
 			
 //			if(optimal_v != 0.0)
-				_set_optimal_v(sched_ctxs[i], optimal_v);
-		}
+		_set_optimal_v(sched_ctxs[i], optimal_v);
 	}
 
 	return vmax;
@@ -680,6 +744,98 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 	}
 }
 
+void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, double res[ns][nw], 
+							       int *workers, int nworkers, struct types_of_workers *tw)
+{
+	int s, w;
+	int start[nw];
+	for(w = 0; w < nw; w++)
+		start[w] = 0;
+	for(s = 0; s < ns; s++)
+	{
+		int workers_add[STARPU_NMAXWORKERS];
+                int nw_add = 0;
+		double target_res = 0.0;
+		for(w = 0; w < nw; w++)
+		{
+			target_res += res[s][w];
+			if(res[s][w] == -1.0) res[s][w] = 0.0;
+		}
+
+		for(w = 0; w < nw; w++)
+		{
+			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
+			
+			if(arch == STARPU_CPU_WORKER) 
+			{
+				int nworkers_to_add = ceil(res[s][w]);
+				double ceil_double = (double)nworkers_to_add;
+				double diff = ceil_double - res[s][w];
+
+				if(target_res < 0.0)
+				{
+					nworkers_to_add=1;
+					int old_start = start[w];
+					if(start[w] != 0)
+						start[w]--;
+					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
+					start[w] = old_start;
+					int i;
+					for(i = 0; i < nworkers_to_add; i++)
+					{
+						workers_add[nw_add++] = workers_to_add[i];
+					}
+					free(workers_to_add);
+				}
+				else
+				{
+					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
+					int i;
+					for(i = 0; i < nworkers_to_add; i++)
+						workers_add[nw_add++] = workers_to_add[i];
+					free(workers_to_add);
+				}
+				if(diff != 0.0)
+					start[w]--;
+			}
+			else
+			{
+				double nworkers_to_add = res[s][w];
+				int x = floor(nworkers_to_add);
+				double x_double = (double)x;
+				double diff = nworkers_to_add - x_double;
+				if(diff == 0.0)
+				{
+					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
+					int i;
+					for(i = 0; i < x; i++)
+						workers_add[nw_add++] = workers_to_add[i];
+					free(workers_to_add);
+				}
+				else
+				{
+					x+=1;
+					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
+					int i;
+					if(diff >= 0.3)
+						for(i = 0; i < x; i++)
+							workers_add[nw_add++] = workers_to_add[i];
+					else
+						for(i = 0; i < x-1; i++)
+							workers_add[nw_add++] = workers_to_add[i];
+					
+					free(workers_to_add);
+				}
+			}
+		}
+//		sc_hypervisor_start_resize(sched_ctxs[s]);
+		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
+		int workers_remove[STARPU_NMAXWORKERS];
+		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
+		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
+	}
+}
+
 /* nw = all the workers (either in a list or on all machine) */
 void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw)
 {
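
sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs() grants each context a fractional number of workers; for non-CPU workers the fractional part is rounded up only once it reaches 0.3. The rule can be restated by the standalone helper below (hypothetical, kept separate from the patch):

#include <math.h>

/* whole workers actually granted for a fractional request, mirroring the
   0.3 threshold used by the new distribution function */
static int workers_granted(double requested)
{
	int whole = (int)floor(requested);
	double frac = requested - (double)whole;
	return (frac >= 0.3) ? whole + 1 : whole;
}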

+ 1 - 1
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -511,7 +511,7 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_
 		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs);
 
 		
-		if(vmax != 0.0)
+//		if(vmax != 0.0)
 		{
 			for(i = 0; i < ns; i++)
 			{

+ 120 - 0
sc_hypervisor/src/policies_utils/speed.c

@@ -140,6 +140,17 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
 			if(arch == req_arch && sc_w->compute_idle[worker])
 			{
+				if(sc_w->exec_start_time[worker] != 0.0)
+				{
+					double current_exec_time = 0.0;
+					if(sc_w->exec_start_time[worker] < sc_w->start_time)
+						current_exec_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */ 
+					else
+						current_exec_time = (curr_time - sc_w->exec_start_time[worker]) / 1000000.0; /* in seconds */ 
+					double suppl_flops = current_exec_time * sc_hypervisor_get_ref_speed_per_worker_type(sc_w, req_arch);
+					all_workers_flops += suppl_flops;
+				}		
+
 				all_workers_flops += sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
 				if(max_workers_idle_time < sc_w->idle_time[worker])
 					max_workers_idle_time = sc_w->idle_time[worker]; /* in seconds */
@@ -201,3 +212,112 @@ double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_w
 
 	return speed;
 }
+
+double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch)
+{
+	double total_executed_flops = 0.0;
+	double total_estimated_flops = 0.0;
+	struct sc_hypervisor_wrapper *sc_w;
+	double max_real_start_time = 0.0;
+	int s;
+	unsigned nworkers =  starpu_worker_get_count_by_type(arch);
+
+	unsigned *sched_ctxs;
+	int nsched_ctxs;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS);
+	
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+		struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctxs[s]);
+		int worker;
+		
+		struct starpu_sched_ctx_iterator it;
+		if(workers->init_iterator)
+			workers->init_iterator(workers, &it);
+
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
+			if(arch == req_arch)
+			{
+				total_executed_flops += sc_w->total_elapsed_flops[worker] / 1000000000.0; /*in gflops */;
+			}
+		}
+
+		total_estimated_flops += sc_w->total_flops / 1000000000.0; /*in gflops */;;
+
+		if(max_real_start_time < sc_w->real_start_time)
+			max_real_start_time = sc_w->real_start_time;
+	}
+	double speed = -1.0;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("total_exec_flops %lf total_estimated_flops %lf max_real_start_time %lf nworkers %d \n", total_executed_flops, total_estimated_flops, max_real_start_time, nworkers);
+#endif
+	if(total_executed_flops > 0.5*total_estimated_flops)
+	{
+		double curr_time = starpu_timing_now();
+		double time = (curr_time - max_real_start_time) / 1000000.0; /* in seconds */
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+		printf("time = %lf\n", time);
+#endif
+		speed = (total_executed_flops / time) / nworkers; 
+	}
+
+	return speed;
+}
+
+void _consider_max_for_children(unsigned sched_ctx, unsigned consider_max)
+{
+	struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+	sc_w->consider_max = consider_max;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+	printf("ctx %d consider max %d \n", sched_ctx, sc_w->consider_max); 
+#endif
+
+	int level = starpu_sched_ctx_get_hierarchy_level(sched_ctx);
+	unsigned *sched_ctxs_child;
+	int nsched_ctxs_child = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctx);
+	int s;
+	for(s = 0; s < nsched_ctxs_child; s++)
+		_consider_max_for_children(sched_ctxs_child[s], consider_max);
+	if(nsched_ctxs_child > 0)
+		free(sched_ctxs_child);
+	return;
+}
+
+void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw)
+{
+	unsigned *sched_ctxs;
+	int nsched_ctxs;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS);
+
+	int nw = tw->nw;
+	double avg_speed_per_tw[nw];
+	int w;
+	for(w = 0; w < nw; w++)
+	{
+		avg_speed_per_tw[w] = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw));
+		if(avg_speed_per_tw[w] == -1.0)
+			return;
+	}
+
+	int s;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+			double speed = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+			printf("%d: speed %lf avg_speed %lf min %lf max %lf\n", sched_ctxs[s], speed, avg_speed_per_tw[w], (avg_speed_per_tw[w]*0.5), (avg_speed_per_tw[w]*1.5));
+#endif
+			if(speed < avg_speed_per_tw[w]*0.5 || speed > avg_speed_per_tw[w]*1.5)
+				_consider_max_for_children(sched_ctxs[s], 1);
+			else
+				_consider_max_for_children(sched_ctxs[s], 0);
+		}
+	}
+}
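
sc_hypervisor_get_avg_speed() stays at -1.0 until more than half of the estimated flops have been executed, then averages the executed Gflops over the elapsed time and the number of workers of that type. A condensed restatement (illustrative helper, not part of the patch):

/* per-worker average speed in Gflop/s, valid only once more than half of
   the estimated work has already executed */
static double avg_speed_rule(double executed_gflops, double estimated_gflops,
			     double elapsed_seconds, unsigned nworkers)
{
	if(executed_gflops <= 0.5 * estimated_gflops)
		return -1.0;	/* not enough history yet */
	return (executed_gflops / elapsed_seconds) / (double)nworkers;
}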

+ 296 - 53
sc_hypervisor/src/sc_hypervisor.c

@@ -18,6 +18,7 @@
 #include <sc_hypervisor_policy.h>
 #include <starpu_config.h>
 
+double hyp_overhead = 0.0;
 unsigned imposed_resize = 0;
 unsigned type_of_tasks_known = 0;
 struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
@@ -164,6 +165,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
 	hypervisor.start_executing_time = starpu_timing_now();
+
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
@@ -192,6 +194,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 		hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
 		hypervisor.sched_ctx_w[i].total_flops_available = 0;
 		hypervisor.sched_ctx_w[i].to_be_sized = 0;
+		hypervisor.sched_ctx_w[i].consider_max = 0;
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
 		{
@@ -296,6 +299,22 @@ void sc_hypervisor_shutdown(void)
 	perf_counters = NULL;
 
 	starpu_pthread_mutex_destroy(&act_hypervisor_mutex);
+
+}
+
+void sc_hypervisor_print_overhead()
+{
+//	hyp_overhead /= 1000000.0;
+	FILE *f;
+	const char *sched_env = getenv("OVERHEAD_FILE");
+	if(!sched_env)
+		f = fopen("overhead_microsec", "a");
+	else
+		f = fopen(sched_env, "a");
+	fprintf(f, "%lf \n", hyp_overhead);
+	fclose(f);
+
+
 }
 
 /* the hypervisor is in charge only of the contexts registered to it*/
@@ -352,18 +371,21 @@ static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs)
 /* unregistered contexts will no longer be resized */
 void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 {
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 	printf("unregister ctx %d with remaining flops %lf \n", hypervisor.sched_ctx_w[sched_ctx].sched_ctx, hypervisor.sched_ctx_w[sched_ctx].remaining_flops);
+#endif
 	if(hypervisor.policy.end_ctx)
 		hypervisor.policy.end_ctx(sched_ctx);
 
+	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
 	int *pus;
 	unsigned npus = starpu_sched_ctx_get_workers_list(sched_ctx, &pus);
 
 	starpu_sched_ctx_set_priority(pus, npus, father, 1);
+	starpu_sched_ctx_set_priority_on_level(pus, npus, father, 1);
 	free(pus);
 
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned i;
 	for(i = 0; i < hypervisor.nsched_ctxs; i++)
 	{
@@ -506,7 +528,6 @@ static void _decrement_elapsed_flops_per_worker(unsigned sched_ctx, int worker,
 
 	return;
 }
-
 void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
 {
 	double start_time =  starpu_timing_now();
@@ -516,15 +537,17 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 		struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
 		
 		sender_sc_w->start_time = start_time;
+		unsigned nworkers = starpu_worker_get_count();
 		int i;
-		for(i = 0; i < STARPU_NMAXWORKERS; i++)
+ 		for(i = 0; i < nworkers; i++)
 		{
 			sender_sc_w->start_time_w[i] = start_time;
 			sender_sc_w->idle_time[i] = 0.0;
 			sender_sc_w->idle_start_time[i] = 0.0;
 			hypervisor.sched_ctx_w[sender_sched_ctx].exec_time[i] = 0.0;
-			hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
+//			hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
 			_decrement_elapsed_flops_per_worker(sender_sched_ctx, i, hypervisor.sched_ctx_w[sender_sched_ctx].elapsed_flops[i]); 
+
 		}
 		_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
 	}
@@ -534,13 +557,15 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 		struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
 		
 		receiver_sc_w->start_time = start_time;
+
+		unsigned nworkers = starpu_worker_get_count();
 		int i;
-		for(i = 0; i < STARPU_NMAXWORKERS; i++)
+ 		for(i = 0; i < nworkers; i++)
 		{
 			receiver_sc_w->start_time_w[i] = (receiver_sc_w->start_time_w[i] != 0.0) ? starpu_timing_now() : 0.0;
 			receiver_sc_w->idle_time[i] = 0.0;
 			receiver_sc_w->idle_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? 0.0 : starpu_timing_now();
-			hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
+//			hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
 			hypervisor.sched_ctx_w[receiver_sched_ctx].exec_time[i] = 0.0;
 			_decrement_elapsed_flops_per_worker(receiver_sched_ctx, i, hypervisor.sched_ctx_w[receiver_sched_ctx].elapsed_flops[i]); 
 		}
@@ -557,22 +582,25 @@ void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sch
 	{
 		_print_current_time();
 		unsigned j;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 		printf("resize ctx %d with %d workers", sender_sched_ctx, nworkers_to_move);
 		for(j = 0; j < nworkers_to_move; j++)
 			printf(" %d", workers_to_move[j]);
 		printf("\n");
-		starpu_fxt_trace_user_event(1);
+#endif
+
 		hypervisor.allow_remove[receiver_sched_ctx] = 0;
 		starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx);
 
 		if(now)
 		{
 			unsigned j;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 			printf("remove now from ctx %d:", sender_sched_ctx);
 			for(j = 0; j < nworkers_to_move; j++)
 				printf(" %d", workers_to_move[j]);
 			printf("\n");
-
+#endif
 			starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx);
 			hypervisor.allow_remove[receiver_sched_ctx] = 1;
 			_reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
@@ -616,10 +644,12 @@ void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworke
 	{
 		_print_current_time();
 		unsigned j;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 		printf("add to ctx %d:", sched_ctx);
 		for(j = 0; j < nworkers_to_add; j++)
 			printf(" %d", workers_to_add[j]);
 		printf("\n");
+#endif
 		starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx);
 		struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx);
 		unsigned i;
@@ -647,22 +677,24 @@ void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigne
 		if(now)
 		{
 			unsigned j;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 			printf("remove explicitly now from ctx %d:", sched_ctx);
 			for(j = 0; j < nworkers_to_remove; j++)
 				printf(" %d", workers_to_remove[j]);
 			printf("\n");
-			
+#endif
 			starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx);
 			_reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS);
 		}
 		else
 		{
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 			printf("try to remove from ctx %d: ", sched_ctx);
 			unsigned j;
 			for(j = 0; j < nworkers_to_remove; j++)
 				printf(" %d", workers_to_remove[j]);
 			printf("\n");
-
+#endif
 			int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 			if(ret != EBUSY)
 			{
@@ -831,15 +863,115 @@ void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned
 	hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker] = allow;
 }
 
-void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
+
+int _update_max_hierarchically(unsigned *sched_ctxs, int nsched_ctxs)
+{
+	int s, i;
+	unsigned leaves[hypervisor.nsched_ctxs];
+	int nleaves = 0;
+	sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves);
+
+	int max = 0;
+
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
+		unsigned found = 0;
+		int l = 0;
+		for(l = 0; l < nleaves; l++)
+		{
+			if(leaves[l] == sched_ctxs[s])
+			{
+				found = 1;
+				break;
+			}
+		}
+		if(!found)
+		{
+			config->max_nworkers = 0;
+			int level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]);
+			unsigned *sched_ctxs_child;
+			int nsched_ctxs_child = 0;
+			sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctxs[s]);
+			if(nsched_ctxs_child > 0)
+			{			
+				config->max_nworkers += _update_max_hierarchically(sched_ctxs_child, nsched_ctxs_child);
+				free(sched_ctxs_child);
+				int max_possible_workers = starpu_worker_get_count();
+				if(config->max_nworkers < 0)
+					config->max_nworkers = 0;
+				if(config->max_nworkers > max_possible_workers)
+					config->max_nworkers = max_possible_workers;
+			
+			}
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+			printf("ctx %d has max %d \n", sched_ctxs[s], config->max_nworkers);
+#endif
+		}
+		max += config->max_nworkers;
+	}
+	return max;
+}
+void _update_max_diff_hierarchically(unsigned father, double diff)
+{
+	int level = starpu_sched_ctx_get_hierarchy_level(father);
+	unsigned *sched_ctxs_child;
+	int nsched_ctxs_child = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, father);
+	if(nsched_ctxs_child > 0)
+	{
+		int s;
+		double total_nflops = 0.0;
+		for(s = 0; s < nsched_ctxs_child; s++)
+		{
+			total_nflops += hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops;
+		}
+
+		int accumulated_diff = 0;
+		for(s = 0; s < nsched_ctxs_child; s++)
+		{
+			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs_child[s]);
+			double remaining_flops = hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops;
+ 			int current_diff = total_nflops == 0.0 ? 0.0 : floor((remaining_flops / total_nflops) * diff);
+			accumulated_diff += current_diff;
+			if(s == (nsched_ctxs_child - 1) && accumulated_diff < diff)
+				current_diff += (diff - accumulated_diff);
+			config->max_nworkers += current_diff;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+			printf("%d: redib max_nworkers incr %d diff = %d \n",  sched_ctxs_child[s], config->max_nworkers, current_diff);
+#endif
+			_update_max_diff_hierarchically(sched_ctxs_child[s], current_diff);
+		}
+	}
+	return;
+}
+
+void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_workers)
 {
+	unsigned leaves[hypervisor.nsched_ctxs];
+	unsigned nleaves = 0;
+	sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves);
+	int l;
+
 	unsigned sched_ctx;
 	int total_max_nworkers = 0;
-	int max_cpus = starpu_cpu_worker_get_count();
+//	int max_cpus = starpu_cpu_worker_get_count();
 	unsigned configured = 0;
 	int i;
 	for(i = 0; i < nsched_ctxs; i++)
 	{
+		unsigned found = 0;
+		for(l = 0; l < nleaves; l++)
+		{
+			if(leaves[l] == sched_ctxs[i])
+			{
+				found = 1;
+				break;
+			}
+		}
+		if(!found)
+			continue;
+
 		sched_ctx = sched_ctxs[i];
 
 		if(hypervisor.sched_ctx_w[sched_ctx].to_be_sized) continue;
@@ -896,7 +1028,12 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			}
 			else
 			{
-				double current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
+				double current_exec_time = 0.0;
+				if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] < hypervisor.sched_ctx_w[sched_ctx].start_time)
+					current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */ 
+				else
+					current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
+
 				exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time;
 			}		
 			norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 0.0 : exec_time / elapsed_time_worker[worker];
@@ -905,52 +1042,99 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
 		int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctx);
-		if(norm_idle_time >= 0.9)
-		{
-			config->max_nworkers = lrint(norm_exec_time);
-		}
-		else
-		{
-			if(norm_idle_time < 0.1)
-				config->max_nworkers = lrint(norm_exec_time)  + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
-			else
-				config->max_nworkers = lrint(norm_exec_time);
-		}
+/* 		if(norm_idle_time >= 0.9) */
+/* 		{ */
+/* 			config->max_nworkers = lrint(norm_exec_time); */
+/* 		} */
+/* 		else */
+/* 		{ */
+/* 			if(norm_idle_time < 0.1) */
+/* 				config->max_nworkers = lrint(norm_exec_time)  + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
+/* 			else */
+/* 				config->max_nworkers = lrint(norm_exec_time); */
+/* 		} */
+		config->max_nworkers = lrint(norm_exec_time);
 //		config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 		
-		if(config->max_nworkers < 0)
-			config->max_nworkers = 0;
-		if(config->max_nworkers > max_cpus)
-			config->max_nworkers = max_cpus;
+		/* if(config->max_nworkers < 0) */
+/* 			config->max_nworkers = 0; */
+/* 		if(config->max_nworkers > max_workers) */
+/* 			config->max_nworkers = max_workers; */
 		
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
 		printf("%d: ready tasks  %d norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n", 
 		       sched_ctx, nready_tasks, norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
-
+#endif
 
 		total_max_nworkers += config->max_nworkers;
 		configured = 1;
+		
 	}
 
-	/*if the sum of the max cpus is smaller than the total cpus available 
-	  increase the max for the ones having more ready tasks to exec */
-	if(configured && total_max_nworkers < max_cpus)
+	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
+	if(nhierarchy_levels > 1 && configured)
 	{
-		int diff = max_cpus - total_max_nworkers;
-		int max_nready = -1;
-		unsigned max_nready_sched_ctx = sched_ctxs[0];
-		for(i = 0; i < nsched_ctxs; i++)
+		unsigned *sched_ctxs2;
+		int nsched_ctxs2;
+		sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS);
+		
+		if(nsched_ctxs2  > 0)
 		{
-			int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctxs[i]);
-			if(max_nready < nready_tasks)
+			_update_max_hierarchically(sched_ctxs2, nsched_ctxs2);
+			int s;
+			int current_total_max_nworkers = 0;
+			double max_nflops = 0.0;
+			unsigned max_nflops_sched_ctx = sched_ctxs2[0];
+			for(s = 0; s < nsched_ctxs2; s++)
+			{
+				struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs2[s]);
+				current_total_max_nworkers += config->max_nworkers;
+				if(max_nflops < hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops)
+				{
+					max_nflops = hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops;
+					max_nflops_sched_ctx = sched_ctxs2[s];
+				}
+			}
+
+			int max_possible_workers = starpu_worker_get_count();
+			/*if the sum of the max cpus is smaller than the total cpus available 
+			  increase the max for the ones having more ready tasks to exec */
+			if(current_total_max_nworkers < max_possible_workers)
 			{
-				max_nready = nready_tasks;
-				max_nready_sched_ctx = sched_ctxs[i];
+				int diff = max_possible_workers - current_total_max_nworkers;
+				struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nflops_sched_ctx);
+				config->max_nworkers += diff;
+#ifdef STARPU_SC_HYPERVISOR_DEBUG
+				printf("%d: redib max_nworkers incr %d \n",  max_nflops_sched_ctx, config->max_nworkers);
+#endif
+				_update_max_diff_hierarchically(max_nflops_sched_ctx, diff);
 			}
 		}
-		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx);
-		config->max_nworkers += diff;
-		printf("%d: redib max_nworkers incr %d \n",  max_nready_sched_ctx, config->max_nworkers);
 	}
+
+	
+
+	/*if the sum of the max cpus is smaller than the total cpus available 
+	  increase the max for the ones having more ready tasks to exec */
+	/* if(configured && total_max_nworkers < max_workers) */
+/* 	{ */
+/* 		int diff = max_workers - total_max_nworkers; */
+/* 		int max_nready = -1; */
+/* 		unsigned max_nready_sched_ctx = sched_ctxs[0]; */
+/* 		for(i = 0; i < nsched_ctxs; i++) */
+/* 		{ */
+/* 			int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctxs[i]); */
+/* 			if(max_nready < nready_tasks) */
+/* 			{ */
+/* 				max_nready = nready_tasks; */
+/* 				max_nready_sched_ctx = sched_ctxs[i]; */
+/* 			} */
+/* 		} */
+/* 		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx); */
+/* 		config->max_nworkers += diff; */
+/* 		printf("%d: redib max_nworkers incr %d \n",  max_nready_sched_ctx, config->max_nworkers); */
+/* 	} */
+       
 }
 
 /* notifies the hypervisor that a new task was pushed on the queue of the worker */
@@ -961,10 +1145,9 @@ static void notify_pushed_task(unsigned sched_ctx, int worker)
 		hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
 
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
+	{
 		hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now();
-
-	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
-		hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
+	}
 
 	int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 
@@ -1018,9 +1201,16 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 
 	if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
 	{
-		if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && sc_w->hyp_react_start_time != 0.0)
+		if(sc_w->hyp_react_start_time == 0.0)
+			sc_w->hyp_react_start_time = starpu_timing_now();
+		
+		double curr_time = starpu_timing_now();
+		double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
+		if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample)
 		{
 			unsigned idle_everywhere = 0;
+			unsigned *sched_ctxs = NULL;
+			unsigned nsched_ctxs = 0;
 			int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 			if(ret != EBUSY)
 			{
@@ -1028,8 +1218,7 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 				{
 					idle_everywhere = 1;
 				
-					unsigned *sched_ctxs = NULL;
-					unsigned nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs);
+					nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs);
 					int s;
 					for(s = 0; s < nsched_ctxs; s++)
 					{
@@ -1045,21 +1234,43 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 			}
 			
 			if(idle_everywhere)
+			{
+				double hyp_overhead_start = starpu_timing_now();
 				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+				double hyp_overhead_end = starpu_timing_now();
+				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
+			}
+
+
+			sc_w->hyp_react_start_time = starpu_timing_now();
 		}
 	}
 	return;
 }
 
+void _update_real_start_time_hierarchically(unsigned sched_ctx)
+{
+	hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+	{
+		_update_real_start_time_hierarchically(starpu_sched_ctx_get_inheritor(sched_ctx));
+	}
+	return;
+}
 
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_poped_task(unsigned sched_ctx, int worker)
 {
+	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
+		_update_real_start_time_hierarchically(sched_ctx);
+
 	if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
+	{
 		hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now();
+	}
 
 	hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = starpu_timing_now();
-
+		
 	if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] > 0.0)
 	{
 		int ns = hypervisor.nsched_ctxs;
@@ -1089,7 +1300,6 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 				
 	if(hypervisor.policy.handle_idle_end)
 		hypervisor.policy.handle_idle_end(sched_ctx, worker);
-
 }
 
  
@@ -1120,8 +1330,9 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 	if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] != 0.0)
 	{
 		double current_time = starpu_timing_now();
-		hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] += (current_time - 
-									hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
+		double exec_time = (current_time - 
+				    hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
+		hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] += exec_time;
 		hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = 0.0;
 	}
 
@@ -1153,7 +1364,10 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
 			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
 			{
+				double hyp_overhead_start = starpu_timing_now();
 				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+				double hyp_overhead_end = starpu_timing_now();
+				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
 				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
 			}
 		}
@@ -1349,17 +1563,22 @@ struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsign
 
 void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops)
 {
+//	double hyp_overhead_start = starpu_timing_now();
 	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 	hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops;	
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+/* 	double hyp_overhead_end = starpu_timing_now(); */
+/* 	hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */
 	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
 		sc_hypervisor_update_diff_total_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_total_flops);
+	return;
 
 }
 
 void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops)
 {
+//	double hyp_overhead_start = starpu_timing_now();
 	int workerid = starpu_worker_get_id();
 	if(workerid != -1)
 	{
@@ -1368,8 +1587,11 @@ void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_ela
 		hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops;
 //		starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 	}
+/* 	double hyp_overhead_end = starpu_timing_now(); */
+/* 	hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */
 	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
 		sc_hypervisor_update_diff_elapsed_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_elapsed_flops);
+	return;
 }
 
 void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id)
@@ -1407,3 +1629,24 @@ unsigned sc_hypervisor_get_nhierarchy_levels(void)
 	}
 	return nlevels;
 }
+
+void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves)
+{
+	int s, s2;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		unsigned is_someones_father = 0;
+		for(s2 = 0; s2 < nsched_ctxs; s2++)
+		{
+			unsigned father = starpu_sched_ctx_get_inheritor(sched_ctxs[s2]);
+			if(sched_ctxs[s] == father)
+			{
+				is_someones_father = 1;
+				break;
+			}
+		}
+		if(!is_someones_father)
+			leaves[(*nleaves)++] = sched_ctxs[s];
+	}
+	return;
+}
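
The hypervisor now brackets its policy callbacks with starpu_timing_now() and accumulates the difference in hyp_overhead; sc_hypervisor_print_overhead() appends that total (in microseconds) to the file named by the OVERHEAD_FILE environment variable, or to "overhead_microsec" by default. A hedged usage sketch from application code; the output path is a placeholder:

#include <stdlib.h>
#include <sc_hypervisor.h>

void dump_hypervisor_overhead(void)
{
	/* optional: pick the output file before dumping; the hypervisor
	   only appends one line per call */
	setenv("OVERHEAD_FILE", "/tmp/hyp_overhead.log", 1);
	sc_hypervisor_print_overhead();
}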

+ 48 - 3
src/core/sched_ctx.c

@@ -606,6 +606,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 	{
 		starpu_sched_ctx_add_workers(workerids, nworkers_ctx, inheritor_sched_ctx_id);
 		starpu_sched_ctx_set_priority(workerids, nworkers_ctx, inheritor_sched_ctx_id, 1);
+		starpu_sched_ctx_set_priority_on_level(workerids, nworkers_ctx, inheritor_sched_ctx_id, 1);
 	}
 
 	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
@@ -700,12 +701,38 @@ void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx
 
 }
 
+void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
+{
+/* 	int w; */
+/* 	struct _starpu_worker *worker = NULL; */
+/* 	for(w = 0; w < nworkers_to_add; w++) */
+/* 	{ */
+/* 		worker = _starpu_get_worker_struct(workers_to_add[w]); */
+/* 		STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex); */
+/* 		struct _starpu_sched_ctx_list *l = NULL; */
+/* 		for (l = worker->sched_ctx_list; l; l = l->next) */
+/* 		{ */
+/* 			if(l->sched_ctx != STARPU_NMAX_SCHED_CTXS && l->sched_ctx != sched_ctx && */
+/* 			   starpu_sched_ctx_get_hierarchy_level(l->sched_ctx) == starpu_sched_ctx_get_hierarchy_level(sched_ctx)) */
+/* 			{ */
+/* 				/\* the lock is taken inside the func *\/ */
+/* 				STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex); */
+/* 				starpu_sched_ctx_set_priority(&workers_to_add[w], 1, l->sched_ctx, priority); */
+/* 				STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex); */
+/* 			} */
+/* 		} */
+/* 		STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex); */
+/* 	} */
+/* 	return; */
+
+}
 static void _set_priority_hierarchically(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
 {
 	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
 	{
 		unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
 		starpu_sched_ctx_set_priority(workers_to_add, nworkers_to_add, father, priority);
+		starpu_sched_ctx_set_priority_on_level(workers_to_add, nworkers_to_add, father, priority);
 		_set_priority_hierarchically(workers_to_add, nworkers_to_add, father, priority);
 	}
 	return;
@@ -733,6 +760,7 @@ void starpu_sched_ctx_add_workers(int *workers_to_add, int nworkers_to_add, unsi
 		{
 			_starpu_update_workers_with_ctx(added_workers, n_added_workers, sched_ctx->id);
 		}
+		starpu_sched_ctx_set_priority(workers_to_add, nworkers_to_add, sched_ctx_id, 1);
 		_set_priority_hierarchically(workers_to_add, nworkers_to_add, sched_ctx_id, 0);
 
 	}
@@ -769,7 +797,9 @@ void starpu_sched_ctx_remove_workers(int *workers_to_remove, int nworkers_to_rem
 		_starpu_remove_workers_from_sched_ctx(sched_ctx, workers_to_remove, nworkers_to_remove, removed_workers, &n_removed_workers);
 
 		if(n_removed_workers > 0)
-			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx->id, 0);
+		{
+			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0);
+		}
 
 	}
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&changing_ctx_mutex[sched_ctx_id]);
@@ -846,6 +876,7 @@ void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id)
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	int reached = _starpu_barrier_counter_get_reached_start(&sched_ctx->tasks_barrier);
 	int finished = reached == 1;
+
         /* when finished decrementing the tasks if the user signaled he will not submit tasks anymore
            we can move all its workers to the inheritor context */
 	if(finished && sched_ctx->inheritor != STARPU_NMAX_SCHED_CTXS)
@@ -1394,6 +1425,7 @@ void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ct
 		for(w = 0; w < nworkers; w++)
 		{
 			worker = _starpu_get_worker_struct(workers[w]);
+			STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
 			struct _starpu_sched_ctx_list *l = NULL;
 			for (l = worker->sched_ctx_list; l; l = l->next)
 			{
@@ -1403,11 +1435,26 @@ void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ct
 					break;
 				}
 			}
+			STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
 		}
 	}
 	return;
 }
 
+unsigned starpu_sched_ctx_get_priority(int workerid, unsigned sched_ctx_id)
+{
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
+	{
+		if(l->sched_ctx == sched_ctx_id)
+		{
+			return l->priority;
+		}
+	}
+	return 1;
+}
+
 unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker)
 {
 	struct _starpu_sched_ctx_list *l = NULL;
@@ -1597,5 +1644,3 @@ void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, uns
 
 	return ret;
 }
-
-

+ 32 - 2
src/core/sched_policy.c

@@ -25,6 +25,8 @@
 #include <core/debug.h>
 
 static int use_prefetch = 0;
+double idle[STARPU_NMAXWORKERS];
+double idle_start[STARPU_NMAXWORKERS];
 
 int starpu_get_prefetch_flag(void)
 {
@@ -627,7 +629,8 @@ struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker
 		}
 	}
 
-	if(worker->pop_ctx_priority == 0 && first_sched_ctx == STARPU_NMAX_SCHED_CTXS)
+//	if(worker->pop_ctx_priority == 0 && first_sched_ctx == STARPU_NMAX_SCHED_CTXS)
+	if(first_sched_ctx == STARPU_NMAX_SCHED_CTXS)
 		first_sched_ctx = worker->sched_ctx_list->sched_ctx;
 
 	worker->poped_in_ctx[first_sched_ctx] = !worker->poped_in_ctx[first_sched_ctx];
@@ -729,9 +732,18 @@ pick:
 
 
 	if (!task)
+	{
+		idle_start[worker->workerid] = starpu_timing_now();
 		return NULL;
+	}
 
-
+	if(idle_start[worker->workerid] != 0.0)
+	{
+		double idle_end = starpu_timing_now();
+		idle[worker->workerid] += (idle_end - idle_start[worker->workerid]);
+		idle_start[worker->workerid] = 0.0;
+	}
+	
 
 #ifdef STARPU_USE_SC_HYPERVISOR
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
@@ -866,3 +878,21 @@ int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
 
 	return  _starpu_push_local_task(worker, task, prio);
 }
+
+void _starpu_print_idle_time()
+{
+	double all_idle = 0.0;
+	int i = 0;
+	for(i = 0; i < STARPU_NMAXWORKERS; i++)
+		all_idle += idle[i];
+
+	FILE *f;
+	const char *sched_env = getenv("IDLE_FILE");
+	if(!sched_env)
+		f = fopen("idle_microsec", "a");
+	else
+		f = fopen(sched_env, "a");
+	fprintf(f, "%lf \n", all_idle);
+	fclose(f);
+	
+}

+ 1 - 0
src/core/sched_policy.h

@@ -54,6 +54,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
 void _starpu_sched_pre_exec_hook(struct starpu_task *task);
 
+void _starpu_print_idle_time();
 /*
  *	Predefined policies
  */

+ 1 - 1
src/core/workers.c

@@ -1342,7 +1342,7 @@ void starpu_shutdown(void)
 	if (_starpu_scc_common_is_mp_initialized())
 		_starpu_scc_src_mp_deinit();
 #endif
-
+//	_starpu_print_idle_time();
 	_STARPU_DEBUG("Shutdown finished\n");
 }
 

+ 1 - 1
src/worker_collection/worker_tree.c

@@ -64,7 +64,7 @@ static int tree_get_next(struct starpu_worker_collection *workers, struct starpu
 
 	ret = _starpu_worker_get_workerid(neighbour->id);
 	STARPU_ASSERT_MSG(ret != -1, "bind id not correct");
-	it->visited[ret] = 1;
+	it->visited[neighbour->id] = 1;
 
 	return ret;
 }