浏览代码

resize hierarchic ctxs hierarchically :)

Andra Hugo 11 年之前
父节点
当前提交
60d44eb737

+ 3 - 0
sc_hypervisor/include/sc_hypervisor.h

@@ -136,6 +136,9 @@ void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, un
 
 /* returns the number of levels of ctxs registered to the hyp */
 unsigned sc_hypervisor_get_nhierarchy_levels(void);
+
+/* returns the nready flops of all ctxs below sched_ctx in the hierarchy */
+double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -35,7 +35,8 @@ extern "C"
 struct sc_hypervisor_policy_task_pool; 
 struct types_of_workers;
 /* returns tmax, and computes in table res the nr of workers needed by each context such that the system ends up with the smallest tmax */
-double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], struct types_of_workers *tw);
+double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], 
+					     int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs);
 
 /* returns tmax of the system */
 double sc_hypervisor_lp_get_tmax(int nw, int *workers);

+ 4 - 1
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -115,7 +115,10 @@ unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker);
 unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker);
 
 /* check if there is a speed gap btw ctxs */
-unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void);
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
+
+/* check if there is a speed gap btw ctxs on one level */
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs);
 
 /* check what triggers resizing (idle, speed, etc.)*/
 unsigned sc_hypervisor_get_resize_criteria();

+ 133 - 105
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -26,7 +26,6 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 {
 	/* for vite */
 	printf("resize_no = %d\n", resize_no);
-	starpu_fxt_trace_user_event(resize_no++);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 
 //	if(ns <= 1) return;
@@ -46,7 +45,7 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	struct timeval end_time;
 	gettimeofday(&start_time, NULL);
 	
-	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw);
+	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs);
 	gettimeofday(&end_time, NULL);
 	
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
@@ -75,22 +74,142 @@ static void _try_resizing_hierarchically(unsigned levels, unsigned current_level
 	for(s = 0; s < nsched_ctxs; s++)
 	{
 		unsigned *sched_ctxs_child;
-		int nsched_ctxs_child;
+		int nsched_ctxs_child = 0;
 		sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, current_level+1, sched_ctxs[s]);
 		if(nsched_ctxs_child > 0)
 		{
-			printf("ns %d on level %d \n", nsched_ctxs_child, current_level);
 			int *pus_father;
 			unsigned npus_father = 0;
 			npus_father = starpu_sched_ctx_get_workers_list(sched_ctxs[s], &pus_father);
-		
+			
 			_try_resizing_hierarchically(levels-1, current_level+1, sched_ctxs_child, nsched_ctxs_child, pus_father, npus_father);
 
 			free(pus_father);
 		}
 	}
+	return;
+}
+
+static int _get_min_level(unsigned *sched_ctxs, int nsched_ctxs)
+{
+	int min = sc_hypervisor_get_nhierarchy_levels();
+	int s;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		int level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]);
+		if(level < min)
+			min = level;
+	}
+	return min;
 }
-static void feft_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, 
+
+static int _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *first_level, int *nsched_ctxs_first_level)
+{
+	int min = _get_min_level(sched_ctxs, nsched_ctxs);
+	int s;
+	for(s = 0; s < nsched_ctxs; s++)
+		if(starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]) == min)
+			first_level[(*nsched_ctxs_first_level)++] = sched_ctxs[s];
+	return min;
+}
+
+static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
+{
+	starpu_fxt_trace_user_event(resize_no);
+	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
+	if(nhierarchy_levels > 1)
+	{
+		if(nsched_ctxs == -1)
+		{
+			unsigned *sched_ctxs2;
+			int nsched_ctxs2;
+			sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS);
+			
+			if(nsched_ctxs2  > 0)
+			{
+				_try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs2, nsched_ctxs2, workers, nworkers);
+				free(sched_ctxs2);
+			}
+		}
+		else
+		{
+			unsigned first_level[nsched_ctxs];
+			int nsched_ctxs_first_level = 0;
+			int min = _get_first_level(sched_ctxs, nsched_ctxs, first_level, &nsched_ctxs_first_level);
+			
+			_try_resizing_hierarchically(nhierarchy_levels, min, first_level, nsched_ctxs_first_level, workers, nworkers);
+		}
+	}
+	else
+		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
+	resize_no++;
+}
+
+static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
+{
+	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
+	if(nhierarchy_levels > 1)
+	{
+		
+		unsigned current_level = starpu_sched_ctx_get_hierarchy_level(sched_ctx);
+		unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
+		int level;
+		int *pus_father_old = NULL;
+		unsigned npus_father_old = 0;
+		unsigned *sched_ctxs_old = NULL;
+		int nsched_ctxs_old = 0;
+		unsigned is_speed_diff = 0;
+		unsigned last_level_diff = 0;
+
+		for(level = current_level ; level >= 0; level--)
+		{
+			int *pus_father;
+			unsigned npus_father = 0;
+			npus_father = starpu_sched_ctx_get_workers_list(father, &pus_father);
+			
+			
+			unsigned *sched_ctxs;
+			int nsched_ctxs;
+			is_speed_diff = sc_hypervisor_check_speed_gap_btw_ctxs_on_level(level, pus_father, npus_father, father, &sched_ctxs, &nsched_ctxs);
+			if(!is_speed_diff)
+			{
+				if(level == current_level)
+					break;
+				else
+				{
+					_resize(sched_ctxs_old, nsched_ctxs_old, pus_father_old, npus_father_old);
+					free(pus_father_old);
+					free(sched_ctxs_old);
+					pus_father_old = NULL;
+					sched_ctxs_old = NULL;
+					free(pus_father);
+					free(sched_ctxs);
+					break;
+				}
+			}	
+			if(pus_father_old)
+				free(pus_father_old);
+			if(sched_ctxs_old)
+				free(sched_ctxs_old);
+			pus_father_old = pus_father;
+			sched_ctxs_old = sched_ctxs;
+			npus_father_old = npus_father;
+			nsched_ctxs_old = nsched_ctxs;
+			
+			father = starpu_sched_ctx_get_inheritor(father);
+		}
+		if(is_speed_diff)
+		{
+			_resize(NULL, -1, NULL, -1);
+		}
+	}
+	else if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+	{
+		_resize(NULL, -1, NULL, -1);
+	}
+}
+
+static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
@@ -100,44 +219,16 @@ static void feft_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx,
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-			if(sc_hypervisor_check_speed_gap_btw_ctxs())
-			{
-				unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
-				if(nhierarchy_levels > 1)
-				{
-					unsigned *sched_ctxs;
-					int nsched_ctxs;
-					sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS);
-					if(nsched_ctxs > 0)
-					{
-						_try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs, nsched_ctxs, NULL, -1);
-						free(sched_ctxs);
-					}
-				}
-				else
-					_try_resizing(NULL, -1, NULL, -1);
-			}
-	
+			_resize_if_speed_diff(sched_ctx, worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 	}
-
 }
+
 static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
-	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
-	unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers;
-	
-	struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, curr_nworkers);
-	int nw = tw->nw;
-	double nworkers_per_type[ns][nw];
-
-	int total_nw[nw];
-	sc_hypervisor_group_workers_by_type(tw, total_nw);
-	
-
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+
 	struct sc_hypervisor_wrapper* sc_w  = NULL;
 	int s = 0;
 	for(s = 0; s < nsched_ctxs; s++)
@@ -146,49 +237,7 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 		sc_w->to_be_sized = 1;
 	}
 
-	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_type, total_nw, tw);
-	if(vmax != 0.0)
-	{
-// 		printf("********size\n");
-/* 		int i; */
-/* 		for( i = 0; i < nsched_ctxs; i++) */
-/* 		{ */
-/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 0, nworkers_per_type[i][0]); */
-/* #ifdef STARPU_USE_CUDA */
-/* 			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); */
-/* 			if(ncuda != 0) */
-/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]); */
-/* #endif */
-/* 		} */
-		int nworkers_per_type_rounded[ns][nw];
-		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_type, nworkers_per_type_rounded);
-/*       	for( i = 0; i < nsched_ctxs; i++) */
-/* 		{ */
-/* 			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]); */
-/* #ifdef STARPU_USE_CUDA */
-/* 			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); */
-/* 			if(ncuda != 0) */
-/* 				printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]); */
-/* #endif */
-/* 		} */
-
-		unsigned has_workers = 0;
-		int s;
-		for(s = 0; s < ns; s++)
-		{
-			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(curr_sched_ctxs[s], STARPU_ANY_WORKER);
-			if(nworkers_ctx != 0)
-			{
-				has_workers = 1;
-				break;
-			}
-		}
-
-		if(has_workers)
-			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_type_rounded, nworkers_per_type, curr_sched_ctxs, tw);
-		else
-			sc_hypervisor_lp_distribute_resources_in_ctxs(sched_ctxs, ns, nw, nworkers_per_type_rounded, nworkers_per_type, workers, curr_nworkers, tw);
-	}
+	_resize(sched_ctxs, nsched_ctxs, workers, nworkers);
 	printf("finished size ctxs\n");
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
@@ -201,8 +250,8 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-			printf("trigger idle \n");
-			_try_resizing(NULL, -1, NULL, -1);
+//			printf("trigger idle \n");
+			_resize_if_speed_diff(sched_ctx, worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 	}
@@ -226,29 +275,8 @@ static void feft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs ,
 				 return;
 			 }
 		}
-//		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
-		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
-		if(nhierarchy_levels > 1)
-		{
-			if(nsched_ctxs == -1)
-			{
-				unsigned *sched_ctxs2;
-				int nsched_ctxs2;
-				sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS);
-				
-				if(nsched_ctxs2  > 0)
-				{
-					printf("ns = %d on 0 \n", nsched_ctxs2);
-					_try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs2, nsched_ctxs2, workers, nworkers);
-					free(sched_ctxs2);
-				}
-			}
-			else
-				_try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs, nsched_ctxs, workers, nworkers);
-		}
-		else
-//			_try_resizing(NULL, -1, NULL, -1);
-			_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
+
+		_resize(sched_ctxs, nsched_ctxs, workers, nworkers);
 
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}

+ 1 - 1
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -182,7 +182,7 @@ static void ispeed_lp_handle_poped_task(__attribute__((unused))unsigned sched_ct
                 unsigned criteria = sc_hypervisor_get_resize_criteria();
                 if(criteria != SC_NOTHING && criteria == SC_SPEED)
                 {
-                        if(sc_hypervisor_check_speed_gap_btw_ctxs())
+                        if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
                         {
                                 _try_resizing(NULL, -1, NULL, -1);
                         }

+ 1 - 1
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -252,7 +252,7 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, __attribute__((unused)
 		if(criteria != SC_NOTHING && criteria == SC_SPEED)
 		{
 			
-			if(sc_hypervisor_check_speed_gap_btw_ctxs())
+			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
 			{
 				_try_resizing(NULL, -1, NULL, -1);
 			}

+ 1 - 1
sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c

@@ -291,7 +291,7 @@ static void throughput_lp_handle_poped_task(__attribute__((unused))unsigned sche
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
 		if(criteria != SC_NOTHING && criteria == SC_SPEED)
 		{
-			if(sc_hypervisor_check_speed_gap_btw_ctxs())
+			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
 			{
 				_try_resizing(NULL, -1, NULL, -1);
 			}

+ 1 - 0
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -280,6 +280,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			char name[32];
 			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
 			glp_set_col_name(lp, n, name);
+
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);

+ 16 - 7
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -21,14 +21,16 @@
 #include <starpu_config.h>
 
 double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], 
-					     int total_nw[ntypes_of_workers], struct types_of_workers *tw)
+					     int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs)
 {
-	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
+	unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
 #ifdef STARPU_HAVE_GLPK_H
 	double v[nsched_ctxs][ntypes_of_workers];
 	double flops[nsched_ctxs];
-
-	sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs);
+	
+	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
+	if(nhierarchy_levels <= 1)
+		sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs);
 
 	int nw = tw->nw;
 	int i = 0;
@@ -42,6 +44,10 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
+		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
+		if(nhierarchy_levels > 1)
+			ready_flops = sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sc_w->sched_ctx);
+
 		int nready_tasks = starpu_sched_ctx_get_nready_tasks(sc_w->sched_ctx);
 		
 		if(sc_w->to_be_sized)
@@ -51,6 +57,9 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		}
 		else
 		{
+			if(nhierarchy_levels > 1)
+				flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
+			else
 			if(sc_w->remaining_flops < 0.0)
 				flops[i] = ready_flops/1000000000.0; /* in gflops*/
 			else
@@ -216,8 +225,8 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 					res[i][w] = -1.0;
 			}
 			
-			if(optimal_v != 0.0)
-				_set_optimal_v(i, optimal_v);
+//			if(optimal_v != 0.0)
+				_set_optimal_v(sched_ctxs[i], optimal_v);
 		}
 	}
 
@@ -238,7 +247,7 @@ double sc_hypervisor_lp_get_tmax(int nworkers, int *workers)
 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
 
 	double res[nsched_ctxs][nw];
-	return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw) * 1000.0;
+	return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw, NULL) * 1000.0;
 }
 
 void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])

+ 22 - 11
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -476,12 +476,12 @@ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 }
 
 /* check if there is a big speed gap between the contexts */
-unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_in, int *workers_in, int nworkers_in)
 {
-	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
-	int ns = sc_hypervisor_get_nsched_ctxs();
-	int *workers = NULL;
-	int nworkers = starpu_worker_get_count();
+	unsigned *sched_ctxs = sched_ctxs_in == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_in;
+	int ns = ns_in == -1 ? sc_hypervisor_get_nsched_ctxs() : ns_in;
+	int *workers = workers_in;
+	int nworkers = nworkers_in == -1 ? starpu_worker_get_count() : nworkers_in;
 	int i = 0, j = 0;
 	struct sc_hypervisor_wrapper* sc_w;
 	struct sc_hypervisor_wrapper* other_sc_w;
@@ -491,7 +491,7 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 	unsigned has_opt_v = 1;
 	for(i = 0; i < ns; i++)
 	{
-		optimal_v[i] = _get_optimal_v(i);
+		optimal_v[i] = _get_optimal_v(sched_ctxs[i]);
 		if(optimal_v[i] == 0.0)
 		{
 			has_opt_v = 0;
@@ -508,7 +508,7 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 		int total_nw[nw];
 		sc_hypervisor_group_workers_by_type(tw, total_nw);
 
-		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw);
+		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs);
 
 		
 		if(vmax != 0.0)
@@ -522,10 +522,9 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 				for(w = 0; w < nw; w++)
 				{
 					v[w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));
-					
-					optimal_v[i] += nworkers_per_ctx[i][w]*v[w];
+					optimal_v[i] += nworkers_per_ctx[i][w] == -1.0 ? 0.0 : nworkers_per_ctx[i][w]*v[w];
 				}
-				_set_optimal_v(i, optimal_v[i]);
+				_set_optimal_v(sched_ctxs[i], optimal_v[i]);
 			}
 			has_opt_v = 1;
 		}
@@ -549,8 +548,11 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 			
 			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
+			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
 			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
+			{
 				return 1;
+			}
 		}
 	}
 	else /* if we have not been able to compute a theoretical speed consider the env variable
@@ -589,6 +591,15 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 	return 0;
 }
 
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs)
+{
+	sc_hypervisor_get_ctxs_on_level(sched_ctxs, nsched_ctxs, level, father_sched_ctx_id);
+	
+	
+	if(*nsched_ctxs  > 0)
+		return sc_hypervisor_check_speed_gap_btw_ctxs(*sched_ctxs, *nsched_ctxs, workers_in, nworkers_in);
+	return 0;	
+}
 
 unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
 {
@@ -598,7 +609,7 @@ unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
 		if(criteria == SC_IDLE)
 			return sc_hypervisor_check_idle(sched_ctx, worker);
 		else
-			return sc_hypervisor_check_speed_gap_btw_ctxs();
+			return sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1);
 	}
 	else
 		return 0;

+ 1 - 1
sc_hypervisor/src/policies_utils/speed.c

@@ -198,6 +198,6 @@ double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_w
 		/* a default value */
 		speed = arch == STARPU_CPU_WORKER ? SC_HYPERVISOR_DEFAULT_CPU_SPEED : SC_HYPERVISOR_DEFAULT_CUDA_SPEED;
 	}
-       
+
 	return speed;
 }

+ 78 - 14
sc_hypervisor/src/sc_hypervisor.c

@@ -473,6 +473,31 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 	return ret_val;
 }
 
+double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx)
+{
+	double ready_flops = starpu_sched_ctx_get_nready_flops(sched_ctx);
+	unsigned *sched_ctxs;
+	int nsched_ctxs = 0;
+	sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, starpu_sched_ctx_get_hierarchy_level(sched_ctx), sched_ctx);
+	int s;
+	for(s = 0; s < nsched_ctxs; s++)
+		ready_flops += sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sched_ctxs[s]);
+		//ready_flops += starpu_get_nready_flops_of_sched_ctx(sched_ctxs[s]);
+
+	return ready_flops;
+}
+static void _decrement_elapsed_flops_per_worker(unsigned sched_ctx, int worker, double flops)
+{
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+	{
+		unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
+		hypervisor.sched_ctx_w[father].elapsed_flops[worker] -= flops;
+		_decrement_elapsed_flops_per_worker(father, worker, flops);
+	}
+
+	return;
+}
+
 void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
 {
 	double start_time =  starpu_timing_now();
@@ -482,7 +507,6 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 		struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
 		
 		sender_sc_w->start_time = start_time;
-		_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
 		int i;
 		for(i = 0; i < STARPU_NMAXWORKERS; i++)
 		{
@@ -491,8 +515,9 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 			sender_sc_w->idle_start_time[i] = 0.0;
 			hypervisor.sched_ctx_w[sender_sched_ctx].exec_time[i] = 0.0;
 			hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
+			_decrement_elapsed_flops_per_worker(sender_sched_ctx, i, hypervisor.sched_ctx_w[sender_sched_ctx].elapsed_flops[i]); 
 		}
-		
+		_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
 	}
 
 	if(receiver_sched_ctx != STARPU_NMAX_SCHED_CTXS)
@@ -500,7 +525,6 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 		struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
 		
 		receiver_sc_w->start_time = start_time;
-		_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
 		int i;
 		for(i = 0; i < STARPU_NMAXWORKERS; i++)
 		{
@@ -509,9 +533,11 @@ void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sche
 			receiver_sc_w->idle_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? 0.0 : starpu_timing_now();
 			hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
 			hypervisor.sched_ctx_w[receiver_sched_ctx].exec_time[i] = 0.0;
+			_decrement_elapsed_flops_per_worker(receiver_sched_ctx, i, hypervisor.sched_ctx_w[receiver_sched_ctx].elapsed_flops[i]); 
 		}
-
+		_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
 	}
+	return;
 }
 
 /* actually move the workers: the cpus are moved, gpus are only shared  */
@@ -988,32 +1014,43 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 		{
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 			{
+				unsigned sched_ctxs_idle[STARPU_NMAX_SCHED_CTXS];
+				unsigned nsched_ctxs_idle = 0;
+				unsigned idle_everywhere = 1;
 				int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 				if(ret != EBUSY)
 				{
 					int j;
 					int ns = hypervisor.nsched_ctxs;
-					unsigned idle_everywhere = 1;
 					for(j = 0; j < ns; j++)
 					{
-						if(starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[j]))
+						if(hypervisor.sched_ctxs[j] != STARPU_NMAX_SCHED_CTXS && 
+						   starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[j]))
 						{
 							if(!sc_hypervisor_check_idle(hypervisor.sched_ctxs[j], worker))
 								idle_everywhere = 0;
+							else
+								sched_ctxs_idle[nsched_ctxs_idle++] = hypervisor.sched_ctxs[j];
 						}
 					}
 					if(idle_everywhere)
 					{
-						unsigned other_ctx = choose_ctx_to_steal(worker);
-						if(other_ctx != STARPU_NMAX_SCHED_CTXS)
-						{
-							sc_hypervisor_add_workers_to_sched_ctx(&worker, 1, other_ctx);
-							starpu_sched_ctx_set_priority(&worker, 1, other_ctx, 0);
-							_sc_hypervisor_allow_compute_idle(other_ctx, worker, 0);
-						}
+/* 						unsigned other_ctx = choose_ctx_to_steal(worker); */
+/* 						if(other_ctx != STARPU_NMAX_SCHED_CTXS) */
+/* 						{ */
+/* 							sc_hypervisor_add_workers_to_sched_ctx(&worker, 1, other_ctx); */
+/* 							starpu_sched_ctx_set_priority(&worker, 1, other_ctx, 0); */
+/* 							_sc_hypervisor_allow_compute_idle(other_ctx, worker, 0); */
+/* 						} */
 					}
 					starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 				}
+				if(idle_everywhere)
+				{
+					unsigned s;
+					for(s = 0; s < nsched_ctxs_idle; s++)
+						hypervisor.policy.handle_idle_cycle(sched_ctxs_idle[s], worker);
+				}
 //				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 			}
 		}
@@ -1063,6 +1100,24 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 }
 
  
+static void _update_counters_hierarchically(int worker, unsigned sched_ctx, double flops, size_t data_size)
+{
+	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops;
+
+	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops;
+	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+		
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+		_update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size);
+
+	return;
+}
+
 /* notifies the hypervisor that a tagged task has just been executed */
 static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops)
 {
@@ -1089,6 +1144,10 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 		_ack_resize_completed(sched_ctx, worker);
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+	{
+		_update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size);
+	}
 	
 	if(hypervisor.resize[sched_ctx])
 	{	
@@ -1301,6 +1360,9 @@ void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total
 	hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops;	
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+		sc_hypervisor_update_diff_total_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_total_flops);
+
 }
 
 void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops)
@@ -1313,6 +1375,8 @@ void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_ela
 		hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops;
 //		starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 	}
+	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
+		sc_hypervisor_update_diff_elapsed_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_elapsed_flops);
 }
 
 void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id)
@@ -1325,7 +1389,7 @@ void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, un
 		/* if father == STARPU_NMAX_SCHED_CTXS we take all the ctxs in this level */
 		if(starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]) == hierarchy_level && 
 		   (starpu_sched_ctx_get_inheritor(hypervisor.sched_ctxs[s]) == father_sched_ctx_id || father_sched_ctx_id == STARPU_NMAX_SCHED_CTXS))
-			(*sched_ctxs)[(*nsched_ctxs)++] = hypervisor.sched_ctxs[s];
+		        (*sched_ctxs)[(*nsched_ctxs)++] = hypervisor.sched_ctxs[s];
 	}
 	if(*nsched_ctxs == 0)
 		free(*sched_ctxs);

+ 1 - 1
src/sched_policies/eager_central_policy.c

@@ -35,7 +35,7 @@ struct _starpu_eager_center_policy_data
 
 static void initialize_eager_center_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_TREE);
 
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)malloc(sizeof(struct _starpu_eager_center_policy_data));
 

+ 0 - 1
src/worker_collection/worker_tree.c

@@ -19,7 +19,6 @@
 #include <starpu.h>
 #ifdef STARPU_HAVE_HWLOC
 #include <hwloc.h>
-/* #include "tree.h" */
 #include "core/workers.h"
 
 static unsigned tree_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)