преди 12 години · 8d8d2aa2f2
--- a/sc_hypervisor/include/sc_hypervisor_monitoring.h
+++ b/sc_hypervisor/include/sc_hypervisor_monitoring.h
@@ -50,6 +50,9 @@ struct sc_hypervisor_wrapper
 
				 	/* idle time of workers in this context */
			
 
				 	double current_idle_time[STARPU_NMAXWORKERS];
			
 
				 	
			
 
				+	double idle_time[STARPU_NMAXWORKERS];
			
 
				+	double idle_start_time[STARPU_NMAXWORKERS];
			
 
				+	
			
 
				 	/* list of workers that will leave this contexts (lazy resizing process) */
			
 
				 	int worker_to_be_removed[STARPU_NMAXWORKERS];
			
 
				 
			
--- a/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c
@@ -22,56 +22,56 @@
 
				 #ifdef STARPU_HAVE_GLPK_H
			
 
				 static void _try_resizing(void)
			
 
				 {
			
 
				+	/* for vite */
			
 
				+	starpu_trace_user_event(2);
			
 
				+
			
 
				 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
			
 
				-	
			
 
				 	double nworkers[nsched_ctxs][2];
			
 
				-	
			
 
				-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				-	if(ret != EBUSY)
			
 
				-	{
			
 
				-		starpu_trace_user_event(2);
			
 
				-		int nw = 1;
			
 
				+	int nw = 1;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			
 
				-		nw = ncuda != 0 ? 2 : 1;
			
 
				+	int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			
 
				+	nw = ncuda != 0 ? 2 : 1;
			
 
				 #endif
			
 
				-		int total_nw[nw];
			
 
				-		sc_hypervisor_group_workers_by_type(NULL, -1, nw, total_nw);
			
 
				-		
			
 
				-		
			
 
				-		struct timeval start_time;
			
 
				-		struct timeval end_time;
			
 
				-		gettimeofday(&start_time, NULL);
			
 
				-		
			
 
				-		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
			
 
				-		gettimeofday(&end_time, NULL);
			
 
				-		
			
 
				-		long diff_s = end_time.tv_sec  - start_time.tv_sec;
			
 
				-		long diff_us = end_time.tv_usec  - start_time.tv_usec;
			
 
				-		
			
 
				-		float timing = (float)(diff_s*1000000 + diff_us)/1000;
			
 
				-
			
 
				-		if(vmax != 0.0)
			
 
				-		{
			
 
				-			int nworkers_rounded[nsched_ctxs][nw];
			
 
				-			sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
			
 
				-			sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
			
 
				-		}
			
 
				-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
			
 
				+	int total_nw[nw];
			
 
				+	sc_hypervisor_group_workers_by_type(NULL, -1, nw, total_nw);
			
 
				+	
			
 
				+	
			
 
				+	struct timeval start_time;
			
 
				+	struct timeval end_time;
			
 
				+	gettimeofday(&start_time, NULL);
			
 
				+	
			
 
				+	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
			
 
				+	gettimeofday(&end_time, NULL);
			
 
				+	
			
 
				+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
			
 
				+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
			
 
				+	
			
 
				+	float timing = (float)(diff_s*1000000 + diff_us)/1000;
			
 
				+	
			
 
				+	if(vmax != 0.0)
			
 
				+	{
			
 
				+		int nworkers_rounded[nsched_ctxs][nw];
			
 
				+		sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
			
 
				+		sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
			
 
				 	}
			
 
				 	
			
 
				 }
			
 
				 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
			
 
				 {
			
 
				-	unsigned criteria = sc_hypervisor_get_resize_criteria();
			
 
				-	if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
			
 
				+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				+	if(ret != EBUSY)
			
 
				 	{
			
 
				-
			
 
				-		if(sc_hypervisor_check_velocity_gap_btw_ctxs())
			
 
				+		unsigned criteria = sc_hypervisor_get_resize_criteria();
			
 
				+		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
			
 
				 		{
			
 
				-			_try_resizing();
			
 
				+			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
			
 
				+			{
			
 
				+				_try_resizing();
			
 
				+			}
			
 
				 		}
			
 
				+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
			
 
				 	}
			
 
				+
			
 
				 }
			
 
				 static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
			
 
				 {
			
@@ -111,15 +111,13 @@ static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworker
 
				 /* 				printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]); */
			
 
				 /* #endif */
			
 
				 /* 		} */
			
 
				-		int *current_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : 
			
 
				-			sched_ctxs;
			
 
				+		int *current_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
			
 
				 
			
 
				 		unsigned has_workers = 0;
			
 
				 		int s;
			
 
				 		for(s = 0; s < ns; s++)
			
 
				 		{
			
 
				-			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
			
 
				-									     STARPU_ANY_WORKER);
			
 
				+			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], STARPU_ANY_WORKER);
			
 
				 			if(nworkers_ctx != 0)
			
 
				 			{
			
 
				 				has_workers = 1;
			
@@ -136,14 +134,20 @@ static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworker
 
				 
			
 
				 static feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
			
 
				 {
			
 
				-	unsigned criteria = sc_hypervisor_get_resize_criteria();
			
 
				-	if(criteria != SC_NOTHING && criteria == SC_IDLE)
			
 
				+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				+	if(ret != EBUSY)
			
 
				 	{
			
 
				-
			
 
				-		if(sc_hypervisor_check_idle(sched_ctx, worker))
			
 
				+		unsigned criteria = sc_hypervisor_get_resize_criteria();
			
 
				+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
			
 
				 		{
			
 
				-			_try_resizing();
			
 
				+			
			
 
				+			if(sc_hypervisor_check_idle(sched_ctx, worker))
			
 
				+			{
			
 
				+//				_try_resizing();
			
 
				+				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
			
 
				+			}
			
 
				 		}
			
 
				+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c
@@ -274,7 +274,8 @@ static int teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 
				 			
			
 
				 			if(sc_hypervisor_check_idle(sched_ctx, worker))
			
 
				 			{
			
 
				-				_try_resizing();
			
 
				+//				_try_resizing();
			
 
				+				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
			
 
				 			}
			
 
				 		}
			
 
				 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
			
--- a/sc_hypervisor/src/policies_utils/lp_tools.c
+++ b/sc_hypervisor/src/policies_utils/lp_tools.c
@@ -17,6 +17,7 @@
 
				 #include <math.h>
			
 
				 #include "sc_hypervisor_lp.h"
			
 
				 #include "sc_hypervisor_policy.h"
			
 
				+#include "sc_hypervisor_intern.h"
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 #ifdef STARPU_HAVE_GLPK_H
			
@@ -48,11 +49,26 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 
				 #else
			
 
				 		v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
			
 
				 #endif // STARPU_USE_CUDA
			
 
				-		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
			
 
				+		
			
 
				+		flops[i] = sc_w->remaining_flops < 0.0 ? 0.0 : sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
			
 
				 //		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
			
 
				 	}
			
 
				 
			
 
				-	return 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
			
 
				+	double vmax = 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
			
 
				+	double optimal_v = 0.0;
			
 
				+	for(i = 0; i < nsched_ctxs; i++)
			
 
				+	{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
			
 
				+#else
			
 
				+		optimal_v = res[i][0] * v[i][0];
			
 
				+#endif //STARPU_USE_CUDA
			
 
				+//				printf("%d: set opt %lf\n", i, optimal_v[i]);
			
 
				+		if(optimal_v != 0.0)
			
 
				+			_set_optimal_v(i, optimal_v);
			
 
				+	}
			
 
				+
			
 
				+	return vmax;
			
 
				 #else//STARPU_HAVE_GLPK_H
			
 
				 	return 0.0;
			
 
				 #endif//STARPU_HAVE_GLPK_H
			
--- a/sc_hypervisor/src/policies_utils/policy_tools.c
+++ b/sc_hypervisor/src/policies_utils/policy_tools.c
@@ -16,6 +16,7 @@
 
				 
			
 
				 #include "sc_hypervisor_policy.h"
			
 
				 #include "sc_hypervisor_intern.h"
			
 
				+#include "sc_hypervisor_lp.h"
			
 
				 #include <math.h>
			
 
				 
			
 
				 static int _compute_priority(unsigned sched_ctx)
			
@@ -366,7 +367,14 @@ double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w)
 
				 /* 	if(elapsed_time2 > 5.0 && elapsed_flops < sample) */
			
 
				 /* 		return (elapsed_flops/1000000000.0)/elapsed_time2;/\* in Gflops/s *\/ */
			
 
				 
			
 
				-	if(elapsed_flops >= sample)
			
 
				+	double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
			
 
				+	double total_flops = sc_w->total_flops;
			
 
				+	char *start_sample_prc_char = getenv("SC_HYPERVISOR_START_RESIZE");
			
 
				+	double start_sample_prc = start_sample_prc_char ? atof(start_sample_prc_char) : 0.0;
			
 
				+	double start_sample = start_sample_prc > 0.0 ? (start_sample_prc / 100) * total_flops : sample;
			
 
				+	double redim_sample = elapsed_flops == total_elapsed_flops ? (start_sample > 0.0 ? start_sample : sample) : sample;
			
 
				+
			
 
				+	if(elapsed_flops >= redim_sample)
			
 
				         {
			
 
				                 double curr_time = starpu_timing_now();
			
 
				                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
			
@@ -536,33 +544,104 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 
				 	struct sc_hypervisor_wrapper* sc_w;
			
 
				 	struct sc_hypervisor_wrapper* other_sc_w;
			
 
				 
			
 
				+	
			
 
				+	double optimal_v[nsched_ctxs];
			
 
				+	unsigned has_opt_v = 1;
			
 
				 	for(i = 0; i < nsched_ctxs; i++)
			
 
				 	{
			
 
				-		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				-		double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
			
 
				-		if(ctx_v != -1.0)
			
 
				+		optimal_v[i] = _get_optimal_v(i);
			
 
				+		if(optimal_v[i] == 0.0)
			
 
				+		{
			
 
				+			has_opt_v = 0;
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if(!has_opt_v)
			
 
				+	{
			
 
				+		int nw = 1;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			
 
				+		nw = ncuda != 0 ? 2 : 1;
			
 
				+#endif	
			
 
				+		double nworkers_per_type[nsched_ctxs][nw];
			
 
				+		int total_nw[nw];
			
 
				+		for(i = 0; i < nw; i++)
			
 
				 		{
			
 
				 			for(j = 0; j < nsched_ctxs; j++)
			
 
				+				nworkers_per_type[j][i] = 0.0;
			
 
				+			total_nw[i] = 0;
			
 
				+		}
			
 
				+		sc_hypervisor_group_workers_by_type(NULL, -1, nw, total_nw);
			
 
				+		
			
 
				+		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers_per_type, total_nw);
			
 
				+		
			
 
				+		if(vmax != 0.0)
			
 
				+		{
			
 
				+			for(i = 0; i < nsched_ctxs; i++)
			
 
				 			{
			
 
				-				if(sched_ctxs[i] != sched_ctxs[j])
			
 
				-				{
			
 
				-					unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[j]);
			
 
				-					if(nworkers == 0) 
			
 
				-						return 1;
			
 
				+				sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				+				double v[nw];
			
 
				+				v[0] = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
			
 
				+				v[1] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
			
 
				+				
			
 
				+				optimal_v[i] = nworkers_per_type[i][0] * v[0] + nworkers_per_type[i][1]* v[1];
			
 
				+				_set_optimal_v(i, optimal_v[i]);
			
 
				+			}
			
 
				+			has_opt_v = 1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if(has_opt_v)
			
 
				+	{
			
 
				+		for(i = 0; i < nsched_ctxs; i++)
			
 
				+		{
			
 
				+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				+			
			
 
				+			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
			
 
				+			if(ctx_v == -1.0)
			
 
				+				return 0;
			
 
				+		}
			
 
				 
			
 
				-					other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]);
			
 
				-					double other_ctx_v = sc_hypervisor_get_ctx_velocity(other_sc_w);
			
 
				-					if(other_ctx_v != -1.0)
			
 
				+		for(i = 0; i < nsched_ctxs; i++)
			
 
				+		{
			
 
				+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				+			
			
 
				+			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
			
 
				+			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
			
 
				+				return 1;
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		for(i = 0; i < nsched_ctxs; i++)
			
 
				+		{
			
 
				+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				+			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
			
 
				+			if(ctx_v != -1.0)
			
 
				+			{
			
 
				+				for(j = 0; j < nsched_ctxs; j++)
			
 
				+				{
			
 
				+					if(sched_ctxs[i] != sched_ctxs[j])
			
 
				 					{
			
 
				-						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
			
 
				-//						if(gap > 1.5)
			
 
				-						if(gap > _get_max_velocity_gap())
			
 
				+						unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[j]);
			
 
				+						if(nworkers == 0)
			
 
				 							return 1;
			
 
				+						
			
 
				+						other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]);
			
 
				+						double other_ctx_v = sc_hypervisor_get_ctx_velocity(other_sc_w);
			
 
				+						if(other_ctx_v != -1.0)
			
 
				+						{
			
 
				+							double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
			
 
				+							double max_vel = _get_max_velocity_gap();
			
 
				+							if(gap > max_vel-1 && gap < max_vel+1)
			
 
				+								return 1;
			
 
				+						}
			
 
				 					}
			
 
				 				}
			
 
				 			}
			
 
				+			
			
 
				 		}
			
 
				-
			
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
--- a/sc_hypervisor/src/sc_hypervisor.c
+++ b/sc_hypervisor/src/sc_hypervisor.c
@@ -113,7 +113,7 @@ static struct sc_hypervisor_policy *_select_hypervisor_policy(struct sc_hypervis
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		policy_name = getenv("HYPERVISOR_POLICY");
			
 
				+		policy_name = getenv("SC_HYPERVISOR_POLICY");
			
 
				 	}
			
 
				 
			
 
				 	if (policy_name)
			
@@ -134,9 +134,9 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 
				 {
			
 
				 	hypervisor.min_tasks = 0;
			
 
				 	hypervisor.nsched_ctxs = 0;
			
 
				-	char* vel_gap = getenv("MAX_VELOCITY_GAP");
			
 
				+	char* vel_gap = getenv("SC_HYPERVISOR_MAX_VELOCITY_GAP");
			
 
				 	hypervisor.max_velocity_gap = vel_gap ? atof(vel_gap) : SC_VELOCITY_MAX_GAP_DEFAULT;
			
 
				-	char* crit =  getenv("HYPERVISOR_TRIGGER_RESIZE");
			
 
				+	char* crit =  getenv("SC_HYPERVISOR_TRIGGER_RESIZE");
			
 
				 	hypervisor.resize_criteria = !crit ? SC_NOTHING : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_VELOCITY : SC_NOTHING);
			
 
				 
			
 
				 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
			
@@ -162,11 +162,14 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 
				 		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
			
 
				 		hypervisor.sched_ctx_w[i].resize_ack.acked_workers = NULL;
			
 
				 		starpu_pthread_mutex_init(&hypervisor.sched_ctx_w[i].mutex, NULL);
			
 
				+		hypervisor.optimal_v[i] = 0.0;
			
 
				 
			
 
				 		int j;
			
 
				 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
			
 
				 		{
			
 
				 			hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0;
			
 
				+			hypervisor.sched_ctx_w[i].idle_time[j] = 0.0;
			
 
				+			hypervisor.sched_ctx_w[i].idle_start_time[j] = 0.0;
			
 
				 			hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0;
			
 
				 			hypervisor.sched_ctx_w[i].poped_tasks[j] = 0;
			
 
				 			hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
			
@@ -216,7 +219,7 @@ void sc_hypervisor_start_resize(unsigned sched_ctx)
 
				 
			
 
				 static void _print_current_time()
			
 
				 {
			
 
				-	if(!getenv("HYPERVISOR_STOP_PRINT"))
			
 
				+	if(!getenv("SC_HYPERVISOR_STOP_PRINT"))
			
 
				 	{
			
 
				 		double curr_time = starpu_timing_now();
			
 
				 		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
			
@@ -372,6 +375,52 @@ static double _get_best_total_elapsed_flops(struct sc_hypervisor_wrapper* sc_w,
 
				 
			
 
				 	return ret_val;
			
 
				 }
			
 
				+/* Sum the idle time accumulated by the workers of type req_arch that belong
			
 
				+ * to the context wrapped by sc_w.
			
 
				+ *
			
 
				+ * sc_w:     wrapper whose worker collection is walked
			
 
				+ * npus:     incremented once per matching worker; it is NOT reset here
			
 
				+ * req_arch: worker architecture to account for
			
 
				+ *
			
 
				+ * Returns the summed idle_time[] values (stored in seconds by
			
 
				+ * notify_idle_end()), or 0.0 when no worker matches. */
			
 
				+static double _get_total_idle_time_per_worker_type(struct sc_hypervisor_wrapper *sc_w, int *npus, enum starpu_worker_archtype req_arch)
			
 
				+{
			
 
				+	double ret_val = 0.0;
			
 
				+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
			
 
				+        int worker;
			
 
				+
			
 
				+	struct starpu_sched_ctx_iterator it;
			
 
				+	if(workers->init_iterator)
			
 
				+                workers->init_iterator(workers, &it);
			
 
				+
			
 
				+        while(workers->has_next(workers, &it))
			
 
				+	{
			
 
				+                worker = workers->get_next(workers, &it);
			
 
				+                enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
			
 
				+                if(arch == req_arch)
			
 
				+                {
			
 
				+			/* idle_start_time[] is a starpu_timing_now() timestamp (or 0.0),
			
 
				+			 * not a duration: the measured idle periods live in idle_time[] */
			
 
				+			ret_val += sc_w->idle_time[worker];
			
 
				+			(*npus)++;
			
 
				+                }
			
 
				+        }
			
 
				+
			
 
				+	return ret_val;
			
 
				+}
			
 
				+
			
 
				+/* Clear the idle-period start timestamp (idle_start_time[]) of every worker
			
 
				+ * of type req_arch in the context wrapped by sc_w. The accumulated
			
 
				+ * idle_time[] values are left untouched.
			
 
				+ * NOTE(review): notify_idle_end() only accumulates when idle_start_time[] is
			
 
				+ * non-zero, so an idle period in progress is dropped by this reset -
			
 
				+ * confirm this is intended. */
			
 
				+static void _reset_idle_time_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype req_arch)
			
 
				+{
			
 
				+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
			
 
				+        int worker;
			
 
				+
			
 
				+	struct starpu_sched_ctx_iterator it;
			
 
				+	if(workers->init_iterator)
			
 
				+                workers->init_iterator(workers, &it);
			
 
				+
			
 
				+        while(workers->has_next(workers, &it))
			
 
				+	{
			
 
				+                worker = workers->get_next(workers, &it);
			
 
				+                enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
			
 
				+                if(arch == req_arch)
			
 
				+                {
			
 
				+			sc_w->idle_start_time[worker] = 0.0;
			
 
				+                }
			
 
				+        }
			
 
				+}
			
 
				 
			
 
				 double _get_max_velocity_gap()
			
 
				 {
			
@@ -388,6 +437,7 @@ double sc_hypervisorsc_hypervisor_get_velocity_per_worker_type(struct sc_hypervi
 
				 {
			
 
				         int npus = 0;
			
 
				         double elapsed_flops = _get_best_total_elapsed_flops(sc_w, &npus, arch) / 1000000000.0 ; /* in gflops */
			
 
				+	double total_idle_time = _get_total_idle_time_per_worker_type(sc_w, &npus, arch);
			
 
				 	if(npus == 0)
			
 
				 		return -1.0; 
			
 
				 
			
@@ -395,7 +445,10 @@ double sc_hypervisorsc_hypervisor_get_velocity_per_worker_type(struct sc_hypervi
 
				         {
			
 
				                 double curr_time = starpu_timing_now();
			
 
				                 double elapsed_time = (curr_time - sc_w->real_start_time) / 1000000.0; /* in seconds */
			
 
				+		elapsed_time -= total_idle_time;
			
 
				 		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
			
 
				+		_reset_idle_time_per_worker_type(sc_w, arch);
			
 
				+
			
 
				                 return velocity;
			
 
				         }
			
 
				 
			
@@ -508,7 +561,6 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 
				 	return ret_val;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
			
 
				 {
			
 
				 	/* info concerning only the gflops_rate strateg */
			
@@ -793,6 +845,15 @@ static void notify_idle_end(unsigned sched_ctx, int worker)
 
				 	if(hypervisor.resize[sched_ctx])
			
 
				 		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
			
 
				 
			
 
				+	struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
			
 
				+
			
 
				+	if(sc_w->idle_start_time[worker] != 0.0)
			
 
				+	{
			
 
				+		double end_time  = starpu_timing_now();
			
 
				+		sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */ 
			
 
				+		sc_w->idle_start_time[worker] = 0.0;
			
 
				+	}
			
 
				+
			
 
				 	if(hypervisor.policy.handle_idle_end)
			
 
				 		hypervisor.policy.handle_idle_end(sched_ctx, worker);
			
 
				 
			
@@ -805,6 +866,10 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 
				 	{
			
 
				 		struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
			
 
				 		sc_w->current_idle_time[worker] += idle_time;
			
 
				+
			
 
				+		if(sc_w->idle_start_time[worker] == 0.0)
			
 
				+			sc_w->idle_start_time[worker] = starpu_timing_now();
			
 
				+
			
 
				 		if(hypervisor.policy.handle_idle_cycle)
			
 
				 		{
			
 
				 			hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
			
@@ -1005,3 +1070,13 @@ double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starp
 
				        
			
 
				 	return velocity;
			
 
				 }
			
 
				+
			
 
				+/* Return the reference speed stored in hypervisor.optimal_v[] at index
			
 
				+ * sched_ctx. sc_hypervisor_init() initialises entries to 0.0, so 0.0 is read
			
 
				+ * back until _set_optimal_v() stores a value. No bounds check is done.
			
 
				+ * NOTE(review): the visible callers pass a loop index over the contexts
			
 
				+ * rather than a context id - confirm which one the parameter is meant to be. */
			
 
				+double _get_optimal_v(unsigned sched_ctx)
			
 
				+{
			
 
				+	return hypervisor.optimal_v[sched_ctx];
			
 
				+}
			
 
				+
			
 
				+/* Store optimal_v as the reference speed at index sched_ctx of
			
 
				+ * hypervisor.optimal_v[]. No bounds check is done on sched_ctx.
			
 
				+ * NOTE(review): the visible callers pass a loop index over the contexts
			
 
				+ * rather than a context id - confirm which one the parameter is meant to be. */
			
 
				+void _set_optimal_v(unsigned sched_ctx, double optimal_v)
			
 
				+{
			
 
				+	hypervisor.optimal_v[sched_ctx] = optimal_v;
			
 
				+}
			
--- a/sc_hypervisor/src/sc_hypervisor_intern.h
+++ b/sc_hypervisor/src/sc_hypervisor_intern.h
@@ -83,6 +83,9 @@ struct sc_hypervisor
 
				 	
			
 
				 	/* criteria to trigger resizing */
			
 
				 	unsigned resize_criteria;
			
 
				+
			
 
				+	/* value of the speed to compare the speed of the context to */
			
 
				+	double optimal_v[STARPU_NMAX_SCHED_CTXS];
			
 
				 };
			
 
				 
			
 
				 struct sc_hypervisor_adjustment
			
@@ -100,3 +103,5 @@ void _remove_config(unsigned sched_ctx);
 
				 
			
 
				 double _get_max_velocity_gap();
			
 
				 
			
 
				+double _get_optimal_v(unsigned sched_ctx);
			
 
				+void _set_optimal_v(unsigned sched_ctx, double optimal_v);
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -233,9 +233,9 @@ noinst_PROGRAMS +=				\
 
				 	datawizard/reclaim
			
 
				 endif
			
 
				 
			
 
				-noinst_nulldir=/tmp
			
 
				-noinst_null_PROGRAMS =				\
			
 
				-	$(LOADER)
			
 
				+# noinst_nulldir=/tmp
			
 
				+# noinst_null_PROGRAMS =				\
			
 
				+# 	$(LOADER)
			
 
				 
			
 
				 examplebin_PROGRAMS = \
			
 
				 	microbenchs/tasks_size_overhead		\