12 years ago · f54a30a655
--- a/sched_ctx_hypervisor/include/sched_ctx_hypervisor.h
+++ b/sched_ctx_hypervisor/include/sched_ctx_hypervisor.h
@@ -108,6 +108,7 @@ struct sched_ctx_hypervisor_wrapper
 
				 	double total_flops;
			
 
				 	double total_elapsed_flops[STARPU_NMAXWORKERS];
			
 
				 	double elapsed_flops[STARPU_NMAXWORKERS];
			
 
				+	double ref_velocity[STARPU_NMAXWORKERS];
			
 
				 	double submitted_flops;
			
 
				 	double remaining_flops;
			
 
				 	double start_time;
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
@@ -18,7 +18,7 @@
 
				 #include "lp_tools.h"
			
 
				 #include <math.h>
			
 
				 
			
 
				-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers);
			
 
				+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer);
			
 
				 static double _find_tmax(double t1, double t2);
			
 
				 
			
 
				 
			
@@ -32,6 +32,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
				 	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
			
 
				 	
			
 
				 	int w,s;
			
 
				+	struct sched_ctx_hypervisor_wrapper* sc_w = NULL;
			
 
				 
			
 
				 	for(s = 0; s < ns; s++)
			
 
				 	{
			
@@ -43,13 +44,16 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
				 			draft_flops_on_w[s][w] = 0.0;
			
 
				 			int worker = workers == NULL ? w : workers[w];
			
 
				 
			
 
				-			velocity[s][w] = _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), worker);
			
 
				+			sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
			
 
				+			velocity[s][w] = _get_velocity_per_worker(sc_w, worker);
			
 
				 			if(velocity[s][w] == -1.0)
			
 
				 			{
			
 
				 				enum starpu_archtype arch = starpu_worker_get_type(worker);
			
 
				-				velocity[s][w] = _get_velocity_per_worker_type(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), arch);
			
 
				+				velocity[s][w] = _get_velocity_per_worker_type(sc_w, arch);
			
 
				 				if(velocity[s][w] == -1.0)
			
 
				-					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 50.0;
			
 
				+					velocity[s][w] = sc_w->ref_velocity[worker];
			
 
				+				if(velocity[s][w] == -1.0)
			
 
				+					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 150.0;
			
 
				 			}
			
 
				 			
			
 
				 //			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
			
@@ -63,7 +67,8 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
				 	   as starting point and then try to minimize it
			
 
				 	   as increasing it a little for the faster ctxs */
			
 
				 	double tmax = _get_slowest_ctx_exec_time();
			
 
				-	double smallest_tmax = tmax - 0.5*tmax;
			
 
				+	double smallest_tmax = _get_fastest_ctx_exec_time(); //tmax - 0.5*tmax;
			
 
				+//	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);
			
 
				 
			
 
				 	double res = 1.0;
			
 
				 	unsigned has_sol = 0;
			
@@ -83,7 +88,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
				 		/* find solution and save the values in draft tables
			
 
				 		   only if there is a solution for the system we save them
			
 
				 		   in the proper table */
			
 
				-		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers);
			
 
				+		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers, 1);
			
 
				 		if(res != 0.0)
			
 
				 		{
			
 
				 			for(s = 0; s < ns; s++)
			
@@ -140,18 +145,19 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 
				  */
			
 
				 #ifdef STARPU_HAVE_GLPK_H
			
 
				 #include <glpk.h>
			
 
				-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers)
			
 
				+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer)
			
 
				 {
			
 
				 	int w, s;
			
 
				 	glp_prob *lp;
			
 
				 
			
 
				+//	printf("try with tmax %lf\n", tmax);
			
 
				 	lp = glp_create_prob();
			
 
				 	glp_set_prob_name(lp, "StarPU theoretical bound");
			
 
				 	glp_set_obj_dir(lp, GLP_MAX);
			
 
				 	glp_set_obj_name(lp, "total execution time");
			
 
				 
			
 
				 	{
			
 
				-		int ne = 4 * ns * nw /* worker execution time */
			
 
				+		int ne = 5 * ns * nw /* worker execution time */
			
 
				 			+ 1; /* glp dumbness */
			
 
				 		int n = 1;
			
 
				 		int ia[ne], ja[ne];
			
@@ -176,7 +182,13 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
				 
			
 
				 				snprintf(name, sizeof(name), "w%ds%dn", w, s);
			
 
				 				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
			
 
				-				glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
			
 
				+				if (integer)
			
 
				+				{
			
 
				+                                        glp_set_col_kind(lp, nw*ns+colnum(w, s), GLP_IV);
			
 
				+					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0, 1);
			
 
				+				}
			
 
				+				else
			
 
				+					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
			
 
				 
			
 
				 			}
			
 
				 
			
@@ -248,9 +260,33 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
				 				ar[n] = 1;
			
 
				 				n++;
			
 
				 			}
			
 
				+			if(integer)				
			
 
				+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
			
 
				+			else
			
 
				+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
			
 
				+		}
			
 
				+
			
 
				+		curr_row_idx += nw;
			
 
				+
			
 
				+		/* sum(nflops[s][w]) > 0*/
			
 
				+		glp_add_rows(lp, nw);
			
 
				+		for (w = 0; w < nw; w++)
			
 
				+		{
			
 
				+			char name[32], title[64];
			
 
				+			starpu_worker_get_name(w, name, sizeof(name));
			
 
				+			snprintf(title, sizeof(title), "flopsw%x", w);
			
 
				+			glp_set_row_name(lp, curr_row_idx+w+1, title);
			
 
				+			for(s = 0; s < ns; s++)
			
 
				+			{
			
 
				+				ia[n] = curr_row_idx+w+1;
			
 
				+				ja[n] = colnum(w,s);
			
 
				+				ar[n] = 1;
			
 
				+				n++;
			
 
				+			}
			
 
				 
			
 
				-			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
			
 
				+			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.);
			
 
				 		}
			
 
				+
			
 
				 		if(n != ne)
			
 
				 			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
			
 
				 		STARPU_ASSERT(n == ne);
			
@@ -269,6 +305,14 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
				 		return 0.0;
			
 
				 	}
			
 
				 
			
 
				+        if (integer)
			
 
				+        {
			
 
				+                glp_iocp iocp;
			
 
				+                glp_init_iocp(&iocp);
			
 
				+                iocp.msg_lev = GLP_MSG_OFF;
			
 
				+                glp_intopt(lp, &iocp);
			
 
				+        }
			
 
				+
			
 
				 	int stat = glp_get_prim_stat(lp);
			
 
				 	/* if we don't have a solution return */
			
 
				 	if(stat == GLP_NOFEAS)
			
@@ -284,7 +328,10 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
				 		for(w = 0; w < nw; w++)
			
 
				 		{
			
 
				 			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
			
 
				-			w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
			
 
				+			if (integer)
			
 
				+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum(w, s));
			
 
				+			else
			
 
				+				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
			
 
				 //			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
			
 
				 		}
			
 
				 
			
@@ -344,7 +391,7 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
 
				 						else
			
 
				 						{
			
 
				 							nworkers[s][1] += w_in_s[s][w];
			
 
				-							if(w_in_s[s][w] > 0.3)
			
 
				+							if(w_in_s[s][w] > 0.5)
			
 
				 								nworkers_rounded[s][1]++;
			
 
				 						}
			
 
				 					}
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c
@@ -44,13 +44,13 @@ static unsigned _get_slowest_sched_ctx(void)
 
				 	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
			
 
				 
			
 
				 	double smallest_velocity = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(sched_ctxs[0]));
			
 
				-	unsigned slowest_sched_ctx = smallest_velocity == 0.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
			
 
				+	unsigned slowest_sched_ctx = smallest_velocity == -1.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
			
 
				 	double curr_velocity = 0.0;
			
 
				 	int i;
			
 
				 	for(i = 1; i < nsched_ctxs; i++)
			
 
				 	{
			
 
				 		curr_velocity = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]));
			
 
				-		if((curr_velocity < smallest_velocity || smallest_velocity == 0.0) && curr_velocity != 0.0)
			
 
				+		if((curr_velocity < smallest_velocity || smallest_velocity == 0.0) && curr_velocity != -1.0)
			
 
				 		{
			
 
				 			smallest_velocity = curr_velocity;
			
 
				 			slowest_sched_ctx = sched_ctxs[i];
			
@@ -166,7 +166,7 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker)
 
				 						double slowest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(slowest_sched_ctx));
			
 
				 //						printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, 
			
 
				 //						       slowest_speed, workers_to_move[0], new_speed);
			
 
				-						if((slowest_speed + new_speed) <= (fastest_speed - new_speed))
			
 
				+						if(fastest_speed != -1.0 && slowest_speed != -1.0 && (slowest_speed + new_speed) <= (fastest_speed - new_speed))
			
 
				 						{
			
 
				 							sched_ctx_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0);
			
 
				 						}
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c
@@ -272,18 +272,18 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
				 	int s, s2, w;
			
 
				 	for(s = 0; s < ns; s++)
			
 
				 	{
			
 
				+		int workers_move[STARPU_NMAXWORKERS];
			
 
				+		int nw_move = 0;
			
 
				+		
			
 
				+		int workers_add[STARPU_NMAXWORKERS];
			
 
				+		int nw_add = 0;
			
 
				+
			
 
				 		for(w = 0; w < nw; w++)
			
 
				 		{
			
 
				 			enum starpu_archtype arch;
			
 
				 			if(w == 0) arch = STARPU_CUDA_WORKER;
			
 
				 			if(w == 1) arch = STARPU_CPU_WORKER;
			
 
				 
			
 
				-			int workers_move[STARPU_NMAXWORKERS];
			
 
				-			int nw_move = 0;
			
 
				-
			
 
				-			int workers_add[STARPU_NMAXWORKERS];
			
 
				-			int nw_add = 0;
			
 
				-
			
 
				 			if(w == 1)
			
 
				 			{
			
 
				 				int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s], arch);
			
@@ -339,30 +339,35 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
				 					}
			
 
				 				}
			
 
				 			}
			
 
				+		}
			
 
				 
			
 
				-			for(s2 = 0; s2 < ns; s2++)
			
 
				+		for(s2 = 0; s2 < ns; s2++)
			
 
				+		{
			
 
				+			if(sched_ctxs[s2] != sched_ctxs[s])
			
 
				 			{
			
 
				-				if(sched_ctxs[s2] != sched_ctxs[s])
			
 
				+				double nworkers_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s2], STARPU_ANY_WORKER) * 1.0;
			
 
				+				int total_res = 0;
			
 
				+				for(w = 0; w < nw; w++)
			
 
				+					total_res += res[s2][w];
			
 
				+//				if(( total_res - nworkers_ctx2) >= 0.0 && nw_move > 0)
			
 
				+				if(nw_move > 0)
			
 
				 				{
			
 
				-					double nworkers_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s2], arch) * 1.0;
			
 
				-					if((res[s2][w] - nworkers_ctx2) >= 0.0 && nw_move > 0)
			
 
				-					{
			
 
				-						sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
			
 
				-						nw_move = 0;
			
 
				-						break;
			
 
				-					}
			
 
				-					if((res[s2][w] - nworkers_ctx2) >= 0.0 &&  (res[s2][w] - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
			
 
				-					{
			
 
				-						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
			
 
				-						nw_add = 0;
			
 
				-						break;
			
 
				-					}
			
 
				-
			
 
				+					sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
			
 
				+					nw_move = 0;
			
 
				+//					break;
			
 
				+				}
			
 
				+//				if((total_res - nworkers_ctx2) >= 0.0 &&  (total_res - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
			
 
				+				if(nw_add > 0)
			
 
				+				{
			
 
				+					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
			
 
				+					nw_add = 0;
			
 
				+//					break;
			
 
				 				}
			
 
				+				
			
 
				 			}
			
 
				-			if(nw_move > 0)
			
 
				-				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
			
 
				 		}
			
 
				+		if(nw_move > 0)
			
 
				+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
@@ -345,22 +345,45 @@ static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_
 
				 	return 0.0;
			
 
				 }
			
 
				 
			
 
				+/* Adds up config->ispeed_w_sample[] over every worker yielded by the
				+ * worker collection of sched_ctx and returns that sum. */
				+static double _get_ispeed_sample_for_sched_ctx(unsigned sched_ctx)
				+{
				+	struct starpu_sched_ctx_worker_collection *coll = starpu_sched_ctx_get_worker_collection(sched_ctx);
				+	struct sched_ctx_hypervisor_policy_config *cfg = sched_ctx_hypervisor_get_config(sched_ctx);
				+	struct starpu_iterator iter;
				+	double total = 0.0;
				+
				+	/* the collection may not provide an iterator initialiser */
				+	if(coll->init_iterator)
				+		coll->init_iterator(coll, &iter);
				+
				+	for(; coll->has_next(coll, &iter); )
				+	{
				+		int id = coll->get_next(coll, &iter);
				+		total += cfg->ispeed_w_sample[id];
				+	}
				+
				+	return total;
				+}
			
 
				+
			
 
				 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
			
 
				 {
			
 
				 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
			
 
				         double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
			
 
				-	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
			
 
				-	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops;
			
 
				-	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample : 
			
 
				-		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE);
			
 
				+	double sample = _get_ispeed_sample_for_sched_ctx(sc_w->sched_ctx);
			
 
				+
			
 
				+/* 	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w); */
			
 
				+/* 	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops; */
			
 
				+/* 	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample :  */
			
 
				+/* 		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE); */
			
 
				 //	printf("%d: prc %lf sample %lf\n", sc_w->sched_ctx, prc, redim_sample);
			
 
				-	if(prc >= redim_sample)
			
 
				+	if(elapsed_flops >= sample)
			
 
				         {
			
 
				                 double curr_time = starpu_timing_now();
			
 
				-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
			
 
				-                return (elapsed_flops/1000000000)/elapsed_time;/* in Gflops/s */
			
 
				+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
			
 
				+                return (elapsed_flops/1000000000.0)/elapsed_time;/* in Gflops/s */
			
 
				         }
			
 
				-	return 0.0;
			
 
				+	return -1.0;
			
 
				 }
			
 
				 
			
 
				 double _get_slowest_ctx_exec_time(void)
			
@@ -377,44 +400,80 @@ double _get_slowest_ctx_exec_time(void)
 
				 	{
			
 
				 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
			
 
				 
			
 
				-                double elapsed_time = curr_time - sc_w->start_time;
			
 
				+/*                 double elapsed_time = curr_time - sc_w->start_time; */
			
 
				+/* 		if(elapsed_time > slowest_time) */
			
 
				+/* 			slowest_time = elapsed_time; */
			
 
				+
			
 
				+//		double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
			
 
				+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
			
 
				+                double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/_get_ctx_velocity(sc_w);
			
 
				 		if(elapsed_time > slowest_time)
			
 
				 			slowest_time = elapsed_time;
			
 
				+
			
 
				         }
			
 
				+//	return slowest_time / 1000000.0;
			
 
				 	return slowest_time;
			
 
				 }
			
 
				 
			
 
				+/* Returns the smallest estimated execution time among the contexts known
				+ * to the hypervisor. The estimate for one context is
				+ *   (ispeed_ctx_sample / 1e9) / _get_ctx_velocity(ctx).
				+ *
				+ * Contexts whose velocity is not strictly positive are ignored:
				+ * _get_ctx_velocity() returns -1.0 while the context has not executed its
				+ * sample yet, and dividing by that value produced a negative "time" that
				+ * always won the minimum.
				+ *
				+ * When no context has a usable velocity the initial value (the current
				+ * starpu_timing_now() reading) is returned, exactly as before. */
				+double _get_fastest_ctx_exec_time(void)
				+{
				+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
				+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
				+
				+	/* NOTE(review): this start value is a timestamp, not a duration, and is
				+	 * presumably in microseconds while the estimates below look like
				+	 * seconds - it only acts as a large upper bound; confirm callers cope. */
				+	double fastest_time = starpu_timing_now();
				+
				+	int s;
				+	for(s = 0; s < nsched_ctxs; s++)
				+	{
				+		struct sched_ctx_hypervisor_wrapper *sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
				+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
				+		double velocity = _get_ctx_velocity(sc_w);
				+
				+		/* no measurement yet (-1.0) or a zero velocity: no meaningful estimate */
				+		if(velocity <= 0.0)
				+			continue;
				+
				+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/velocity;
				+		if(elapsed_time < fastest_time)
				+			fastest_time = elapsed_time;
				+	}
				+
				+	return fastest_time;
				+}
			
 
				+
			
 
				+
			
 
				 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker)
			
 
				 {
			
 
				 	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
			
 
				 		return -1.0;
			
 
				 
			
 
				-        double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000; /*in gflops */
			
 
				+        double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
			
 
				 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
			
 
				-	double sample = config->ispeed_w_sample[worker] / 1000000000; /*in gflops */
			
 
				+	double sample = config->ispeed_w_sample[worker] / 1000000000.0; /*in gflops */
			
 
				 
			
 
				 	double ctx_elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
			
 
				 	double ctx_sample = config->ispeed_ctx_sample;
			
 
				 	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
			
 
				 		return 0.00000000000001;
			
 
				 
			
 
				-        if( elapsed_flops >= sample)
			
 
				-        {
			
 
				-                double curr_time = starpu_timing_now();
			
 
				-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
			
 
				-                return (elapsed_flops/elapsed_time); /* in Gflops/s */
			
 
				-        }
			
 
				-
			
 
				-        return -1.0;
			
 
				-
			
 
				-/*         if( elapsed_flops != 0.0) */
			
 
				+/*         if( elapsed_flops >= sample) */
			
 
				 /*         { */
			
 
				 /*                 double curr_time = starpu_timing_now(); */
			
 
				-/*                 double elapsed_time = curr_time - sc_w->start_time; */
			
 
				-/*                 return (elapsed_flops/elapsed_time); */
			
 
				+/*                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /\* in seconds *\/ */
			
 
				+/* 		sc_w->ref_velocity[worker] = (elapsed_flops/elapsed_time); /\* in Gflops/s *\/ */
			
 
				+/*                 return sc_w->ref_velocity[worker]; */
			
 
				 /*         } */
			
 
				 
			
 
				-/*         return 0.00000000000001; */
			
 
				+/*         return -1.0; */
			
 
				+
			
 
				+        if( elapsed_flops != 0.0)
			
 
				+        {
			
 
				+                double curr_time = starpu_timing_now();
			
 
				+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
			
 
				+                sc_w->ref_velocity[worker] = (elapsed_flops/elapsed_time);/* in Gflops/s */
			
 
				+                return sc_w->ref_velocity[worker];
			
 
				+        }
			
 
				+
			
 
				+        return 0.00000000000001;
			
 
				 
			
 
				 
			
 
				 }
			
@@ -423,14 +482,14 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
				 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
			
 
				 {
			
 
				         int npus = 0;
			
 
				-        double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch) / 1000000000 ; /* in gflops */
			
 
				+        double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch) / 1000000000.0 ; /* in gflops */
			
 
				 	double avg_elapsed_flops = elapsed_flops / npus;
			
 
				-	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000;
			
 
				+	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000.0;
			
 
				 
			
 
				         if( avg_elapsed_flops >= sample)
			
 
				         {
			
 
				                 double curr_time = starpu_timing_now();
			
 
				-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
			
 
				+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
			
 
				                 return avg_elapsed_flops/elapsed_time; /* in Gflops/s */
			
 
				         }
			
 
				 
			
@@ -451,7 +510,7 @@ int _velocity_gap_btw_ctxs()
 
				 	{
			
 
				 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				 		double ctx_v = _get_ctx_velocity(sc_w);
			
 
				-		if(ctx_v != 0.0)
			
 
				+		if(ctx_v != -1.0)
			
 
				 		{
			
 
				 			for(j = 0; j < nsched_ctxs; j++)
			
 
				 			{
			
@@ -463,7 +522,7 @@ int _velocity_gap_btw_ctxs()
 
				 
			
 
				 					other_sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[j]);
			
 
				 					double other_ctx_v = _get_ctx_velocity(other_sc_w);
			
 
				-					if(other_ctx_v != 0.0)
			
 
				+					if(other_ctx_v != -1.0)
			
 
				 					{
			
 
				 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
			
 
				 						if(gap > 1.5)
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h
@@ -52,6 +52,8 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w);
 
				 
			
 
				 double _get_slowest_ctx_exec_time(void);
			
 
				 
			
 
				+double _get_fastest_ctx_exec_time(void);
			
 
				+
			
 
				 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker); 
			
 
				 
			
 
				 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
			
--- a/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
+++ b/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
@@ -158,6 +158,7 @@ struct starpu_performance_counters* sched_ctx_hypervisor_init(struct sched_ctx_h
 
				 			hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
			
 
				 			hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
			
 
				 			hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0;
			
 
				+			hypervisor.sched_ctx_w[i].ref_velocity[j] = -1.0;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -610,7 +611,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
				 				_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
			
 
				 
			
 
				 				hypervisor.resize[sender_sched_ctx] = 1;
			
 
				-//				hypervisor.resize[receiver_sched_ctx] = 1;
			
 
				+				//	hypervisor.resize[receiver_sched_ctx] = 1;
			
 
				 				/* if the user allowed resizing leave the decisions to the application */
			
 
				 				if(imposed_resize)  imposed_resize = 0;
			
 
				 
			
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -190,9 +190,12 @@ static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sche
 
				 	{
			
 
				 		if(workers->nworkers > 0)
			
 
				 		{
			
 
				-			int worker = workers->remove(workers, workerids[i]);
			
 
				-			if(worker >= 0)
			
 
				-				removed_workers[(*n_removed_workers)++] = worker;
			
 
				+			if(_starpu_worker_belongs_to_a_sched_ctx(workerids[i], sched_ctx->id))
			
 
				+			{
			
 
				+				int worker = workers->remove(workers, workerids[i]);
			
 
				+				if(worker >= 0)
			
 
				+					removed_workers[(*n_removed_workers)++] = worker;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -924,6 +927,21 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/* Returns 1 when workerid is contained in at least one scheduling context
				+ * other than sched_ctx_id, 0 otherwise. Every slot of the machine
				+ * configuration is scanned; slots whose id equals STARPU_NMAX_SCHED_CTXS
				+ * are skipped (presumably unused slots - confirm). */
				+unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id)
				+{
				+	struct _starpu_machine_config *machine = (struct _starpu_machine_config *)_starpu_get_machine_config();
				+	int slot;
				+
				+	for(slot = 0; slot < STARPU_NMAX_SCHED_CTXS; slot++)
				+	{
				+		struct _starpu_sched_ctx *candidate = &machine->sched_ctxs[slot];
				+
				+		if(!candidate)
				+			continue;
				+		if(candidate->id == STARPU_NMAX_SCHED_CTXS || candidate->id == sched_ctx_id)
				+			continue;
				+		if(starpu_sched_ctx_contains_worker(workerid, candidate->id))
				+			return 1;
				+	}
				+
				+	return 0;
				+}
			
 
				+		 
			
 
				 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
			
 
				 {
			
 
				 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
			
--- a/src/core/sched_ctx.h
+++ b/src/core/sched_ctx.h
@@ -135,6 +135,9 @@ int starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu
 
				    it should stop poping from it */
			
 
				 void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker);
			
 
				 
			
 
				+/* Check if the worker belongs to another sched_ctx */
			
 
				+unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				 #if defined(_MSC_VER) || defined(STARPU_SIMGRID)
			
 
				 _starpu_pthread_mutex_t* starpu_get_changing_ctx_mutex(unsigned sched_ctx_id);
			
 
				 #endif