Browse Source

fix ispeed policy

Andra Hugo 12 years ago
parent
commit
f54a30a655

+ 1 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -108,6 +108,7 @@ struct sched_ctx_hypervisor_wrapper
 	double total_flops;
 	double total_elapsed_flops[STARPU_NMAXWORKERS];
 	double elapsed_flops[STARPU_NMAXWORKERS];
+	double ref_velocity[STARPU_NMAXWORKERS];
 	double submitted_flops;
 	double remaining_flops;
 	double start_time;

+ 59 - 12
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -18,7 +18,7 @@
 #include "lp_tools.h"
 #include <math.h>
 
-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers);
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer);
 static double _find_tmax(double t1, double t2);
 
 
@@ -32,6 +32,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
 	
 	int w,s;
+	struct sched_ctx_hypervisor_wrapper* sc_w = NULL;
 
 	for(s = 0; s < ns; s++)
 	{
@@ -43,13 +44,16 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 			draft_flops_on_w[s][w] = 0.0;
 			int worker = workers == NULL ? w : workers[w];
 
-			velocity[s][w] = _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), worker);
+			sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
+			velocity[s][w] = _get_velocity_per_worker(sc_w, worker);
 			if(velocity[s][w] == -1.0)
 			{
 				enum starpu_archtype arch = starpu_worker_get_type(worker);
-				velocity[s][w] = _get_velocity_per_worker_type(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), arch);
+				velocity[s][w] = _get_velocity_per_worker_type(sc_w, arch);
 				if(velocity[s][w] == -1.0)
-					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 50.0;
+					velocity[s][w] = sc_w->ref_velocity[worker];
+				if(velocity[s][w] == -1.0)
+					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 150.0;
 			}
 			
 //			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
@@ -63,7 +67,8 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	   as starting point and then try to minimize it
 	   as increasing it a little for the faster ctxs */
 	double tmax = _get_slowest_ctx_exec_time();
-	double smallest_tmax = tmax - 0.5*tmax;
+	double smallest_tmax = _get_fastest_ctx_exec_time(); //tmax - 0.5*tmax;
+//	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);
 
 	double res = 1.0;
 	unsigned has_sol = 0;
@@ -83,7 +88,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   in the proper table */
-		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers);
+		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers, 1);
 		if(res != 0.0)
 		{
 			for(s = 0; s < ns; s++)
@@ -140,18 +145,19 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
  */
 #ifdef STARPU_HAVE_GLPK_H
 #include <glpk.h>
-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers)
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer)
 {
 	int w, s;
 	glp_prob *lp;
 
+//	printf("try with tmax %lf\n", tmax);
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
 	glp_set_obj_name(lp, "total execution time");
 
 	{
-		int ne = 4 * ns * nw /* worker execution time */
+		int ne = 5 * ns * nw /* worker execution time */
 			+ 1; /* glp dumbness */
 		int n = 1;
 		int ia[ne], ja[ne];
@@ -176,7 +182,13 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
 				snprintf(name, sizeof(name), "w%ds%dn", w, s);
 				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
-				glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
+				if (integer)
+				{
+                                        glp_set_col_kind(lp, nw*ns+colnum(w, s), GLP_IV);
+					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0, 1);
+				}
+				else
+					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
 
 			}
 
@@ -248,9 +260,33 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 				ar[n] = 1;
 				n++;
 			}
+			if(integer)				
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+
+		curr_row_idx += nw;
+
+		/* sum(nflops[s][w]) > 0*/
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "flopsw%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = colnum(w,s);
+				ar[n] = 1;
+				n++;
+			}
 
-			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.);
 		}
+
 		if(n != ne)
 			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
 		STARPU_ASSERT(n == ne);
@@ -269,6 +305,14 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 		return 0.0;
 	}
 
+        if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+        }
+
 	int stat = glp_get_prim_stat(lp);
 	/* if we don't have a solution return */
 	if(stat == GLP_NOFEAS)
@@ -284,7 +328,10 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 		for(w = 0; w < nw; w++)
 		{
 			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
-			w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
+			if (integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum(w, s));
+			else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
 //			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
 		}
 
@@ -344,7 +391,7 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
 						else
 						{
 							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.3)
+							if(w_in_s[s][w] > 0.5)
 								nworkers_rounded[s][1]++;
 						}
 					}

+ 3 - 3
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -44,13 +44,13 @@ static unsigned _get_slowest_sched_ctx(void)
 	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
 
 	double smallest_velocity = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(sched_ctxs[0]));
-	unsigned slowest_sched_ctx = smallest_velocity == 0.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
+	unsigned slowest_sched_ctx = smallest_velocity == -1.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
 	double curr_velocity = 0.0;
 	int i;
 	for(i = 1; i < nsched_ctxs; i++)
 	{
 		curr_velocity = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]));
-		if((curr_velocity < smallest_velocity || smallest_velocity == 0.0) && curr_velocity != 0.0)
+		if((curr_velocity < smallest_velocity || smallest_velocity == 0.0) && curr_velocity != -1.0)
 		{
 			smallest_velocity = curr_velocity;
 			slowest_sched_ctx = sched_ctxs[i];
@@ -166,7 +166,7 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker)
 						double slowest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(slowest_sched_ctx));
 //						printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, 
 //						       slowest_speed, workers_to_move[0], new_speed);
-						if((slowest_speed + new_speed) <= (fastest_speed - new_speed))
+						if(fastest_speed != -1.0 && slowest_speed != -1.0 && (slowest_speed + new_speed) <= (fastest_speed - new_speed))
 						{
 							sched_ctx_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0);
 						}

+ 29 - 24
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -272,18 +272,18 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 	int s, s2, w;
 	for(s = 0; s < ns; s++)
 	{
+		int workers_move[STARPU_NMAXWORKERS];
+		int nw_move = 0;
+		
+		int workers_add[STARPU_NMAXWORKERS];
+		int nw_add = 0;
+
 		for(w = 0; w < nw; w++)
 		{
 			enum starpu_archtype arch;
 			if(w == 0) arch = STARPU_CUDA_WORKER;
 			if(w == 1) arch = STARPU_CPU_WORKER;
 
-			int workers_move[STARPU_NMAXWORKERS];
-			int nw_move = 0;
-
-			int workers_add[STARPU_NMAXWORKERS];
-			int nw_add = 0;
-
 			if(w == 1)
 			{
 				int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s], arch);
@@ -339,30 +339,35 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 					}
 				}
 			}
+		}
 
-			for(s2 = 0; s2 < ns; s2++)
+		for(s2 = 0; s2 < ns; s2++)
+		{
+			if(sched_ctxs[s2] != sched_ctxs[s])
 			{
-				if(sched_ctxs[s2] != sched_ctxs[s])
+				double nworkers_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s2], STARPU_ANY_WORKER) * 1.0;
+				int total_res = 0;
+				for(w = 0; w < nw; w++)
+					total_res += res[s2][w];
+//				if(( total_res - nworkers_ctx2) >= 0.0 && nw_move > 0)
+				if(nw_move > 0)
 				{
-					double nworkers_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s2], arch) * 1.0;
-					if((res[s2][w] - nworkers_ctx2) >= 0.0 && nw_move > 0)
-					{
-						sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
-						nw_move = 0;
-						break;
-					}
-					if((res[s2][w] - nworkers_ctx2) >= 0.0 &&  (res[s2][w] - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
-					{
-						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
-						nw_add = 0;
-						break;
-					}
-
+					sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
+					nw_move = 0;
+//					break;
+				}
+//				if((total_res - nworkers_ctx2) >= 0.0 &&  (total_res - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
+				if(nw_add > 0)
+				{
+					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
+					nw_add = 0;
+//					break;
 				}
+				
 			}
-			if(nw_move > 0)
-				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
 		}
+		if(nw_move > 0)
+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
 	}
 }
 

+ 88 - 29
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -345,22 +345,45 @@ static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_
 	return 0.0;
 }
 
+/* Add up config->ispeed_w_sample[] over every worker currently listed in the
+ * worker collection of sched_ctx and return that total. */
+static double _get_ispeed_sample_for_sched_ctx(unsigned sched_ctx)
+{
+	struct starpu_sched_ctx_worker_collection *collection = starpu_sched_ctx_get_worker_collection(sched_ctx);
+	struct sched_ctx_hypervisor_policy_config *cfg = sched_ctx_hypervisor_get_config(sched_ctx);
+	struct starpu_iterator iter;
+	double total_sample = 0.0;
+
+	/* init_iterator is optional: it is only called when the collection sets it */
+	if(collection->init_iterator != NULL)
+		collection->init_iterator(collection, &iter);
+
+	for(; collection->has_next(collection, &iter); )
+	{
+		int id = collection->get_next(collection, &iter);
+		total_sample += cfg->ispeed_w_sample[id];
+	}
+
+	return total_sample;
+}
+
+/* Speed of the scheduling context wrapped by sc_w, in Gflops/s.
+ * The speed is the context's elapsed flops divided by the time spent since
+ * sc_w->start_time. It is only reported once the elapsed flops reach the sum
+ * of the per-worker ispeed samples (_get_ispeed_sample_for_sched_ctx);
+ * before that the function returns the sentinel -1.0 (the replaced code
+ * returned 0.0), so callers must test against -1.0.
+ * NOTE(review): the /1000000.0 conversion assumes starpu_timing_now() and
+ * start_time are expressed in microseconds - confirm. */
 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 {
+	/* NOTE(review): config is now only referenced by the commented-out lines below */
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
         double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
-	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
-	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops;
-	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample : 
-		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE);
+	double sample = _get_ispeed_sample_for_sched_ctx(sc_w->sched_ctx);
+
+/* 	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w); */
+/* 	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops; */
+/* 	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample :  */
+/* 		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE); */
 //	printf("%d: prc %lf sample %lf\n", sc_w->sched_ctx, prc, redim_sample);
-	if(prc >= redim_sample)
+	/* both sides of the comparison are raw flops, not Gflops */
+	if(elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
-                return (elapsed_flops/1000000000)/elapsed_time;/* in Gflops/s */
+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+                return (elapsed_flops/1000000000.0)/elapsed_time;/* in Gflops/s */
         }
-	return 0.0;
+	/* not enough work measured yet: no speed available */
+	return -1.0;
 }
 
 double _get_slowest_ctx_exec_time(void)
@@ -377,44 +400,80 @@ double _get_slowest_ctx_exec_time(void)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
 
-                double elapsed_time = curr_time - sc_w->start_time;
+/*                 double elapsed_time = curr_time - sc_w->start_time; */
+/* 		if(elapsed_time > slowest_time) */
+/* 			slowest_time = elapsed_time; */
+
+//		double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
+                double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/_get_ctx_velocity(sc_w);
 		if(elapsed_time > slowest_time)
 			slowest_time = elapsed_time;
+
         }
+//	return slowest_time / 1000000.0;
 	return slowest_time;
 }
 
+/* Smallest estimated execution time among the scheduling contexts known to
+ * the hypervisor. For each context the estimate is
+ * (config->ispeed_ctx_sample / 1e9) / _get_ctx_velocity(context).
+ *
+ * _get_ctx_velocity() returns -1.0 while a context has no measured speed.
+ * Such contexts are skipped: dividing by the sentinel would give a negative
+ * "time" that always wins the minimum and hands the caller a negative bound.
+ *
+ * Returns the smallest estimate, or the value of starpu_timing_now() taken
+ * on entry when no context has a usable speed (same start value as before).
+ * NOTE(review): that start value is a timestamp, presumably in microseconds,
+ * whereas the estimates look like seconds - confirm the caller copes with
+ * it as an "unknown" upper value. */
+double _get_fastest_ctx_exec_time(void)
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+
+	double curr_time = starpu_timing_now();
+	double fastest_time = curr_time;
+
+	int s;
+	struct sched_ctx_hypervisor_wrapper* sc_w;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
+
+		/* -1.0 means "no measurement yet"; 0.0 would divide by zero */
+		double velocity = _get_ctx_velocity(sc_w);
+		if(velocity <= 0.0)
+			continue;
+
+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/velocity;
+
+		if(elapsed_time < fastest_time)
+			fastest_time = elapsed_time;
+	}
+
+	return fastest_time;
+}
+
+
+/* Speed of one worker inside the scheduling context wrapped by sc_w, in Gflops/s.
+ * Return values after this change:
+ *   -1.0    the worker is not part of the context;
+ *   1e-14   the worker has no elapsed flops recorded in this context;
+ *   else    elapsed Gflops of the worker divided by the seconds spent since
+ *           sc_w->start_time; the value is also stored in
+ *           sc_w->ref_velocity[worker] before being returned.
+ * NOTE(review): a worker that belongs to the context no longer yields -1.0,
+ * so -1.0 fallbacks in callers are only reached for workers outside the
+ * context - confirm this is intended. */
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker)
 {
 	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
 		return -1.0;
 
-        double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000; /*in gflops */
+        double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
-	double sample = config->ispeed_w_sample[worker] / 1000000000; /*in gflops */
+	/* NOTE(review): sample is now only referenced by the commented-out threshold test below */
+	double sample = config->ispeed_w_sample[worker] / 1000000000.0; /*in gflops */
 
 	double ctx_elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
 	double ctx_sample = config->ispeed_ctx_sample;
+	/* the context passed its sample while this worker did nothing: report a tiny
+	   non-zero speed (with the new final return below, the same value is returned
+	   for every worker with zero elapsed flops) */
 	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
 		return 0.00000000000001;
 
-        if( elapsed_flops >= sample)
-        {
-                double curr_time = starpu_timing_now();
-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
-                return (elapsed_flops/elapsed_time); /* in Gflops/s */
-        }
-
-        return -1.0;
-
-/*         if( elapsed_flops != 0.0) */
+/*         if( elapsed_flops >= sample) */
 /*         { */
 /*                 double curr_time = starpu_timing_now(); */
-/*                 double elapsed_time = curr_time - sc_w->start_time; */
-/*                 return (elapsed_flops/elapsed_time); */
+/*                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /\* in seconds *\/ */
+/* 		sc_w->ref_velocity[worker] = (elapsed_flops/elapsed_time); /\* in Gflops/s *\/ */
+/*                 return sc_w->ref_velocity[worker]; */
 /*         } */
 
-/*         return 0.00000000000001; */
+/*         return -1.0; */
+
+	/* any non-zero amount of work is enough to compute a speed; the per-worker
+	   sample threshold is no longer applied */
+        if( elapsed_flops != 0.0)
+        {
+                double curr_time = starpu_timing_now();
+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
+                sc_w->ref_velocity[worker] = (elapsed_flops/elapsed_time);/* in Gflops/s */
+                return sc_w->ref_velocity[worker];
+        }
+
+        return 0.00000000000001;
 
 
 }
@@ -423,14 +482,14 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
 {
         int npus = 0;
-        double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch) / 1000000000 ; /* in gflops */
+        double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch) / 1000000000.0 ; /* in gflops */
 	double avg_elapsed_flops = elapsed_flops / npus;
-	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000;
+	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000.0;
 
         if( avg_elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
-                double elapsed_time = (curr_time - sc_w->start_time) / 1000000; /* in seconds */
+                double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
                 return avg_elapsed_flops/elapsed_time; /* in Gflops/s */
         }
 
@@ -451,7 +510,7 @@ int _velocity_gap_btw_ctxs()
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
 		double ctx_v = _get_ctx_velocity(sc_w);
-		if(ctx_v != 0.0)
+		if(ctx_v != -1.0)
 		{
 			for(j = 0; j < nsched_ctxs; j++)
 			{
@@ -463,7 +522,7 @@ int _velocity_gap_btw_ctxs()
 
 					other_sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[j]);
 					double other_ctx_v = _get_ctx_velocity(other_sc_w);
-					if(other_ctx_v != 0.0)
+					if(other_ctx_v != -1.0)
 					{
 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
 						if(gap > 1.5)

+ 2 - 0
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -52,6 +52,8 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w);
 
 double _get_slowest_ctx_exec_time(void);
 
+double _get_fastest_ctx_exec_time(void);
+
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker); 
 
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);

+ 2 - 1
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -158,6 +158,7 @@ struct starpu_performance_counters* sched_ctx_hypervisor_init(struct sched_ctx_h
 			hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
 			hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
 			hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0;
+			hypervisor.sched_ctx_w[i].ref_velocity[j] = -1.0;
 		}
 	}
 
@@ -610,7 +611,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 				_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
 
 				hypervisor.resize[sender_sched_ctx] = 1;
-//				hypervisor.resize[receiver_sched_ctx] = 1;
+				//	hypervisor.resize[receiver_sched_ctx] = 1;
 				/* if the user allowed resizing leave the decisions to the application */
 				if(imposed_resize)  imposed_resize = 0;
 

+ 21 - 3
src/core/sched_ctx.c

@@ -190,9 +190,12 @@ static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sche
 	{
 		if(workers->nworkers > 0)
 		{
-			int worker = workers->remove(workers, workerids[i]);
-			if(worker >= 0)
-				removed_workers[(*n_removed_workers)++] = worker;
+			if(_starpu_worker_belongs_to_a_sched_ctx(workerids[i], sched_ctx->id))
+			{
+				int worker = workers->remove(workers, workerids[i]);
+				if(worker >= 0)
+					removed_workers[(*n_removed_workers)++] = worker;
+			}
 		}
 	}
 
@@ -924,6 +927,21 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
 	return 0;
 }
 
+/* Return 1 when workerid is contained in at least one scheduling context
+ * other than sched_ctx_id, 0 otherwise. All STARPU_NMAX_SCHED_CTXS slots of
+ * the machine configuration are scanned. */
+unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id)
+{
+	struct _starpu_machine_config *machine = (struct _starpu_machine_config *)_starpu_get_machine_config();
+	int slot;
+
+	for(slot = 0; slot < STARPU_NMAX_SCHED_CTXS; slot++)
+	{
+		struct _starpu_sched_ctx *candidate = &machine->sched_ctxs[slot];
+
+		/* ignore slots whose id is STARPU_NMAX_SCHED_CTXS (presumably unused
+		   slots - confirm) and the context the caller asked to exclude */
+		if(!candidate || candidate->id == STARPU_NMAX_SCHED_CTXS || candidate->id == sched_ctx_id)
+			continue;
+
+		if(starpu_sched_ctx_contains_worker(workerid, candidate->id))
+			return 1;
+	}
+
+	return 0;
+}
+		 
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);

+ 3 - 0
src/core/sched_ctx.h

@@ -135,6 +135,9 @@ int starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu
    it should stop poping from it */
 void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker);
 
+/* Check if the worker belongs to another sched_ctx */
+unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
+
 #if defined(_MSC_VER) || defined(STARPU_SIMGRID)
 _starpu_pthread_mutex_t* starpu_get_changing_ctx_mutex(unsigned sched_ctx_id);
 #endif