
added new resizing policy

Andra Hugo, 12 years ago
parent commit 8f40b19fec

+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -33,7 +33,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/gflops_rate_policy.c	\
 	hypervisor_policies/lp_policy.c			\
 	hypervisor_policies/lp2_policy.c		\
-	hypervisor_policies/ispeed_policy.c
+	hypervisor_policies/ispeed_policy.c		\
+	hypervisor_policies/ispeed_lp_policy.c
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_tools.h		\

+ 376 - 0
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -0,0 +1,376 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_config.h>
+#include "lp_tools.h"
+#include <math.h>
+#include <sys/time.h> /* gettimeofday() */
+#include <errno.h>    /* EBUSY */
+
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers);
+static double _find_tmax(double t1, double t2);
+
+
+static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double flops_on_w[ns][nw], int *in_sched_ctxs, int *workers)
+{
+	double draft_w_in_s[ns][nw];
+	double draft_flops_on_w[ns][nw];
+	double flops[ns];
+	double velocity[ns][nw];
+
+	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
+	
+	int w,s;
+
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			w_in_s[s][w] = 0.0;
+			draft_w_in_s[s][w] = 0.0;
+			flops_on_w[s][w] = 0.0;
+			draft_flops_on_w[s][w] = 0.0;
+			int worker = workers == NULL ? w : workers[w];
+
+			velocity[s][w] = _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), worker);
+			if(velocity[s][w] == -1.0)
+			{
+				enum starpu_archtype arch = starpu_worker_get_type(worker);
+				velocity[s][w] = _get_velocity_per_worker_type(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), arch);
+				if(velocity[s][w] == -1.0)
+					velocity[s][w] = arch == STARPU_CPU_WORKER ? 1 / 5.0 : 1 / 50.0;
+			}
+			
+		}
+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
+		flops[s] = config->ispeed_ctx_sample;
+	}
+
+
+	/* take the exec time of the slowest ctx
+	   as a starting point and then try to minimize it
+	   while letting it increase a little for the faster ctxs */
+	double tmax = _get_slowest_ctx_exec_time();
+	double smallest_tmax = tmax - 0.5*tmax;
+
+	double res = 1.0;
+	unsigned has_sol = 0;
+	double tmin = 0.0;
+	double old_tmax = 0.0;
+	unsigned found_sol = 0;
+
+	struct timeval start_time;
+	struct timeval end_time;
+	int nd = 0;
+	gettimeofday(&start_time, NULL);
+
+	/* we fix tmax and do not treat it as an unknown;
+	   we just vary its value by dichotomy */
+	while(tmax > 1.0)
+	{
+		/* find a solution and save the values in the draft tables;
+		   only if the system has a solution do we copy them
+		   into the proper tables */
+		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers);
+		if(res != 0.0)
+		{
+			for(s = 0; s < ns; s++)
+				for(w = 0; w < nw; w++)
+				{
+					w_in_s[s][w] = draft_w_in_s[s][w];
+					flops_on_w[s][w] = draft_flops_on_w[s][w];
+				}
+			has_sol = 1;
+			found_sol = 1;
+		}
+		else
+			has_sol = 0;
+
+		/* if we have a solution with this tmax, try a smaller value,
+		   still bigger than the old min */
+		if(has_sol)
+		{
+			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
+				break;
+			old_tmax = tmax;
+		}
+		else /*else try a bigger one but smaller than the old tmax */
+		{
+			tmin = tmax;
+			if(old_tmax != 0.0)
+				tmax = old_tmax;
+		}
+		if(tmin == tmax) break;
+		tmax = _find_tmax(tmin, tmax);
+
+		if(tmax < smallest_tmax)
+		{
+			tmax = old_tmax;
+			tmin = smallest_tmax;
+			tmax = _find_tmax(tmin, tmax);
+		}
+		nd++;
+	}
+	gettimeofday(&end_time, NULL);
+
+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
+
+	float timing = (float)(diff_s*1000000 + diff_us)/1000;
+
+//        fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);
+
+	return found_sol;
+}
+
+/*
+ * GNU Linear Programming Kit backend
+ */
+#ifdef STARPU_HAVE_GLPK_H
+#include <glpk.h>
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers)
+{
+	int w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MAX);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		int ne = 4 * ns * nw /* worker execution time */
+			+ 1; /* GLPK arrays are 1-based, so allocate one extra slot */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+
+		/* Variables: number of flops assigned to worker w in context s, and
+		   a 0/1 acknowledgment that worker w belongs to context s */
+		glp_add_cols(lp, 2*nw*ns);
+#define colnum(w, s) ((s)*nw+(w)+1)
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+				glp_set_obj_coef(lp, nw*ns+colnum(w,s), 1.);
+		
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "flopsw%ds%dn", w, s);
+				glp_set_col_name(lp, colnum(w,s), name);
+				glp_set_col_bnds(lp, colnum(w,s), GLP_LO, 0., 0.);
+
+				snprintf(name, sizeof(name), "w%ds%dn", w, s);
+				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
+				glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
+
+			}
+
+
+		int curr_row_idx = 0;
+		/* Total worker execution time */
+		glp_add_rows(lp, nw*ns);
+
+		/* nflops[s][w]/v[s][w] <= x[s][w]*tmax */
+		for(s = 0; s < ns; s++)
+		{
+			for (w = 0; w < nw; w++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %s", name);
+				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
+
+				/* nflops[s][w] */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = colnum(w, s);
+				ar[n] = 1 / velocity[s][w];
+
+				n++;
+				
+				/* x[s][w] = 1 | 0 */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = nw*ns+colnum(w,s);
+				ar[n] = (-1) * tmax;
+				n++;
+				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		curr_row_idx += nw*ns;
+
+		/* sum(flops[s][w]) = flops[s] */
+		glp_add_rows(lp, ns);
+		for (s = 0; s < ns; s++)
+		{
+			char title[64];
+			/* one row per context: fixes the total flops of ctx s */
+			snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s);
+			glp_set_row_name(lp, curr_row_idx+s+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = curr_row_idx+s+1;
+				ja[n] = colnum(w, s);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]);
+		}
+
+		curr_row_idx += ns;
+
+		/* sum(x[s][w]) = 1 */
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char title[64];
+			/* one row per worker: x[s][w] summed over all contexts s */
+			snprintf(title, sizeof(title), "w%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = nw*ns+colnum(w,s);
+				ar[n] = 1;
+				n++;
+			}
+
+			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+		if(n != ne)
+			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	int stat = glp_get_prim_stat(lp);
+	/* if there is no feasible solution, return */
+	if(stat == GLP_NOFEAS)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	double res = glp_get_obj_val(lp);
+
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
+			w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
+//			printf("%d/%d: w in s %lf flops %lf \n", w, s, w_in_s[s][w], flops_on_w[s][w]);
+		}
+
+	glp_delete_prob(lp);
+	return res;
+}
+
+
+static double _find_tmax(double t1, double t2)
+{
+	return t1 + ((t2 - t1)/2);
+}
+
+
+static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
+{
+
+	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		if(_velocity_gap_btw_ctxs())
+		{
+			int ns = sched_ctx_hypervisor_get_nsched_ctxs();
+			int nw = starpu_worker_get_count(); /* total number of workers */
+
+			double w_in_s[ns][nw];
+			double flops_on_w[ns][nw];
+
+			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
+			/* if we found at least one solution, redistribute the resources */
+			if(found_sol)
+			{
+				int w, s;
+				double nworkers[ns][2];
+				int nworkers_rounded[ns][2];
+				for(s = 0; s < ns; s++)
+				{
+					nworkers[s][0] = 0.0;
+					nworkers[s][1] = 0.0;
+					nworkers_rounded[s][0] = 0;
+					nworkers_rounded[s][1] = 0;
+
+				}
+
+				for(s = 0; s < ns; s++)
+				{
+					for(w = 0; w < nw; w++)
+					{
+						enum starpu_archtype arch = starpu_worker_get_type(w);
+
+						if(arch == STARPU_CUDA_WORKER)
+						{
+							nworkers[s][0] += w_in_s[s][w];
+							if(w_in_s[s][w] >= 0.3)
+								nworkers_rounded[s][0]++;
+						}
+						else
+						{
+							nworkers[s][1] += w_in_s[s][w];
+							if(w_in_s[s][w] >= 0.3)
+								nworkers_rounded[s][1]++;
+						}
+					}
+				}
+/* 				for(s = 0; s < ns; s++) */
+/* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
+/* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
+
+				_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+
+			}
+		}
+		pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
+
+struct sched_ctx_hypervisor_policy ispeed_lp_policy = {
+	.size_ctxs = NULL,
+	.handle_poped_task = ispeed_lp_handle_poped_task,
+	.handle_pushed_task = NULL,
+	.handle_idle_cycle = NULL,
+	.handle_idle_end = NULL,
+	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
+	.custom = 0,
+	.name = "ispeed_lp"
+};
+
+#endif /* STARPU_HAVE_GLPK_H */
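
For reference, the search implemented in this new file can be stated compactly: _glp_resolve() checks, for a fixed makespan tmax, whether the LP with constraints flops_on_w[s][w]/velocity[s][w] <= w_in_s[s][w]*tmax, sum over w of flops_on_w[s][w] = flops[s], and sum over s of w_in_s[s][w] = 1 has a solution, and the outer loop bisects tmax between the last infeasible and the last feasible value. A minimal sketch of that dichotomy, assuming a hypothetical feasibility oracle lp_feasible() standing in for _glp_resolve():

/* hypothetical stand-in for _glp_resolve(): nonzero iff the flops of
   all contexts fit on the workers within makespan tmax */
extern int lp_feasible(double tmax);

/* bisect tmax between a known-infeasible lower bound tmin and a
   known-feasible upper bound tmax, as the loop above does */
static double bisect_tmax(double tmin, double tmax, double eps)
{
	double best = tmax;
	while (tmax - tmin > eps)
	{
		double mid = tmin + (tmax - tmin) / 2;  /* cf. _find_tmax() */
		if (lp_feasible(mid))
		{
			best = mid;  /* feasible: try a smaller makespan */
			tmax = mid;
		}
		else
			tmin = mid;  /* infeasible: the makespan must grow */
	}
	return best;
}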

+ 3 - 0
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -120,6 +120,7 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 								config->priority[curr_workers[index]])
 							{
 								double curr_worker_velocity = _get_velocity_per_worker(sc_w, curr_workers[index]);
+//								printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_velocity, curr_workers[index], curr_worker_velocity);
 								if(worker_velocity < curr_worker_velocity && curr_worker_velocity != -1.0)
 								{
 									curr_workers[index] = worker;
@@ -163,6 +164,8 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker)
 							new_speed += _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]);
 						double fastest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(fastest_sched_ctx));
 						double slowest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(slowest_sched_ctx));
+//						printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, 
+//						       slowest_speed, workers_to_move[0], new_speed);
 						if((slowest_speed + new_speed) <= (fastest_speed - new_speed))
 						{
 							sched_ctx_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0);
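
The guard in the context above moves workers from the fastest to the slowest context only when the slowest context, even after gaining the moved workers' speed, still does not overtake the fastest one. A worked sketch of that check; the function name and numbers are illustrative:

/* mirror of the move condition above: a transfer is worthwhile only
   if the slowest ctx stays at most as fast as the fastest ctx */
static int worth_moving(double fastest_speed, double slowest_speed,
			double new_speed)
{
	return (slowest_speed + new_speed) <= (fastest_speed - new_speed);
}

/* e.g. fastest = 100, slowest = 20, moved workers' speed = 30:
   20 + 30 = 50 <= 100 - 30 = 70, so the workers are moved */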

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -18,6 +18,7 @@
 #include <starpu_config.h>
 
 
+#ifdef STARPU_HAVE_GLPK_H
 static void lp_handle_poped_task(unsigned sched_ctx, int worker)
 {
 	if(_velocity_gap_btw_ctxs())
@@ -85,7 +86,6 @@ static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
-#ifdef STARPU_HAVE_GLPK_H
 struct sched_ctx_hypervisor_policy lp_policy = {
 	.size_ctxs = lp_size_ctxs,
 	.handle_poped_task = lp_handle_poped_task,

+ 3 - 6
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -20,7 +20,7 @@
 
 #ifdef STARPU_HAVE_GLPK_H
 
-static double _glp_get_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
+double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
 {
 	int s, w;
 	glp_prob *lp;
@@ -180,22 +180,19 @@ double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double r
 #ifdef STARPU_HAVE_GLPK_H
 	double v[nsched_ctxs][ntypes_of_workers];
 	double flops[nsched_ctxs];
-#endif
+
 	int i = 0;
 	struct sched_ctx_hypervisor_wrapper* sc_w;
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
-#ifdef STARPU_HAVE_GLPK_H
 		v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
 		v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
 		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
 //			printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
-#endif
 	}
 
-#ifdef STARPU_HAVE_GLPK_H
-	return 1/_glp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
+	return 1/_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
 #else
 	return 0.0;
 #endif
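
The rename above exposes the GLPK solver so other policies can reuse it; _lp_get_nworkers_per_ctx() remains the convenience wrapper that fills in the per-type velocities and remaining flops itself. A hypothetical caller, in the spirit of lp_policy.c; the worker counts are illustrative and the index convention (0 = CUDA, 1 = CPU) is the one used in the hunk above:

#include "lp_tools.h"

/* hypothetical usage sketch: compute, for ns contexts and 2 worker
   types (0 = CUDA, 1 = CPU), the fractional worker counts in res */
static void distribute_workers_example(int ns)
{
	double res[ns][2];           /* filled in by the LP */
	int total_nw[2] = { 3, 9 };  /* illustrative: 3 GPUs, 9 CPUs */

	double tmax = _lp_get_nworkers_per_ctx(ns, 2, res, total_nw);
	if (tmax == 0.0)
		return;  /* GLPK not available, no distribution computed */

	/* res[s][0] and res[s][1] now hold per-ctx GPU/CPU shares,
	   ready to be rounded and applied */
}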

+ 4 - 0
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.h

@@ -24,6 +24,10 @@
 #include <glpk.h>
 #endif //STARPU_HAVE_GLPK_H
 
+/* returns 1/tmax, and computes in the table res the number of workers needed by each context such that the system ends up in the smallest tmax */
+double _lp_compute_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], double flops[nsched_ctxs], 
+				    double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+
 /* returns tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
 double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
 

+ 68 - 3
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -322,6 +322,29 @@ static double _get_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int
 	return ret_val;
 }
 
+static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype req_arch)
+{
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
+			return config->ispeed_w_sample[worker];
+		}
+        }
+
+	return 0.0;
+}
+
 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 {
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
@@ -330,6 +353,7 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops;
 	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample : 
 		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE);
+//	printf("%d: prc %lf sample %lf\n", sc_w->sched_ctx, prc, redim_sample);
 	if(prc >= redim_sample)
         {
                 double curr_time = starpu_timing_now();
@@ -339,12 +363,41 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 	return 0.0;
 }
 
+double _get_slowest_ctx_exec_time(void)
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+
+	double curr_time = starpu_timing_now();
+	double slowest_time = 0.0;
+
+	int s;
+	struct sched_ctx_hypervisor_wrapper* sc_w;		
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
+
+                double elapsed_time = curr_time - sc_w->start_time;
+		if(elapsed_time > slowest_time)
+			slowest_time = elapsed_time;
+        }
+	return slowest_time;
+}
+
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker)
 {
+	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
+		return -1.0;
+
         double elapsed_flops = sc_w->elapsed_flops[worker];
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
 	double sample = config->ispeed_w_sample[worker];
 
+	double ctx_elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+	double ctx_sample = config->ispeed_ctx_sample;
+	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
+		return 0.00000000000001;
+
         if( elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
@@ -354,6 +407,16 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
         return -1.0;
 
+/*         if( elapsed_flops != 0.0) */
+/*         { */
+/*                 double curr_time = starpu_timing_now(); */
+/*                 double elapsed_time = curr_time - sc_w->start_time; */
+/*                 return (elapsed_flops/elapsed_time); */
+/*         } */
+
+/*         return 0.00000000000001; */
+
+
 }
 
 /* compute an average value of the cpu velocity */
@@ -361,12 +424,14 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
 {
         int npus = 0;
         double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch);
+	if(npus == 0)
+		return -1.0;
+	double avg_elapsed_flops = elapsed_flops / npus;
+	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch);
 
-        if( elapsed_flops != 0.0)
+        if( avg_elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = curr_time - sc_w->start_time;
-                return (elapsed_flops/elapsed_time) / npus;
+                return elapsed_flops/elapsed_time;
         }
 
         return -1.0;
@@ -422,7 +487,7 @@ void _get_total_nw(int *workers, int nworkers, int ntypes_of_workers, int total_
 
 	for(w = 0; w < current_nworkers; w++)
 	{
-		enum starpu_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
+ 		enum starpu_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
 			starpu_worker_get_type(workers[w]);
 		if(arch == STARPU_CPU_WORKER)
 			total_nw[1]++;
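
The velocity helpers above share one contract: a measurement is reported only once a sample of flops has been observed, and -1.0 means "not measurable yet", letting callers fall back to per-arch defaults (as ispeed_lp_policy.c does with 1/5.0 and 1/50.0). A minimal sketch of that contract; the function name and numbers are illustrative:

/* sketch of the contract shared by _get_velocity_per_worker() and
   _get_velocity_per_worker_type(): trust a measurement only after
   at least `sample` flops have elapsed */
static double velocity_or_unknown(double elapsed_flops, double sample,
				  double start_time, double curr_time)
{
	if (elapsed_flops >= sample)
		return elapsed_flops / (curr_time - start_time);
	return -1.0;  /* caller falls back to a default velocity */
}

/* e.g. 2000 flops against a 1000-flop sample over 40 time units
   gives 50 flops per unit; with only 500 flops it gives -1.0 */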

+ 2 - 0
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -50,6 +50,8 @@ unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now);
 
 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w);
 
+double _get_slowest_ctx_exec_time(void);
+
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker); 
 
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);

+ 2 - 0
sched_ctx_hypervisor/src/sched_ctx_config.c

@@ -64,7 +64,9 @@ void sched_ctx_hypervisor_set_config(unsigned sched_ctx, void *config)
 		_update_config(hypervisor.sched_ctx_w[sched_ctx].config, config);
 	}
 	else
+	{
 		hypervisor.sched_ctx_w[sched_ctx].config = config;
+	}
 
 	return;
 }

+ 3 - 1
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -34,8 +34,9 @@ extern struct sched_ctx_hypervisor_policy gflops_rate_policy;
 #ifdef STARPU_HAVE_GLPK_H
 extern struct sched_ctx_hypervisor_policy lp_policy;
 extern struct sched_ctx_hypervisor_policy lp2_policy;
+extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;
+#endif // STARPU_HAVE_GLPK_H
 extern struct sched_ctx_hypervisor_policy ispeed_policy;
-#endif // STARPU_HAVE_GLPK_H
 
 
 static struct sched_ctx_hypervisor_policy *predefined_policies[] =
@@ -45,6 +46,7 @@ static struct sched_ctx_hypervisor_policy *predefined_policies[] =
 #ifdef STARPU_HAVE_GLPK_H
 	&lp_policy,
 	&lp2_policy,
+	&ispeed_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
 	&ispeed_policy
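
With the entry added to predefined_policies, the new policy is selectable like the existing LP policies. A hypothetical initialization sketch; the sched_ctx_hypervisor_init() call is assumed from the hypervisor's public header and is not part of this diff:

#include <sched_ctx_hypervisor.h>

/* hypothetical usage sketch: start the hypervisor with the new
   policy (assumes init takes a pointer to the policy struct) */
extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;

void start_with_ispeed_lp(void)
{
	sched_ctx_hypervisor_init(&ispeed_lp_policy);
}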