
added new resizing policy

Andra Hugo, 12 years ago
parent commit 8f40b19fec

+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -33,7 +33,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/gflops_rate_policy.c	\
 	hypervisor_policies/lp_policy.c			\
 	hypervisor_policies/lp2_policy.c		\
-	hypervisor_policies/ispeed_policy.c
+	hypervisor_policies/ispeed_policy.c		\
+	hypervisor_policies/ispeed_lp_policy.c
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_tools.h		\

+ 376 - 0
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -0,0 +1,376 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_config.h>
+#include "lp_tools.h"
+#include <math.h>
+#include <sys/time.h> /* gettimeofday() */
+#include <errno.h>    /* EBUSY */
+
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers);
+static double _find_tmax(double t1, double t2);
+
+
+static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double flops_on_w[ns][nw], int *in_sched_ctxs, int *workers)
+{
+	double draft_w_in_s[ns][nw];
+	double draft_flops_on_w[ns][nw];
+	double flops[ns];
+	double velocity[ns][nw];
+
+	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
+	
+	int w,s;
+
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			w_in_s[s][w] = 0.0;
+			draft_w_in_s[s][w] = 0.0;
+			flops_on_w[s][w] = 0.0;
+			draft_flops_on_w[s][w] = 0.0;
+			int worker = workers == NULL ? w : workers[w];
+
+			velocity[s][w] = _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), worker);
+			if(velocity[s][w] == -1.0)
+			{
+				enum starpu_archtype arch = starpu_worker_get_type(worker);
+				velocity[s][w] = _get_velocity_per_worker_type(sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]), arch);
+				if(velocity[s][w] == -1.0)
+					velocity[s][w] = arch == STARPU_CPU_WORKER ? 1 / 5.0 : 1 / 50.0;
+			}
+			
+		}
+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
+		flops[s] = config->ispeed_ctx_sample;
+	}
+
+
+	/* take the exec time of the slowest ctx
+	   as a starting point and then try to minimize it
+	   while letting it increase a little for the faster ctxs */
+	double tmax = _get_slowest_ctx_exec_time();
+	double smallest_tmax = tmax - 0.5*tmax;
+
+	double res = 1.0;
+	unsigned has_sol = 0;
+	double tmin = 0.0;
+	double old_tmax = 0.0;
+	unsigned found_sol = 0;
+
+	struct timeval start_time;
+	struct timeval end_time;
+	int nd = 0;
+	gettimeofday(&start_time, NULL);
+
+	/* we fix tmax and do not treat it as an unknown;
+	   we just vary its value by dichotomy */
+	while(tmax > 1.0)
+	{
+		/* find a solution and save the values in the draft tables;
+		   only if the system has a solution do we copy them
+		   into the proper tables */
+		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers);
+		if(res != 0.0)
+		{
+			for(s = 0; s < ns; s++)
+				for(w = 0; w < nw; w++)
+				{
+					w_in_s[s][w] = draft_w_in_s[s][w];
+					flops_on_w[s][w] = draft_flops_on_w[s][w];
+				}
+			has_sol = 1;
+			found_sol = 1;
+		}
+		else
+			has_sol = 0;
+
+		/* if we have a solution with this tmax, try a smaller value,
+		   still bigger than the old min */
+		if(has_sol)
+		{
+			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
+				break;
+			old_tmax = tmax;
+		}
+		else /*else try a bigger one but smaller than the old tmax */
+		{
+			tmin = tmax;
+			if(old_tmax != 0.0)
+				tmax = old_tmax;
+		}
+		if(tmin == tmax) break;
+		tmax = _find_tmax(tmin, tmax);
+
+		if(tmax < smallest_tmax)
+		{
+			tmax = old_tmax;
+			tmin = smallest_tmax;
+			tmax = _find_tmax(tmin, tmax);
+		}
+		nd++;
+	}
+	gettimeofday(&end_time, NULL);
+
+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
+
+	float timing = (float)(diff_s*1000000 + diff_us)/1000;
+
+//        fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);
+
+	return found_sol;
+}
+
+/*
+ * GNU Linear Programming Kit backend
+ */
+#ifdef STARPU_HAVE_GLPK_H
+#include <glpk.h>
+static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns], double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw], int *workers)
+{
+	int w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MAX);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		int ne = 4 * ns * nw /* worker execution time */
+			+ 1; /* GLPK arrays are 1-based, so allocate one extra slot */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+
+		/* Variables: number of flops assigned to worker w in context s, and
+		   a 0/1 acknowledgment that worker w belongs to context s */
+		glp_add_cols(lp, 2*nw*ns);
+#define colnum(w, s) ((s)*nw+(w)+1)
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+				glp_set_obj_coef(lp, nw*ns+colnum(w,s), 1.);
+		
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "flopsw%ds%dn", w, s);
+				glp_set_col_name(lp, colnum(w,s), name);
+				glp_set_col_bnds(lp, colnum(w,s), GLP_LO, 0., 0.);
+
+				snprintf(name, sizeof(name), "w%ds%dn", w, s);
+				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
+				glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
+
+			}
+
+
+		int curr_row_idx = 0;
+		/* Total worker execution time */
+		glp_add_rows(lp, nw*ns);
+
+		/* nflops[s][w]/v[s][w] <= x[s][w]*tmax */
+		for(s = 0; s < ns; s++)
+		{
+			for (w = 0; w < nw; w++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %s", name);
+				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
+
+				/* nflops[s][w] */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = colnum(w, s);
+				ar[n] = 1 / velocity[s][w];
+
+				n++;
+				
+				/* x[s][w] = 1 | 0 */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = nw*ns+colnum(w,s);
+				ar[n] = (-1) * tmax;
+				n++;
+				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		curr_row_idx += nw*ns;
+
+		/* sum(flops[s][w]) = flops[s] */
+		glp_add_rows(lp, ns);
+		for (s = 0; s < ns; s++)
+		{
+			char title[64];
+			/* one row per context: fixes the total flops of ctx s */
+			snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s);
+			glp_set_row_name(lp, curr_row_idx+s+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = curr_row_idx+s+1;
+				ja[n] = colnum(w, s);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]);
+		}
+
+		curr_row_idx += ns;
+
+		/* sum(x[s][w]) = 1 */
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char title[64];
+			/* one row per worker: x[s][w] summed over all contexts s */
+			snprintf(title, sizeof(title), "w%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = nw*ns+colnum(w,s);
+				ar[n] = 1;
+				n++;
+			}
+
+			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+		if(n != ne)
+			printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	int stat = glp_get_prim_stat(lp);
+	/* if there is no feasible solution, return */
+	if(stat == GLP_NOFEAS)
+	{
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	double res = glp_get_obj_val(lp);
+
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
+			w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
+//			printf("%d/%d: w in s %lf flops %lf \n", w, s, w_in_s[s][w], flops_on_w[s][w]);
+		}
+
+	glp_delete_prob(lp);
+	return res;
+}
+
+
+static double _find_tmax(double t1, double t2)
+{
+	return t1 + ((t2 - t1)/2);
+}
+
+
+static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
+{
+
+	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		if(_velocity_gap_btw_ctxs())
+		{
+			int ns = sched_ctx_hypervisor_get_nsched_ctxs();
+			int nw = starpu_worker_get_count(); /* total number of workers */
+
+			double w_in_s[ns][nw];
+			double flops_on_w[ns][nw];
+
+			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
+			/* if we found at least one solution, redistribute the resources */
+			if(found_sol)
+			{
+				int w, s;
+				double nworkers[ns][2];
+				int nworkers_rounded[ns][2];
+				for(s = 0; s < ns; s++)
+				{
+					nworkers[s][0] = 0.0;
+					nworkers[s][1] = 0.0;
+					nworkers_rounded[s][0] = 0;
+					nworkers_rounded[s][1] = 0;
+
+				}
+
+				for(s = 0; s < ns; s++)
+				{
+					for(w = 0; w < nw; w++)
+					{
+						enum starpu_archtype arch = starpu_worker_get_type(w);
+
+						if(arch == STARPU_CUDA_WORKER)
+						{
+							nworkers[s][0] += w_in_s[s][w];
+							if(w_in_s[s][w] >= 0.3)
+								nworkers_rounded[s][0]++;
+						}
+						else
+						{
+							nworkers[s][1] += w_in_s[s][w];
+							if(w_in_s[s][w] >= 0.3)
+								nworkers_rounded[s][1]++;
+						}
+					}
+				}
+/* 				for(s = 0; s < ns; s++) */
+/* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
+/* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
+
+				_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+
+			}
+		}
+		pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
+
+struct sched_ctx_hypervisor_policy ispeed_lp_policy = {
+	.size_ctxs = NULL,
+	.handle_poped_task = ispeed_lp_handle_poped_task,
+	.handle_pushed_task = NULL,
+	.handle_idle_cycle = NULL,
+	.handle_idle_end = NULL,
+	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
+	.custom = 0,
+	.name = "ispeed_lp"
+};
+
+#endif /* STARPU_HAVE_GLPK_H */
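
For reference, the search implemented in this new file can be stated compactly: _glp_resolve() checks, for a fixed makespan tmax, whether the LP with constraints flops_on_w[s][w]/velocity[s][w] <= w_in_s[s][w]*tmax, sum over w of flops_on_w[s][w] = flops[s], and sum over s of w_in_s[s][w] = 1 has a solution, and the outer loop bisects tmax between the last infeasible and the last feasible value. A minimal sketch of that dichotomy, assuming a hypothetical feasibility oracle lp_feasible() standing in for _glp_resolve():

/* hypothetical stand-in for _glp_resolve(): nonzero iff the flops of
   all contexts fit on the workers within makespan tmax */
extern int lp_feasible(double tmax);

/* bisect tmax between a known-infeasible lower bound tmin and a
   known-feasible upper bound tmax, as the loop above does */
static double bisect_tmax(double tmin, double tmax, double eps)
{
	double best = tmax;
	while (tmax - tmin > eps)
	{
		double mid = tmin + (tmax - tmin) / 2;  /* cf. _find_tmax() */
		if (lp_feasible(mid))
		{
			best = mid;  /* feasible: try a smaller makespan */
			tmax = mid;
		}
		else
			tmin = mid;  /* infeasible: the makespan must grow */
	}
	return best;
}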

+ 3 - 0
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -120,6 +120,7 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 								config->priority[curr_workers[index]])
 							{
 								double curr_worker_velocity = _get_velocity_per_worker(sc_w, curr_workers[index]);
+//								printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_velocity, curr_workers[index], curr_worker_velocity);
 								if(worker_velocity < curr_worker_velocity && curr_worker_velocity != -1.0)
 								{
 									curr_workers[index] = worker;
@@ -163,6 +164,8 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker)
 							new_speed += _get_velocity_per_worker(sched_ctx_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]);
 						double fastest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(fastest_sched_ctx));
 						double slowest_speed = _get_ctx_velocity(sched_ctx_hypervisor_get_wrapper(slowest_sched_ctx));
+//						printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, 
+//						       slowest_speed, workers_to_move[0], new_speed);
 						if((slowest_speed + new_speed) <= (fastest_speed - new_speed))
 						{
 							sched_ctx_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0);
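
The guard in the context above moves workers from the fastest to the slowest context only when the slowest context, even after gaining the moved workers' speed, still does not overtake the fastest one. A worked sketch of that check; the function name and numbers are illustrative:

/* mirror of the move condition above: a transfer is worthwhile only
   if the slowest ctx stays at most as fast as the fastest ctx */
static int worth_moving(double fastest_speed, double slowest_speed,
			double new_speed)
{
	return (slowest_speed + new_speed) <= (fastest_speed - new_speed);
}

/* e.g. fastest = 100, slowest = 20, moved workers' speed = 30:
   20 + 30 = 50 <= 100 - 30 = 70, so the workers are moved */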

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -18,6 +18,7 @@
 #include <starpu_config.h>
 
 
+#ifdef STARPU_HAVE_GLPK_H
 static void lp_handle_poped_task(unsigned sched_ctx, int worker)
 {
 	if(_velocity_gap_btw_ctxs())
@@ -85,7 +86,6 @@ static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
-#ifdef STARPU_HAVE_GLPK_H
 struct sched_ctx_hypervisor_policy lp_policy = {
 	.size_ctxs = lp_size_ctxs,
 	.handle_poped_task = lp_handle_poped_task,

+ 3 - 6
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -20,7 +20,7 @@
 
 #ifdef STARPU_HAVE_GLPK_H
 
-static double _glp_get_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
+double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
 {
 	int s, w;
 	glp_prob *lp;
@@ -180,22 +180,19 @@ double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double r
 #ifdef STARPU_HAVE_GLPK_H
 	double v[nsched_ctxs][ntypes_of_workers];
 	double flops[nsched_ctxs];
-#endif
+
 	int i = 0;
 	struct sched_ctx_hypervisor_wrapper* sc_w;
 	for(i = 0; i < nsched_ctxs; i++)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
-#ifdef STARPU_HAVE_GLPK_H
 		v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
 		v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
 		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
 //			printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
-#endif
 	}
 
-#ifdef STARPU_HAVE_GLPK_H
-	return 1/_glp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
+	return 1/_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
 #else
 	return 0.0;
 #endif
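
The rename above exposes the GLPK solver so other policies can reuse it; _lp_get_nworkers_per_ctx() remains the convenience wrapper that fills in the per-type velocities and remaining flops itself. A hypothetical caller, in the spirit of lp_policy.c; the worker counts are illustrative and the index convention (0 = CUDA, 1 = CPU) is the one used in the hunk above:

#include "lp_tools.h"

/* hypothetical usage sketch: compute, for ns contexts and 2 worker
   types (0 = CUDA, 1 = CPU), the fractional worker counts in res */
static void distribute_workers_example(int ns)
{
	double res[ns][2];           /* filled in by the LP */
	int total_nw[2] = { 3, 9 };  /* illustrative: 3 GPUs, 9 CPUs */

	double tmax = _lp_get_nworkers_per_ctx(ns, 2, res, total_nw);
	if (tmax == 0.0)
		return;  /* GLPK not available, no distribution computed */

	/* res[s][0] and res[s][1] now hold per-ctx GPU/CPU shares,
	   ready to be rounded and applied */
}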

+ 4 - 0
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.h

@@ -24,6 +24,10 @@
 #include <glpk.h>
 #endif //STARPU_HAVE_GLPK_H
 
+/* returns 1/tmax, and computes in the table res the number of workers needed by each context such that the system ends up in the smallest tmax */
+double _lp_compute_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], double flops[nsched_ctxs], 
+				    double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+
 /* returns tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
 double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
 

+ 68 - 3
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -322,6 +322,29 @@ static double _get_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int
 	return ret_val;
 }
 
+static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype req_arch)
+{
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
+			return config->ispeed_w_sample[worker];
+		}
+        }
+
+	return 0.0;
+}
+
 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 {
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
@@ -330,6 +353,7 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops;
 	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample : 
 		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE);
+//	printf("%d: prc %lf sample %lf\n", sc_w->sched_ctx, prc, redim_sample);
 	if(prc >= redim_sample)
         {
                 double curr_time = starpu_timing_now();
@@ -339,12 +363,41 @@ double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w)
 	return 0.0;
 }
 
+double _get_slowest_ctx_exec_time(void)
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+
+	double curr_time = starpu_timing_now();
+	double slowest_time = 0.0;
+
+	int s;
+	struct sched_ctx_hypervisor_wrapper* sc_w;		
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
+
+                double elapsed_time = curr_time - sc_w->start_time;
+		if(elapsed_time > slowest_time)
+			slowest_time = elapsed_time;
+        }
+	return slowest_time;
+}
+
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker)
 {
+	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
+		return -1.0;
+
         double elapsed_flops = sc_w->elapsed_flops[worker];
 	struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
 	double sample = config->ispeed_w_sample[worker];
 
+	double ctx_elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
+	double ctx_sample = config->ispeed_ctx_sample;
+	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
+		return 0.00000000000001;
+
         if( elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
@@ -354,6 +407,16 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
         return -1.0;
 
+/*         if( elapsed_flops != 0.0) */
+/*         { */
+/*                 double curr_time = starpu_timing_now(); */
+/*                 double elapsed_time = curr_time - sc_w->start_time; */
+/*                 return (elapsed_flops/elapsed_time); */
+/*         } */
+
+/*         return 0.00000000000001; */
+
+
 }
 
 /* compute an average value of the cpu velocity */
@@ -361,12 +424,14 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
 {
         int npus = 0;
         double elapsed_flops = _get_elapsed_flops(sc_w, &npus, arch);
+	if(npus == 0)
+		return -1.0;
+	double avg_elapsed_flops = elapsed_flops / npus;
+	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch);
 
-        if( elapsed_flops != 0.0)
+        if( avg_elapsed_flops >= sample)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = curr_time - sc_w->start_time;
-                return (elapsed_flops/elapsed_time) / npus;
+                return elapsed_flops/elapsed_time;
         }
 
         return -1.0;
@@ -422,7 +487,7 @@ void _get_total_nw(int *workers, int nworkers, int ntypes_of_workers, int total_
 
 	for(w = 0; w < current_nworkers; w++)
 	{
-		enum starpu_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
+ 		enum starpu_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
 			starpu_worker_get_type(workers[w]);
 		if(arch == STARPU_CPU_WORKER)
 			total_nw[1]++;
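
The velocity helpers above share one contract: a measurement is reported only once a sample of flops has been observed, and -1.0 means "not measurable yet", letting callers fall back to per-arch defaults (as ispeed_lp_policy.c does with 1/5.0 and 1/50.0). A minimal sketch of that contract; the function name and numbers are illustrative:

/* sketch of the contract shared by _get_velocity_per_worker() and
   _get_velocity_per_worker_type(): trust a measurement only after
   at least `sample` flops have elapsed */
static double velocity_or_unknown(double elapsed_flops, double sample,
				  double start_time, double curr_time)
{
	if (elapsed_flops >= sample)
		return elapsed_flops / (curr_time - start_time);
	return -1.0;  /* caller falls back to a default velocity */
}

/* e.g. 2000 flops against a 1000-flop sample over 40 time units
   gives 50 flops per unit; with only 500 flops it gives -1.0 */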

+ 2 - 0
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -50,6 +50,8 @@ unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now);
 
 double _get_ctx_velocity(struct sched_ctx_hypervisor_wrapper* sc_w);
 
+double _get_slowest_ctx_exec_time(void);
+
 double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsigned worker); 
 
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);

+ 2 - 0
sched_ctx_hypervisor/src/sched_ctx_config.c

@@ -64,7 +64,9 @@ void sched_ctx_hypervisor_set_config(unsigned sched_ctx, void *config)
 		_update_config(hypervisor.sched_ctx_w[sched_ctx].config, config);
 	}
 	else
+	{
 		hypervisor.sched_ctx_w[sched_ctx].config = config;
+	}
 
 	return;
 }

+ 3 - 1
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -34,8 +34,9 @@ extern struct sched_ctx_hypervisor_policy gflops_rate_policy;
 #ifdef STARPU_HAVE_GLPK_H
 extern struct sched_ctx_hypervisor_policy lp_policy;
 extern struct sched_ctx_hypervisor_policy lp2_policy;
+extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;
+#endif // STARPU_HAVE_GLPK_H
 extern struct sched_ctx_hypervisor_policy ispeed_policy;
-#endif // STARPU_HAVE_GLPK_H
 
 
 static struct sched_ctx_hypervisor_policy *predefined_policies[] =
@@ -45,6 +46,7 @@ static struct sched_ctx_hypervisor_policy *predefined_policies[] =
 #ifdef STARPU_HAVE_GLPK_H
 	&lp_policy,
 	&lp2_policy,
+	&ispeed_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
 	&ispeed_policy
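
With the entry added to predefined_policies, the new policy is selectable like the existing LP policies. A hypothetical initialization sketch; the sched_ctx_hypervisor_init() call is assumed from the hypervisor's public header and is not part of this diff:

#include <sched_ctx_hypervisor.h>

/* hypothetical usage sketch: start the hypervisor with the new
   policy (assumes init takes a pointer to the policy struct) */
extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;

void start_with_ispeed_lp(void)
{
	sched_ctx_hypervisor_init(&ispeed_lp_policy);
}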