Forráskód Böngészése

rename velocity -> speed

Andra Hugo 11 éve
szülő
commit
45c4f38b10

+ 10 - 2
doc/doxygen/chapters/api/performance_model.doxy

@@ -262,10 +262,18 @@ of use can be seen in \ref PerformanceModelExample.
 
 \fn double starpu_get_bandwidth_RAM_CUDA(unsigned cudadev)
 \ingroup API_Performance_Model
-Used to compute the velocity of resources
+Used to compute the execution time of tasks
 
 \fn double starpu_get_latency_RAM_CUDA(unsigned cudadev)
 \ingroup API_Performance_Model
-Used to compute the velocity of resources
+Used to compute the execution time of tasks
+
+\fn double starpu_get_bandwidth_CUDA_RAM(unsigned cudadev)
+\ingroup API_Performance_Model
+Used to compute the execution time of tasks
+
+\fn double starpu_get_latency_CUDA_RAM(unsigned cudadev)
+\ingroup API_Performance_Model
+Used to compute the execution time of tasks
 
 */

+ 1 - 1
doc/doxygen/chapters/scheduling_context_hypervisor.doxy

@@ -114,7 +114,7 @@ sc_hypervisor_ioctl(sched_ctx_id,
 \endcode
 
 The <b>Gflops rate</b> based strategy resizes the scheduling contexts such that they all finish at the same time.
-The velocity of each of them is considered and once one of them is significantly slower the resizing process is triggered.
+The speed of each of them is considered and once one of them is significantly slower, the resizing process is triggered.
 In order to do these computations the user has to input the total number of instructions needed to be executed by the
 parallel kernels and the number of instruction to be executed by each
 task.

+ 1 - 1
doc/texinfo/chapters/sc_hypervisor.texi

@@ -100,7 +100,7 @@ sc_hypervisor_ioctl(sched_ctx_id,
 @end cartouche
 
 The @b{Gflops rate} based strategy resizes the scheduling contexts such that they all finish at the same time.
-The velocity of each of them is considered and once one of them is significantly slower the resizing process is triggered.
+The speed of each of them is considered and once one of them is significantly slower, the resizing process is triggered.
 In order to do these computations the user has to input the total number of instructions needed to be executed by the
 parallel kernels and the number of instruction to be executed by each task.
 The number of flops to be executed by a context are passed as parameter when they are registered to the hypervisor,

+ 1 - 1
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -64,7 +64,7 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 #ifdef STARPU_HAVE_GLPK_H
 /* linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context st 
    the system ends up in the smallest tmax*/
-double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], 
+double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double speed[nsched_ctxs][ntypes_of_workers], 
 					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
 
 /* linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */

+ 5 - 5
sc_hypervisor/include/sc_hypervisor_monitoring.h

@@ -79,7 +79,7 @@ struct sc_hypervisor_wrapper
 
 	/* the average speed of the type of workers when they belonged to this context */
 	/* 0 - cuda 1 - cpu */
-	double ref_velocity[2];
+	double ref_speed[2];
 
 	/* number of flops submitted to this ctx */
 	double submitted_flops;
@@ -119,11 +119,11 @@ double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrappe
 /* get the number of flops executed by a context since the begining */
 double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w);
 
-/* compute an average value of the cpu/cuda velocity */
-double sc_hypervisorsc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
+/* compute an average value of the cpu/cuda speed */
+double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
 
-/* compte the actual velocity of all workers of a specific type of worker */
-double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch);
+/* compute the actual speed of all workers of a specific type of worker */
+double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch);
 
 #ifdef __cplusplus
 }

+ 12 - 12
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -29,7 +29,7 @@ extern "C"
 #define HYPERVISOR_START_REDIM_SAMPLE 0.1
 #define SC_NOTHING 0
 #define SC_IDLE 1
-#define SC_VELOCITY 2
+#define SC_SPEED 2
 
 struct sc_hypervisor_policy_task_pool
 {
@@ -74,8 +74,8 @@ unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receive
 /* check the policy's constraints in order to resize  and find a context willing the resources */
 unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now);
 
-/* compute the velocity of a context */
-double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w);
+/* compute the speed of a context */
+double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w);
 
 /* get the time of execution of the slowest context */
 double sc_hypervisor_get_slowest_ctx_exec_time(void);
@@ -83,14 +83,14 @@ double sc_hypervisor_get_slowest_ctx_exec_time(void);
 /* get the time of execution of the fastest context */
 double sc_hypervisor_get_fastest_ctx_exec_time(void);
 
-/* compute the velocity of a workers in a context */
-double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker); 
+/* compute the speed of a worker in a context */
+double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker); 
 
-/* compute the velocity of a type of worker in a context */
-double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
+/* compute the speed of a type of worker in a context */
+double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
 
-/* compute the velocity of a type of worker in a context depending on its history */ 
-double sc_hypervisor_get_ref_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
+/* compute the speed of a type of worker in a context depending on its history */ 
+double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
 
 /* get the list of workers grouped by type */
 void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers]);
@@ -101,10 +101,10 @@ unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker);
 /* check if worker was idle long enough */
 unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker);
 
-/* check if there is a velocity gap btw ctxs */
-unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void);
+/* check if there is a speed gap btw ctxs */
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void);
 
-/* check what triggers resizing (idle, velocity, etc.)*/
+/* check what triggers resizing (idle, speed, etc.) */
 unsigned sc_hypervisor_get_resize_criteria();
 
 #ifdef __cplusplus

+ 14 - 14
sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -20,12 +20,12 @@
 #include <math.h>
 #include <sys/time.h>
 
-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer);
+static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer);
 
 
-static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
+static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
 {
-	double velocity[ns][nw];
+	double speed[ns][nw];
 
 	int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
 	
@@ -41,7 +41,7 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
 			int worker = workers == NULL ? w : workers[w];
 
 			enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
-			velocity[s][w] = sc_hypervisor_get_velocity(sc_w, arch);
+			speed[s][w] = sc_hypervisor_get_speed(sc_w, arch);
 		}
 	}
 	
@@ -50,7 +50,7 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
 	struct timeval end_time;
 	gettimeofday(&start_time, NULL);
 
-	double res = _glp_resolve(ns, nw, velocity, w_in_s, workers, 1);
+	double res = _glp_resolve(ns, nw, speed, w_in_s, workers, 1);
 	gettimeofday(&end_time, NULL);
 
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
@@ -68,7 +68,7 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
  */
 #ifdef STARPU_HAVE_GLPK_H
 #include <glpk.h>
-static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer)
+static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], int *workers, unsigned integer)
 {
 	int w, s;
 	glp_prob *lp;
@@ -76,7 +76,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
-	glp_set_obj_name(lp, "total velocity");
+	glp_set_obj_name(lp, "total speed");
 
 	{
 		int ne = 2 * ns * nw /* worker execution time */
@@ -115,10 +115,10 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 
 
 		int curr_row_idx = 0;
-		/* Total worker velocity */
+		/* Total worker speed */
 		glp_add_rows(lp, 1);
 
-		/*sum(x[s][w]*velocity[s][w]) >= vmax */
+		/*sum(x[s][w]*speed[s][w]) >= vmax */
 		char name[32], title[64];
 		starpu_worker_get_name(w, name, sizeof(name));
 		snprintf(title, sizeof(title), "worker %s", name);
@@ -131,7 +131,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 				/* x[s][w] */
 				ia[n] = curr_row_idx + 1;
 				ja[n] = s*nw+w+1;
-				ar[n] = velocity[s][w];
+				ar[n] = speed[s][w];
 				n++;
 			}
 		}
@@ -231,7 +231,7 @@ static void _try_resizing(void)
 	int nw = starpu_worker_get_count(); /* Number of different workers */
 	
 	double w_in_s[ns][nw];
-	unsigned found_sol = _compute_max_velocity(ns, nw,  w_in_s, NULL, NULL);
+	unsigned found_sol = _compute_max_speed(ns, nw,  w_in_s, NULL, NULL);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{
@@ -282,9 +282,9 @@ static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct st
         if(ret != EBUSY)
 	{
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		if(criteria != SC_NOTHING && criteria == SC_SPEED)
 		{
-			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
 				_try_resizing();
 			}
@@ -318,7 +318,7 @@ static void debit_lp_end_ctx(unsigned sched_ctx)
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
 	int worker;
 /* 	for(worker = 0; worker < 12; worker++) */
-/* 		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_velocity[worker]); */
+/* 		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */
 
 	return;
 }

+ 2 - 2
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -62,9 +62,9 @@ static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 	if(ret != EBUSY)
 	{
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		if(criteria != SC_NOTHING && criteria == SC_SPEED)
 		{
-			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
 				_try_resizing();
 			}

+ 2 - 2
sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -58,10 +58,10 @@ static int* _get_workers_to_move(unsigned sender_sched_ctx, unsigned receiver_sc
 	struct sc_hypervisor_wrapper* sender_sc_w = sc_hypervisor_get_wrapper(sender_sched_ctx);
 	struct sc_hypervisor_wrapper* receiver_sc_w = sc_hypervisor_get_wrapper(receiver_sched_ctx);
         int *workers = NULL;
-        double v_receiver = sc_hypervisor_get_ctx_velocity(receiver_sc_w);
+        double v_receiver = sc_hypervisor_get_ctx_speed(receiver_sc_w);
         double receiver_remainig_flops = receiver_sc_w->remaining_flops;
         double sender_exp_end = _get_exp_end(sender_sched_ctx);
-        double sender_v_cpu = sc_hypervisor_get_velocity_per_worker_type(sender_sc_w, STARPU_CPU_WORKER);
+        double sender_v_cpu = sc_hypervisor_get_speed_per_worker_type(sender_sc_w, STARPU_CPU_WORKER);
         double v_for_rctx = (receiver_remainig_flops/(sender_exp_end - starpu_timing_now())) - v_receiver;
 
         int nworkers_needed = v_for_rctx/sender_v_cpu;

+ 18 - 18
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -22,7 +22,7 @@
 
 struct ispeed_lp_data
 {
-	double **velocity;
+	double **speed;
 	double *flops;
 	double **flops_on_w;
 	int *workers;
@@ -38,7 +38,7 @@ static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
 {
 	struct ispeed_lp_data *sd = (struct ispeed_lp_data *)specific_data;
 
-	double **velocity = sd->velocity;
+	double **speed = sd->speed;
 	double *flops = sd->flops;
 	
 	double **final_flops_on_w = sd->flops_on_w;
@@ -110,7 +110,7 @@ static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
 				/* nflosp[s][w] */
 				ia[n] = curr_row_idx+s*nw+w+1;
 				ja[n] = colnum(w, s);
-				ar[n] = 1 / velocity[s][w];
+				ar[n] = 1 / speed[s][w];
 
 				n++;
 				
@@ -257,12 +257,12 @@ static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
 static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double **flops_on_w, int *in_sched_ctxs, int *workers)
 {
 //	double flops[ns];
-//	double velocity[ns][nw];
+//	double speed[ns][nw];
 	double *flops = (double*)malloc(ns*sizeof(double));
-	double **velocity = (double **)malloc(ns*sizeof(double*));
+	double **speed = (double **)malloc(ns*sizeof(double*));
 	int i;
 	for(i = 0; i < ns; i++)
-		velocity[i] = (double*)malloc(nw*sizeof(double));
+		speed[i] = (double*)malloc(nw*sizeof(double));
 
 	int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
 	
@@ -278,24 +278,24 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 			w_in_s[s][w] = 0.0;
 			int worker = workers == NULL ? w : workers[w];
 
-			velocity[s][w] = sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-			if(velocity[s][w] == -1.0)
+			speed[s][w] = sc_hypervisor_get_speed_per_worker(sc_w, worker);
+			if(speed[s][w] == -1.0)
 			{
 				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
-				velocity[s][w] = sc_hypervisor_get_velocity(sc_w, arch);
+				speed[s][w] = sc_hypervisor_get_speed(sc_w, arch);
 				if(arch == STARPU_CUDA_WORKER)
 				{
 					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx);
 					if(!worker_in_ctx)
 					{
-						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker) / 1000;
-						velocity[s][w] = (velocity[s][w] * transfer_velocity) / (velocity[s][w] + transfer_velocity);
+						double transfer_speed = starpu_get_bandwidth_RAM_CUDA(worker) / 1000;
+						speed[s][w] = (speed[s][w] * transfer_speed) / (speed[s][w] + transfer_speed);
 					}
 				}
 
 			}
 			
-//			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
+//			printf("v[w%d][s%d] = %lf\n",w, s, speed[s][w]);
 		}
 		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
 		flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */
@@ -310,7 +310,7 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	double tmin = 0.0;
 
         struct ispeed_lp_data specific_data;
-        specific_data.velocity = velocity;
+        specific_data.speed = speed;
         specific_data.flops = flops;
         specific_data.flops_on_w = flops_on_w;
         specific_data.workers = workers;
@@ -319,8 +319,8 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 								tmin, tmax, smallest_tmax, _glp_resolve);
 
 	for(i = 0; i < ns; i++)
-		free(velocity[i]);
-	free(velocity);
+		free(speed[i]);
+	free(speed);
 	
 	return found_sol;
 }
@@ -390,9 +390,9 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct s
         if(ret != EBUSY)
         {
                 unsigned criteria = sc_hypervisor_get_resize_criteria();
-                if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+                if(criteria != SC_NOTHING && criteria == SC_SPEED)
                 {
-                        if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+                        if(sc_hypervisor_check_speed_gap_btw_ctxs())
                         {
                                 _try_resizing();
                         }
@@ -426,7 +426,7 @@ static void ispeed_lp_end_ctx(unsigned sched_ctx)
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
 	int worker;
 /* 	for(worker = 0; worker < 12; worker++) */
-/* 		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_velocity[worker]); */
+/* 		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */
 
 	return;
 }

+ 19 - 19
sc_hypervisor/src/hypervisor_policies/ispeed_policy.c

@@ -22,16 +22,16 @@ static unsigned _get_fastest_sched_ctx(void)
 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
 
 	int fastest_sched_ctx = STARPU_NMAX_SCHED_CTXS;
-	double curr_velocity = 0.0;
-	double biggest_velocity = 0.0;
+	double curr_speed = 0.0;
+	double biggest_speed = 0.0;
 	int i;
 	for(i = 0; i < nsched_ctxs; i++)
 	{
-		curr_velocity = sc_hypervisor_get_ctx_velocity(sc_hypervisor_get_wrapper(sched_ctxs[i]));
-		if( curr_velocity > biggest_velocity)
+		curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i]));
+		if( curr_speed > biggest_speed)
 		{
 			fastest_sched_ctx = sched_ctxs[i];
-			biggest_velocity = curr_velocity;
+			biggest_speed = curr_speed;
 		}
 	}
 
@@ -43,16 +43,16 @@ static unsigned _get_slowest_sched_ctx(void)
 	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
 
-	double smallest_velocity = sc_hypervisor_get_ctx_velocity(sc_hypervisor_get_wrapper(sched_ctxs[0]));
-	unsigned slowest_sched_ctx = smallest_velocity == -1.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
-	double curr_velocity = 0.0;
+	double smallest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[0]));
+	unsigned slowest_sched_ctx = smallest_speed == -1.0  ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0];
+	double curr_speed = 0.0;
 	int i;
 	for(i = 1; i < nsched_ctxs; i++)
 	{
-		curr_velocity = sc_hypervisor_get_ctx_velocity(sc_hypervisor_get_wrapper(sched_ctxs[i]));
-		if((curr_velocity < smallest_velocity || smallest_velocity == 0.0) && curr_velocity != -1.0)
+		curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i]));
+		if((curr_speed < smallest_speed || smallest_speed == 0.0) && curr_speed != -1.0)
 		{
-			smallest_velocity = curr_velocity;
+			smallest_speed = curr_speed;
 			slowest_sched_ctx = sched_ctxs[i];
 		}
 	}
@@ -104,8 +104,8 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 
 					if(!considered)
 					{
-						double worker_velocity = sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-						if(worker_velocity != -1.0)
+						double worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, worker);
+						if(worker_speed != -1.0)
 						{
 							/* the first iteration*/
 							if(curr_workers[index] < 0)
@@ -119,9 +119,9 @@ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_
 							else if(config->priority[worker] ==
 								config->priority[curr_workers[index]])
 							{
-								double curr_worker_velocity = sc_hypervisor_get_velocity_per_worker(sc_w, curr_workers[index]);
-//								printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_velocity, curr_workers[index], curr_worker_velocity);
-								if(worker_velocity < curr_worker_velocity && curr_worker_velocity != -1.0)
+								double curr_worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, curr_workers[index]);
+//								printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_speed, curr_workers[index], curr_worker_speed);
+								if(worker_speed < curr_worker_speed && curr_worker_speed != -1.0)
 								{
 									curr_workers[index] = worker;
 								}
@@ -161,9 +161,9 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker, struct star
 						double new_speed = 0.0;
 						int i;
 						for(i = 0; i < nworkers_to_move; i++)
-							new_speed += sc_hypervisor_get_velocity_per_worker(sc_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]);
-						double fastest_speed = sc_hypervisor_get_ctx_velocity(sc_hypervisor_get_wrapper(fastest_sched_ctx));
-						double slowest_speed = sc_hypervisor_get_ctx_velocity(sc_hypervisor_get_wrapper(slowest_sched_ctx));
+							new_speed += sc_hypervisor_get_speed_per_worker(sc_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]);
+						double fastest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(fastest_sched_ctx));
+						double slowest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(slowest_sched_ctx));
 //						printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, 
 //						       slowest_speed, workers_to_move[0], new_speed);
 						if(fastest_speed != -1.0 && slowest_speed != -1.0 && (slowest_speed + new_speed) <= (fastest_speed - new_speed))

+ 2 - 2
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -240,10 +240,10 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 		}
 
 		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		if(criteria != SC_NOTHING && criteria == SC_SPEED)
 		{
 			
-			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
 				_try_resizing();
 			}

+ 4 - 4
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -41,13 +41,13 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
 		if(ncuda != 0)
 		{
-			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
-			v[i][1] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+			v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER);
+			v[i][1] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
 		}
 		else
-			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+			v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
 #else
-		v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+		v[i][0] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
 #endif // STARPU_USE_CUDA
 		
 		flops[i] = sc_w->remaining_flops < 0.0 ? 0.0 : sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/

+ 18 - 18
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -365,7 +365,7 @@ double sc_hypervisor_get_slowest_ctx_exec_time(void)
 
 //		double elapsed_time  = (curr_time - sc_w->start_time)/1000000;
 		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
-		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_velocity(sc_w);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w);
 		if(elapsed_time > slowest_time)
 			slowest_time = elapsed_time;
 
@@ -388,7 +388,7 @@ double sc_hypervisor_get_fastest_ctx_exec_time(void)
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
 
 		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
-		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_velocity(sc_w);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w);
 		
 		if(elapsed_time < fastest_time)
 			fastest_time = elapsed_time;
@@ -446,8 +446,8 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 				{
 					if(arch == STARPU_CUDA_WORKER)
 					{
-						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
-						transfer_time +=  (tp->data_size / transfer_velocity) / 1000. ;
+						double transfer_speed = starpu_get_bandwidth_RAM_CUDA(worker);
+						transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
 						double latency = starpu_get_latency_RAM_CUDA(worker);
 						transfer_time += latency/1000.;
 						
@@ -457,8 +457,8 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 					{
 						if(!starpu_sched_ctx_contains_type_of_worker(arch, tp->sched_ctx_id))
 						{
-							double transfer_velocity = starpu_get_bandwidth_CUDA_RAM(worker);
-							transfer_time += (tp->data_size / transfer_velocity) / 1000. ;
+							double transfer_speed = starpu_get_bandwidth_CUDA_RAM(worker);
+							transfer_time += (tp->data_size / transfer_speed) / 1000. ;
 							double latency = starpu_get_latency_CUDA_RAM(worker);
 							transfer_time += latency / 1000.;
 						}
@@ -489,8 +489,8 @@ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 	return 0;
 }
 
-/* check if there is a big velocity gap between the contexts */
-unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
+/* check if there is a big speed gap between the contexts */
+unsigned sc_hypervisor_check_speed_gap_btw_ctxs(void)
 {
 	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
@@ -537,8 +537,8 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 			{
 				sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 				double v[nw];
-				v[0] = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
-				v[1] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
+				v[0] = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER);
+				v[1] = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
 				
 				optimal_v[i] = nworkers_per_type[i][0] * v[0] + nworkers_per_type[i][1]* v[1];
 				_set_optimal_v(i, optimal_v[i]);
@@ -555,7 +555,7 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		{
 			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 			
-			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			if(ctx_v == -1.0)
 				return 0;
 		}
@@ -564,19 +564,19 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		{
 			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 			
-			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
 				return 1;
 		}
 	}
-	else /* if we have not been able to compute a theoretical velocity consider the env variable
-		SC_MAX_VELOCITY_GAP and compare the speed of the contexts, whenever the difference
+	else /* if we have not been able to compute a theoretical speed consider the env variable
+		SC_MAX_SPEED_GAP and compare the speed of the contexts, whenever the difference
 		btw them is greater than the max value the function returns true */
 	{
 		for(i = 0; i < nsched_ctxs; i++)
 		{
 			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
-			double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
+			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			if(ctx_v != -1.0)
 			{
 				for(j = 0; j < nsched_ctxs; j++)
@@ -588,11 +588,11 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 							return 1;
 						
 						other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]);
-						double other_ctx_v = sc_hypervisor_get_ctx_velocity(other_sc_w);
+						double other_ctx_v = sc_hypervisor_get_ctx_speed(other_sc_w);
 						if(other_ctx_v != -1.0)
 						{
 							double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
-							double max_vel = _get_max_velocity_gap();
+							double max_vel = _get_max_speed_gap();
 							if(gap > max_vel)
 								return 1;
 						}
@@ -614,7 +614,7 @@ unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
 		if(criteria == SC_IDLE)
 			return sc_hypervisor_check_idle(sched_ctx, worker);
 		else
-			return sc_hypervisor_check_velocity_gap_btw_ctxs();
+			return sc_hypervisor_check_speed_gap_btw_ctxs();
 	}
 	else
 		return 0;

+ 27 - 27
sc_hypervisor/src/policies_utils/speed.c

@@ -19,7 +19,7 @@
 #include <math.h>
 
 
-double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w)
+double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w)
 {
 	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
         double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
@@ -43,7 +43,7 @@ double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w)
 	return -1.0;
 }
 
-double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker)
+double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker)
 {
 	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
 		return -1.0;
@@ -74,8 +74,8 @@ double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w,
 /* /\* 			if(!worker_in_ctx) *\/ */
 /* /\* 			{ *\/ */
 
-/* /\* 				double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker); *\/ */
-/* /\* 				elapsed_time +=  (elapsed_data_used / transfer_velocity) / 1000000 ; *\/ */
+/* /\* 				double transfer_speed = starpu_get_bandwidth_RAM_CUDA(worker); *\/ */
+/* /\* 				elapsed_time +=  (elapsed_data_used / transfer_speed) / 1000000 ; *\/ */
 /* /\* 			} *\/ */
 /* 			double latency = starpu_get_latency_RAM_CUDA(worker); */
 /* //			printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks); */
@@ -93,8 +93,8 @@ double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w,
 }
 
 
-/* compute an average value of the cpu/cuda velocity */
-double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
+/* compute an average value of the cpu/cuda speed */
+double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
 {
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
         int worker;
@@ -103,7 +103,7 @@ double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper*
 	if(workers->init_iterator)
                 workers->init_iterator(workers, &it);
 
-	double velocity = 0.0;
+	double speed = 0.0;
 	unsigned nworkers = 0;
         while(workers->has_next(workers, &it))
 	{
@@ -111,51 +111,51 @@ double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper*
                 enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
                 if(arch == req_arch)
                 {
-			double _vel = sc_hypervisor_get_velocity_per_worker(sc_w, worker);
+			double _vel = sc_hypervisor_get_speed_per_worker(sc_w, worker);
 			if(_vel > 0.0)
 			{
-				velocity += _vel;
+				speed += _vel;
 				nworkers++;
 
 			}
 		}
 	}			
 
-	velocity = ((nworkers != 0 && velocity > 0.1) ? velocity / nworkers : -1.0);
-	if(velocity != -1.0)
+	speed = ((nworkers != 0 && speed > 0.1) ? speed / nworkers : -1.0);
+	if(speed != -1.0)
 	{
 		if(arch == STARPU_CUDA_WORKER)
-			sc_w->ref_velocity[0] = sc_w->ref_velocity[0] > 1.0 ? (sc_w->ref_velocity[0] + velocity) / 2 : velocity; 
+			sc_w->ref_speed[0] = sc_w->ref_speed[0] > 1.0 ? (sc_w->ref_speed[0] + speed) / 2 : speed; 
 		else
-			sc_w->ref_velocity[1] = sc_w->ref_velocity[1] > 1.0 ? (sc_w->ref_velocity[1] + velocity) / 2 : velocity; 
+			sc_w->ref_speed[1] = sc_w->ref_speed[1] > 1.0 ? (sc_w->ref_speed[1] + speed) / 2 : speed; 
 	}
-	return velocity;
+	return speed;
 }
 
-/* compute an average value of the cpu/cuda old velocity */
-double sc_hypervisor_get_ref_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
+/* compute an average value of the cpu/cuda old speed */
+double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
 {
-	if(arch == STARPU_CUDA_WORKER && sc_w->ref_velocity[0] > 0.0)
-		return sc_w->ref_velocity[0];
+	if(arch == STARPU_CUDA_WORKER && sc_w->ref_speed[0] > 0.0)
+		return sc_w->ref_speed[0];
 	else
-		if(arch == STARPU_CPU_WORKER && sc_w->ref_velocity[1] > 0.0)
-			return sc_w->ref_velocity[1];
+		if(arch == STARPU_CPU_WORKER && sc_w->ref_speed[1] > 0.0)
+			return sc_w->ref_speed[1];
 
 	return -1.0;
 }
 
-double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch)
+double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch)
 {
 
-	double velocity = sc_hypervisor_get_velocity_per_worker_type(sc_w, arch);
-	if(velocity == -1.0)
+	double speed = sc_hypervisor_get_speed_per_worker_type(sc_w, arch);
+	if(speed == -1.0)
 	{
-		velocity = sc_hypervisor_get_ref_velocity_per_worker_type(sc_w, arch);
+		speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, arch);
 	}
-	if(velocity == -1.0)
+	if(speed == -1.0)
 	{
-		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
+		speed = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
 	}
        
-	return velocity;
+	return speed;
 }

+ 9 - 9
sc_hypervisor/src/sc_hypervisor.c

@@ -134,10 +134,10 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 {
 	hypervisor.min_tasks = 0;
 	hypervisor.nsched_ctxs = 0;
-	char* vel_gap = getenv("SC_HYPERVISOR_MAX_VELOCITY_GAP");
-	hypervisor.max_velocity_gap = vel_gap ? atof(vel_gap) : SC_VELOCITY_MAX_GAP_DEFAULT;
+	char* speed_gap = getenv("SC_HYPERVISOR_MAX_SPEED_GAP");
+	hypervisor.max_speed_gap = speed_gap ? atof(speed_gap) : SC_SPEED_MAX_GAP_DEFAULT;
 	char* crit =  getenv("SC_HYPERVISOR_TRIGGER_RESIZE");
-	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_VELOCITY : SC_NOTHING);
+	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
 	hypervisor.start_executing_time = starpu_timing_now();
@@ -164,8 +164,8 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 		starpu_pthread_mutex_init(&hypervisor.sched_ctx_w[i].mutex, NULL);
 		hypervisor.optimal_v[i] = 0.0;
 
-		hypervisor.sched_ctx_w[i].ref_velocity[0] = -1.0;
-		hypervisor.sched_ctx_w[i].ref_velocity[1] = -1.0;
+		hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0;
+		hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
 
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
@@ -233,8 +233,8 @@ static void _print_current_time()
 			{
 				struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
 				
-				double cpu_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
-				double cuda_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
+				double cpu_speed = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
+				double cuda_speed = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER);
 				int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
 				int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
 				fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
@@ -354,9 +354,9 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 }
 
 
-double _get_max_velocity_gap()
+double _get_max_speed_gap(void)
 {
-	return hypervisor.max_velocity_gap;
+	return hypervisor.max_speed_gap;
 }
 
 unsigned sc_hypervisor_get_resize_criteria()

+ 4 - 4
sc_hypervisor/src/sc_hypervisor_intern.h

@@ -17,7 +17,7 @@
 #include <sc_hypervisor.h>
 #include <common/uthash.h>
 
-#define SC_VELOCITY_MAX_GAP_DEFAULT 50
+#define SC_SPEED_MAX_GAP_DEFAULT 50
 
 struct size_request
 {
@@ -78,8 +78,8 @@ struct sc_hypervisor
 	/* time when the hypervisor started */
 	double start_executing_time;
 
-	/* max velocity diff btw ctx before triggering resizing */
-	double max_velocity_gap;
+	/* max speed difference between contexts before triggering resizing */
+	double max_speed_gap;
 	
 	/* criteria to trigger resizing */
 	unsigned resize_criteria;
@@ -101,7 +101,7 @@ void _add_config(unsigned sched_ctx);
 
 void _remove_config(unsigned sched_ctx);
 
-double _get_max_velocity_gap();
+double _get_max_speed_gap(void);
 
 double _get_optimal_v(unsigned sched_ctx);
 void _set_optimal_v(unsigned sched_ctx, double optimal_v);