
small fixes done while writing my thesis + task window at context level (before pushing tasks to the scheduler) expressed w.r.t. time rather than the number of tasks

Andra Hugo, 10 years ago · commit ba3e4d7403
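
In essence (an overview distilled from the diffs below, not text from the commit itself): before a ready task is handed to the scheduler, the runtime asks the scheduling policy to simulate the push and predict when the task would complete; if the context's expected load already exceeds a time window read from STARPU_WINDOW_TIME_SIZE (in seconds, with a 20% tolerance), the task is parked in a per-context waiting list and re-fetched as ready work drains. The new starpu_sched_ctx_update_start_resizing_sample entry point lets the hypervisor tell the runtime from which timestamp the window is measured. A minimal standalone sketch of the test, with illustrative names:

#include <stdio.h>
#include <stdlib.h>

/* models _starpu_can_push_task from the diff: expected_end and window_start
   are timestamps in microseconds (the starpu_timing_now() scale),
   window_size is in seconds */
static int can_push(double expected_end, double window_start, double window_size)
{
	double expected_len = (expected_end - window_start) / 1000000.0;
	return expected_len <= window_size + 0.2 * window_size;
}

int main(void)
{
	const char *env = getenv("STARPU_WINDOW_TIME_SIZE");
	if (!env)
	{
		puts("no window set: tasks are always pushed");
		return 0;
	}
	/* illustrative: the sample started at t=0 and the load would end 0.9 s later */
	printf("push? %d\n", can_push(900000.0, 0.0, atof(env)));
	return 0;
}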

+ 2 - 0
include/starpu_sched_ctx_hypervisor.h

@@ -43,6 +43,8 @@ void starpu_sched_ctx_notify_hypervisor_exists(void);
 
 unsigned starpu_sched_ctx_check_if_hypervisor_exists(void);
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample);
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 0
include/starpu_scheduler.h

@@ -33,6 +33,7 @@ struct starpu_sched_policy
 	void (*deinit_sched)(unsigned sched_ctx_id);
 
 	int (*push_task)(struct starpu_task *);
+	double (*simulate_push_task)(struct starpu_task *);
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 	struct starpu_task *(*pop_task)(unsigned sched_ctx_id);
 	struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id);
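
The hook is optional: policies that leave it NULL are never throttled (see _starpu_can_push_task in src/core/sched_ctx.c below). A hedged sketch of the contract, with hypothetical my_* names (not StarPU API): return the predicted end time of the task on the starpu_timing_now() microsecond scale, without enqueueing anything.

#include <starpu.h>

/* toy push: a real policy would place the task on a worker queue */
static int my_push_task(struct starpu_task *task)
{
	(void)task;
	return 0;
}

static double my_simulate_push_task(struct starpu_task *task)
{
	/* same placement decision as my_push_task would take, but only
	   report the expected completion time; the task is not queued */
	(void)task;
	return starpu_timing_now() + 1000.0; /* toy: done 1 ms from now */
}

static struct starpu_sched_policy my_policy =
{
	.push_task = my_push_task,
	.simulate_push_task = my_simulate_push_task,
	/* ... remaining handlers ... */
};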

+ 1 - 0
sc_hypervisor/include/sc_hypervisor.h

@@ -144,6 +144,7 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
 
 void sc_hypervisor_print_overhead();
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
sc_hypervisor/src/Makefile.am

@@ -37,7 +37,8 @@ libsc_hypervisor_la_SOURCES = 				\
 	hypervisor_policies/teft_lp_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
 	hypervisor_policies/ispeed_lp_policy.c		\
-	hypervisor_policies/throughput_lp_policy.c
+	hypervisor_policies/throughput_lp_policy.c	\
+	hypervisor_policies/hard_coded_policy.c
 
 noinst_HEADERS = sc_hypervisor_intern.h		
 

+ 17 - 8
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -20,7 +20,7 @@
 #include <starpu_config.h>
 #include <sys/time.h>
 
-int resize_no = 0;
+unsigned long resize_no = 0;
 #ifdef STARPU_HAVE_GLPK_H
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
@@ -53,8 +53,8 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 	
-	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000;
-	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	if(vmax != -1.0)
 	{
 /* 		int nworkers_per_ctx_rounded[ns][nw]; */
@@ -124,9 +124,8 @@ static int _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *fir
 
 static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-#ifdef STARPU_USE_FXT
 	starpu_fxt_trace_user_event(resize_no);
-#endif
+
 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 	if(nhierarchy_levels > 1)
 	{
@@ -243,9 +242,19 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 			_resize(NULL, -1, NULL, -1);
 		}
 	}
-	else if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+	else 
 	{
-		_resize(NULL, -1, NULL, -1);
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		{
+
+			_resize(NULL, -1, NULL, -1);
+		}
+		else
+		{
+			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				_resize(NULL, -1, NULL, -1);
+		}
 	}
 	return;
 }
@@ -253,6 +262,7 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
+	if(worker == -2) return;
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
 	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	{
@@ -320,7 +330,6 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-//			printf("trigger idle \n");
 			_resize_leaves(worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
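
The new worker == -2 early return pairs with notify_post_exec_task in sc_hypervisor.c further down: when the per-context time sample has not elapsed yet, the hypervisor still invokes the handler but with worker = -2, meaning "bookkeeping only, do not resize". Policies that maintain a task pool (teft_lp) use that call to remove the task from the pool; pure resizing policies such as this one return immediately. Condensed from the sc_hypervisor.c hunk below:

/* condensed from notify_post_exec_task (sc_hypervisor.c) */
if(elapsed_time > config->time_sample)
	hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint); /* may resize */
else
	/* sample not elapsed: bookkeeping only, no resizing */
	hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);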

+ 12 - 2
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -119,13 +119,23 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 
 
 	double w_in_s[ns][nw];
-//			double flops_on_w[ns][nw];
+
 	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
 	int i;
 	for(i = 0; i < ns; i++)
 		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
-	
+
+	struct timeval start_time;
+	struct timeval end_time;
+	gettimeofday(&start_time, NULL);
 	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, curr_sched_ctxs, workers);
+	gettimeofday(&end_time, NULL);
+	
+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
+	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{

+ 54 - 48
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -79,7 +79,7 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nt = 0; /* Number of different kinds of tasks */
-	starpu_pthread_mutex_lock(&mutex);
+
 	struct sc_hypervisor_policy_task_pool * tp;
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
@@ -105,12 +105,16 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); 
 	double smallest_tmax = possible_tmax / 3;
 	double tmax = possible_tmax * ns;
-	double tmin = smallest_tmax;
+	double tmin = 0.0;
+	unsigned found_sol = 0;
+
+	if(nt > 0 && tmax > 0.0)
+	{
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	}
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
-	starpu_pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{
@@ -147,7 +151,7 @@ static void size_if_required()
 
 		if(ready_to_size)
 		{
-			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
+			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);			
 			sc_hypervisor_free_size_req();
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -167,6 +171,7 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	if(ns < 2) return;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 
 	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
@@ -186,7 +191,6 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
 	
-	
 	double w_in_s[ns][nw];
 	double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
 	int i;
@@ -205,12 +209,27 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	   compute the nr of flops and not the tasks */
         /*lp computes it in s but it's converted to ms just before return */
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
-	double smallest_tmax = 0.0;
-	double tmax = possible_tmax * ns;
+	double smallest_tmax = possible_tmax/2.0;
+	double tmax = possible_tmax + smallest_tmax;
 	double tmin = smallest_tmax;
+	unsigned found_sol = 0;
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	if(nt > 0 && tmax > 0.0)
+	{
+		struct timeval start_time;
+		struct timeval end_time;
+		gettimeofday(&start_time, NULL);
+		
+		
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+		gettimeofday(&end_time, NULL);
+		
+		long diff_s = end_time.tv_sec  - start_time.tv_sec;
+		long diff_us = end_time.tv_usec  - start_time.tv_usec;
+		
+		__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+	}
 //	starpu_pthread_mutex_unlock(&mutex);
 	
 	/* if we did find at least one solution redistribute the resources */
@@ -234,61 +253,48 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	free(tasks_per_worker);
 }
 
-static void teft_lp_handle_poped_task(unsigned sched_ctx, __attribute__((unused))int worker, struct starpu_task *task, uint32_t footprint)
+static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	if(worker > -2)
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
-		{
-			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_SPEED)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
-			
-			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+			unsigned criteria = sc_hypervisor_get_resize_criteria();
+			if(criteria != SC_NOTHING && criteria == SC_SPEED)
 			{
-				_try_resizing(NULL, -1, NULL, -1);
+				
+				if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				{
+					_try_resizing(NULL, -1, NULL, -1);
+				}
 			}
+			
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
-
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	/* too expensive to take this mutex and correct value of the number of tasks is not compulsory */
-//	starpu_pthread_mutex_lock(&mutex);
+	starpu_pthread_mutex_lock(&mutex);
 	sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools);
-//	starpu_pthread_mutex_unlock(&mutex);
+	starpu_pthread_mutex_unlock(&mutex);
 
 }
 
 static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING)// && criteria == SC_IDLE)
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
+			_try_resizing(NULL, -1, NULL, -1);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_IDLE)
-		{
-			
-			if(sc_hypervisor_check_idle(sched_ctx, worker))
-			{
-				_try_resizing(NULL, -1, NULL, -1);
-			}
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	return;
 }
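
The new bracket starts the dichotomy from the LP's own estimate instead of the previous wide-open range. With illustrative numbers:

/* illustrative: possible_tmax = 100 ms */
double possible_tmax = 100.0;
double smallest_tmax = possible_tmax / 2.0;   /* = 50 ms  */
double tmax = possible_tmax + smallest_tmax;  /* = 150 ms */
double tmin = smallest_tmax;                  /* = 50 ms  */
/* the dichotomy now searches [50 ms, 150 ms] around the estimate,
   instead of [0, ns * possible_tmax] as before */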

+ 58 - 23
sc_hypervisor/src/policies_utils/dichotomy.c

@@ -23,60 +23,95 @@
    still has solutions */
 unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
 					    double tmin, double tmax, double smallest_tmax,
-					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
+					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw],
 									     unsigned is_integer, double tmax, void *specifc_data))
 {
 	double res = 1.0;
 	unsigned has_sol = 0;
-	double old_tmax = 0.0;
+	double tmid = tmax;
 	unsigned found_sol = 0;
-
 	struct timeval start_time;
 	struct timeval end_time;
 	int nd = 0;
+	double found_tmid = tmax;
+	double potential_tmid = tmid;
+	double threshold = tmax*0.1;
 	gettimeofday(&start_time, NULL);
-
+	
 	/* we fix tmax and we do not treat it as an unknown
 	   we just vary by dichotomy its values*/
-	while(tmax > 1.0)
+	while(1)
 	{
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   in the proper table */
-		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmax, specific_data);
-		if(res != 0.0)
+		printf("solving for tmid %lf \n", tmid);
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmid, specific_data);
+		if(res < 0.0)
+		{
+			printf("timeouted no point in continuing\n");
+			found_sol = 0;
+			break;
+		}
+		else if(res != 0.0)
 		{
 			has_sol = 1;
 			found_sol = 1;
+			found_tmid = tmid;
+			printf("found sol for tmid %lf \n", tmid);
 		}
 		else
+		{
+			printf("failed for tmid %lf \n", tmid);
+			if(tmid == tmax)
+			{
+				printf("failed for tmid %lf from the first time\n", tmid);
+				break;
+			}
 			has_sol = 0;
+		}
 
-		/* if we have a solution with this tmax try a smaller value
-		   bigger than the old min */
+		/* if we have a solution with this tmid try a smaller value,
+		   still bigger than tmin */
 		if(has_sol)
 		{
-			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
+			/* if the difference between tmax and tmid is smaller than
+			   a given threshold there is no point in searching for
+			   more precision */
+			tmax = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threshold)
+			{
+				printf("had_sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
 				break;
-			old_tmax = tmax;
+			}
+			printf("try for smaller potential tmid %lf \n", potential_tmid);
 		}
-		else /*else try a bigger one but smaller than the old tmax */
+		else /*else try a bigger one */
 		{
-			tmin = tmax;
-			if(old_tmax != 0.0)
-				tmax = old_tmax;
+			/* if we previously found a good sol and we keep failing
+			   we stop searching for a better sol */
+			tmin = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threshold)
+			{
+				printf("didn't have sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
+				break;
+			}
+			printf("try for bigger potential tmid %lf \n", potential_tmid);
 		}
-		if(tmin == tmax) break;
-		tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
 
-		if(tmax < smallest_tmax)
-		{
-			tmax = old_tmax;
-			tmin = smallest_tmax;
-			tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
-		}
+		tmid = potential_tmid;
+
 		nd++;
 	}
+	printf("solve againd for tmid %lf \n", found_tmid);
+	if(found_sol)
+	{
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, found_tmid, specific_data);
+		found_sol = (res != 0.0);
+	}
+	printf("found sol %d for tmid %lf\n", found_sol, found_tmid);
 	gettimeofday(&end_time, NULL);
 
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
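
The rewritten loop is a plain bisection on tmid with a relative stopping threshold. A minimal standalone model (feasible() stands in for the LP solve; names and numbers are illustrative):

#include <stdio.h>

/* stands in for lp_estimated_distrib_func: 1 if a distribution fits in tmid */
static int feasible(double tmid)
{
	return tmid >= 73.0; /* toy feasibility frontier */
}

static double bisect(double tmin, double tmax)
{
	double threshold = tmax * 0.1; /* same 10% stop rule as the commit */
	double tmid = tmax, found = -1.0;
	while (1)
	{
		if (feasible(tmid))
		{
			found = tmid;
			tmax = tmid; /* try a smaller makespan */
		}
		else
		{
			if (tmid == tmax)
				return -1.0; /* not even the largest tmax works */
			tmin = tmid; /* back off upwards */
		}
		double next = tmin + (tmax - tmin) / 2.0;
		if (tmax - next < threshold)
			return found;
		tmid = next;
	}
}

int main(void)
{
	printf("best tmid: %lf\n", bisect(50.0, 150.0)); /* prints 75.000000 */
	return 0;
}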

+ 57 - 58
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -30,6 +30,7 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	int t, w, s;
 	glp_prob *lp;
 
+	
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
@@ -57,12 +58,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				char name[32];
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				glp_set_col_name(lp, colnum(w, t), name);
-/* 				if (integer) */
-/*                                 { */
-/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
-/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
-/*                                 } */
-/* 				else */
+				if (is_integer)
+                                {
+                                        glp_set_col_kind(lp, colnum(w, t), GLP_IV);
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0);
+                                }
+				else
 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
 			}
 		for(s = 0; s < ns; s++)
@@ -115,7 +116,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 						ia[n] = curr_row_idx+s*nw+w+1;
 						ja[n] = colnum(w, t);
 						if (isnan(times[w][t]))
+						{
+							printf("had to insert huge val \n");
 							ar[n] = 1000000000.;
+						}
 						else
 							ar[n] = times[w][t];
 						n++;
@@ -126,7 +130,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				ja[n] = nw*nt+s*nw+w+1;
 				ar[n] = (-1) * tmax;
 				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+				if (is_integer)
+                                {
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0, 0);
+                                }
+                                else
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
 			}
 		}
 
@@ -184,10 +193,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	parm.msg_lev = GLP_MSG_OFF;
 	int ret = glp_simplex(lp, &parm);
 
-/* 	char str[50]; */
-/* 	sprintf(str, "outpu_lp_%g", tmax); */
+	/* char str[50]; */
+	/* sprintf(str, "outpu_lp_%g", tmax); */
 
-/* 	glp_print_sol(lp, str); */
+	/* glp_print_sol(lp, str); */
 
 	if (ret)
 	{
@@ -213,12 +222,15 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
                 glp_iocp iocp;
                 glp_init_iocp(&iocp);
                 iocp.msg_lev = GLP_MSG_OFF;
+//		iocp.tm_lim = 1000;
 		glp_intopt(lp, &iocp);
 		int stat = glp_mip_status(lp);
 		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
+		if(stat == GLP_NOFEAS || stat == GLP_ETMLIM || stat == GLP_UNDEF)
 		{
 //			printf("no int sol in tmax = %lf\n", tmax);
+			if(stat == GLP_ETMLIM || stat == GLP_UNDEF)
+				printf("timeout \n");
 			glp_delete_prob(lp);
 			lp = NULL;
 			return 0.0;
@@ -228,12 +240,13 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	double res = glp_get_obj_val(lp);
 	for (w = 0; w < nw; w++)
 		for (t = 0; t < nt; t++)
-/* 			if (integer) */
-/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
-/*                         else */
+			if (is_integer)
+				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t));
+                        else
 				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
-	
-//	printf("for tmax %lf\n", tmax);
+
+	/* printf("**********************************************\n"); */
+	/* printf("for tmax %lf\n", tmax); */
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
 		{
@@ -243,8 +256,8 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
 //			printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]);
 		}
-//	printf("\n");
-
+	/* printf("\n"); */
+	/* printf("**********************************************\n"); */
 	glp_delete_prob(lp);
 	return res;
 }
@@ -286,14 +299,14 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers);
-				}
-				else
+				/* if(sc_w->consider_max) */
+				/* { */
+				/* 	if(config->max_nworkers == 0) */
+				/* 		glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers); */
+				/* 	else */
+				/* 		glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers); */
+				/* } */
+				/* else */
 				{
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]);
@@ -303,17 +316,17 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			}
 			else
 			{
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0);
-#ifdef STARPU_SC_HYPERVISOR_DEBUG
-					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0);
-#endif
-				}
-				else
+/* 				if(sc_w->consider_max) */
+/* 				{ */
+/* 					if(config->max_nworkers == 0) */
+/* 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* 					else */
+/* 						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* #ifdef STARPU_SC_HYPERVISOR_DEBUG */
+/* 					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0); */
+/* #endif */
+/* 				} */
+/* 				else */
 				{
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0);
@@ -418,27 +431,13 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 
-//		if(last_vmax == -1.0)
-		{
-			/*sum(all gpus) = 3*/
-			if(w == 0)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[0]);
-			
-			/*sum(all cpus) = 9*/
-			if(w == 1)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
-
-		}
-/* 		else */
-/* 		{ */
-/* 			/\*sum(all gpus) = 3*\/ */
-/* 			if(w == 0) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); */
-			
-/* 			/\*sum(all cpus) = 9*\/ */
-/* 			if(w == 1) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); */
-/* 		} */
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+		
+		/*sum(all cpus) = 9*/
+		if(w == 1)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
 	}
 
 	STARPU_ASSERT(n == ne);
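
Two behavioral notes here, relying on GLPK's documented semantics: GLP_FX now pins each worker-type row to exactly total_nw[w] workers, where the previous GLP_UP only imposed an upper cap; and in the integer branch above, glp_mip_status() only ever reports GLP_UNDEF, GLP_FEAS, GLP_NOFEAS or GLP_OPT, so the GLP_ETMLIM comparison never matches (GLP_ETMLIM is a glp_intopt() return code, which fits the commented-out iocp.tm_lim timeout). The isnan(times[w][t]) fallback also pairs with sc_hypervisor_get_tasks_times (policy_tools.c below), which now pre-fills the matrix with NAN. The bound call shapes, side by side:

/* GLPK row bounds: GLP_UP caps the sum, GLP_FX forces it exactly */
glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[w]);            /* old: sum <= total */
glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[w], total_nw[w]);  /* new: sum == total */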

+ 3 - 2
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -41,7 +41,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 		int w;
 		for(w = 0; w < nw; w++)
-			v[i][w] = 5.0;//sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
 		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
@@ -624,6 +624,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 				
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 					   &nw_move, workers_move);
+
 		if(nw_move > 0)
 			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 	}
@@ -635,8 +636,8 @@ int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, i
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);

+ 15 - 10
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -28,8 +28,8 @@ static int _compute_priority(unsigned sched_ctx)
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -117,8 +117,8 @@ int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum st
 	int considered = 0;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	for(index = 0; index < *nworkers; index++)
 	{
 		while(workers->has_next(workers, &it))
@@ -183,7 +183,6 @@ int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *confi
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	{
@@ -409,6 +408,9 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 {
         struct sc_hypervisor_policy_task_pool *tp;
         int w, t;
+	for(w = 0; w < nw; w++)
+		for(t = 0; t < nt; t++)
+			times[w][t] = NAN;
         for (w = 0; w < nw; w++)
         {
                 for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
@@ -422,7 +424,6 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 			else
 			{
                                 times[w][t] = (length / 1000.);
-
 				double transfer_time = 0.0;
 				unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
 				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
@@ -431,18 +432,21 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 					if(arch == STARPU_CUDA_WORKER)
 					{
 						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
-						transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+						if(transfer_speed > 0.0)
+							transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+	
 						double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
 						transfer_time += latency/1000.;
-						
-						
+//						transfer_time *=4;
 					}
 					else if(arch == STARPU_CPU_WORKER)
 					{
 						if(!starpu_sched_ctx_contains_type_of_worker(arch, tp->sched_ctx_id))
 						{
 							double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
-							transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+							if(transfer_speed > 0.0)
+								transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+
 							double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
 							transfer_time += latency / 1000.;
 						}
@@ -465,7 +469,7 @@ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 	{
 		if(sc_w->idle_time[worker] > config->max_idle[worker])
 		{
-//			printf("w%d/ctx%d: current idle %lf all idle %lf max_idle %lf\n", worker, sched_ctx, idle, idle_time, config->max_idle[worker]);
+//			printf("w%d/ctx%d: current idle %lf  max_idle %lf\n", worker, sched_ctx, sc_w->idle_time[worker], config->max_idle[worker]);
 			return 1;
 		}
 	}
@@ -547,7 +551,8 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_
 			
 			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
-			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
+			double max_vel = _get_max_speed_gap();
+			if(ctx_v != -1.0 && ((ctx_v < (1-max_vel)*optimal_v[i]) || ctx_v > (1+max_vel)*optimal_v[i])) 
 			{
 				return 1;
 			}
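
The hard-coded +/-20% band around the optimal speed becomes configurable through _get_max_speed_gap(). For instance (illustrative value), with max_vel = 0.25 a context only triggers resizing when its measured speed leaves [0.75, 1.25] x optimal_v[i]:

/* illustrative: max_vel = 0.25 */
double max_vel = _get_max_speed_gap();
double lo = (1 - max_vel) * optimal_v[i]; /* 0.75 * optimal */
double hi = (1 + max_vel) * optimal_v[i]; /* 1.25 * optimal */
if(ctx_v != -1.0 && (ctx_v < lo || ctx_v > hi))
	return 1; /* speed gap large enough to justify resizing */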

+ 3 - 3
sc_hypervisor/src/policies_utils/speed.c

@@ -127,13 +127,12 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		double speed = 0.0;
 		unsigned nworkers = 0;
 		double all_workers_flops = 0.0;
 		double max_workers_idle_time = 0.0;
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);
@@ -168,6 +167,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		
 		if(speed != -1.0)
 		{
+
 			/* if ref_speed started being corrupted bc of the old bad distribution
 			   register only the last frame otherwise make the average with the speed 
 			   behavior of the application until now */
@@ -233,8 +233,8 @@ double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch)
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
-
 		workers->init_iterator(workers, &it);
+
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);

+ 1 - 2
sc_hypervisor/src/policies_utils/task_pool.c

@@ -69,8 +69,7 @@ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32
 				free(tp);
 				tp = NULL;
 				
-				if(next_tp)
-					*task_pools = next_tp;
+				*task_pools = next_tp;
 				
 			}
 			else

+ 53 - 19
sc_hypervisor/src/sc_hypervisor.c

@@ -42,6 +42,7 @@ extern struct sc_hypervisor_policy ispeed_lp_policy;
 extern struct sc_hypervisor_policy throughput_lp_policy;
 #endif // STARPU_HAVE_GLPK_
 extern struct sc_hypervisor_policy ispeed_policy;
+extern struct sc_hypervisor_policy hard_coded_policy;
 
 
 static struct sc_hypervisor_policy *predefined_policies[] =
@@ -55,7 +56,8 @@ static struct sc_hypervisor_policy *predefined_policies[] =
 	&throughput_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
-	&ispeed_policy
+	&ispeed_policy,
+	&hard_coded_policy
 };
 
 static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
@@ -164,7 +166,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
-	hypervisor.start_executing_time = starpu_timing_now();
+//	hypervisor.start_executing_time = starpu_timing_now();
 
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
@@ -254,8 +256,17 @@ void sc_hypervisor_start_resize(unsigned sched_ctx)
 
 static void _print_current_time()
 {
-	if(!getenv("SC_HYPERVISOR_STOP_PRINT"))
+	char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT");
+        int sp = stop_print ? atoi(stop_print) : 1;
+
+	if(!sp)
 	{
+		if(hypervisor.start_executing_time == 0.0)
+		{
+			fprintf(stdout, "Time: %lf\n", -1.0);
+			return;
+		}
+
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
 		fprintf(stdout, "Time: %lf\n", elapsed_time);
@@ -332,7 +343,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
-	hypervisor.resize[sched_ctx] = 1;
+	hypervisor.resize[sched_ctx] = 0;//1;
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -412,6 +423,13 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now)
+{
+	if(now)
+		hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+	starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now());
+}
+
 
 double _get_max_speed_gap()
 {
@@ -441,8 +459,8 @@ int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archty
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -475,8 +493,8 @@ double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrappe
 	int worker;
 	
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -493,8 +511,8 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 	int worker;
 	
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -981,12 +999,11 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs,
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		double elapsed_time_worker[STARPU_NMAXWORKERS];
 		double norm_idle_time = 0.0;
 		double end_time  = starpu_timing_now();
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		{
 			double idle_time = 0.0;
@@ -1184,6 +1201,7 @@ unsigned choose_ctx_to_steal(int worker)
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 {
+	if(hypervisor.start_executing_time == 0.0) return;
 	struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
 	sc_w->current_idle_time[worker] += idle_time;
 	
@@ -1202,7 +1220,7 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 	if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
 	{
 		if(sc_w->hyp_react_start_time == 0.0)
-			sc_w->hyp_react_start_time = starpu_timing_now();
+			sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 		
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
@@ -1236,13 +1254,15 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 			if(idle_everywhere)
 			{
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
-
-
-			sc_w->hyp_react_start_time = starpu_timing_now();
 		}
 	}
 	return;
@@ -1261,6 +1281,11 @@ void _update_real_start_time_hierarchically(unsigned sched_ctx)
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_poped_task(unsigned sched_ctx, int worker)
 {
+	if(hypervisor.start_executing_time == 0.0)
+		hypervisor.start_executing_time = starpu_timing_now();
+	if(!hypervisor.resize[sched_ctx])
+		hypervisor.resize[sched_ctx] = 1;
+
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
 		_update_real_start_time_hierarchically(sched_ctx);
 
@@ -1358,18 +1383,25 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 		if(hypervisor.policy.handle_poped_task)
 		{	
 			if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0)
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 
 			double curr_time = starpu_timing_now();
 			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
 			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
 			{
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
+			else
+                                /* no need to consider resizing, just remove the task from the pool if the strategy requires it */
+				hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);
 		}
 	}
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
@@ -1434,8 +1466,9 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, s
 	hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops;
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 
-	if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known)
-		hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
+	/* signaled by the user - no need to wait for them */
+	/* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */
+	/* 	hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */
 }
 
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
@@ -1650,3 +1683,4 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 	}
 	return;
 }
+

+ 0 - 1
src/common/barrier_counter.c

@@ -96,7 +96,6 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 	barrier->reached_start++;
 	barrier->reached_flops += flops;
-
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 }

+ 1 - 1
src/common/fxt.c

@@ -224,7 +224,7 @@ void starpu_fxt_stop_profiling()
 
 #endif // STARPU_USE_FXT
 
-void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED)
+void starpu_fxt_trace_user_event(unsigned long code)
 {
 #ifdef STARPU_USE_FXT
 	_STARPU_TRACE_USER_EVENT(code);

+ 106 - 14
src/core/sched_ctx.c

@@ -30,6 +30,7 @@ static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
 static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
 static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
+static double hyp_actual_start_sample[STARPU_NMAX_SCHED_CTXS];
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master);
@@ -472,9 +473,12 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	STARPU_ASSERT(nworkers_ctx <= nworkers);
 
 	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->empty_ctx_mutex, NULL);
-
 	starpu_task_list_init(&sched_ctx->empty_ctx_tasks);
 
+	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->waiting_tasks_mutex, NULL);
+	starpu_task_list_init(&sched_ctx->waiting_tasks);
+
+
 	sched_ctx->sched_policy = policy ? (struct starpu_sched_policy*)malloc(sizeof(struct starpu_sched_policy)) : NULL;
 	sched_ctx->is_initial_sched = is_initial_sched;
 	sched_ctx->name = sched_ctx_name;
@@ -815,6 +819,7 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 	}
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->empty_ctx_mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->waiting_tasks_mutex);
 	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
@@ -954,6 +959,55 @@ void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx
 	return;
 
 }
+unsigned _starpu_can_push_task(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	if(sched_ctx->sched_policy->simulate_push_task)
+	{
+		const char *env_window_size = getenv("STARPU_WINDOW_TIME_SIZE");
+		if(!env_window_size) return 1;
+		double window_size = atof(env_window_size);
+		
+		starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
+		STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
+		double expected_end = sched_ctx->sched_policy->simulate_push_task(task);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
+		
+		double expected_len = 0.0; 
+		if(hyp_actual_start_sample[sched_ctx->id] != 0.0)
+			expected_len = expected_end - hyp_actual_start_sample[sched_ctx->id] ;
+		else 
+		{
+			printf("%d: sc start is 0.0\n", sched_ctx->id);
+			expected_len = expected_end - starpu_timing_now();
+		}
+		if(expected_len < 0.0)
+			printf("exp len negative %lf \n", expected_len);
+		expected_len /= 1000000.0;
+//		printf("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len);
+		if(expected_len > (window_size + 0.2*window_size))
+			return 0;
+	}
+	return 1;
+}
+
+void _starpu_fetch_task_from_waiting_list(struct _starpu_sched_ctx *sched_ctx)
+{
+	if(starpu_task_list_empty(&sched_ctx->waiting_tasks))
+		return;
+	struct starpu_task *old_task = starpu_task_list_back(&sched_ctx->waiting_tasks);
+	if(_starpu_can_push_task(sched_ctx, old_task))
+	{
+		old_task = starpu_task_list_pop_back(&sched_ctx->waiting_tasks);
+		int ret =  _starpu_push_task_to_workers(old_task);
+	}
+	return;
+}
+
+void _starpu_push_task_to_waiting_list(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	starpu_task_list_push_front(&sched_ctx->waiting_tasks, task);
+	return;
+}
 
 void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
 {
@@ -1053,6 +1107,7 @@ void starpu_sched_ctx_remove_workers(int *workers_to_remove, int nworkers_to_rem
 		if(n_removed_workers > 0)
 		{
 			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0);
+			starpu_sched_ctx_set_priority(removed_workers, n_removed_workers, sched_ctx_id, 1);
 		}
 
 	}
@@ -1212,16 +1267,46 @@ int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id)
 	return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier);
 }
 
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task)
 {
+	unsigned ret = 1;
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		if(!_starpu_can_push_task(sched_ctx, task))
+		{
+			_starpu_push_task_to_waiting_list(sched_ctx, task);
+			ret = 0;
+		}
+
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+	return ret;
 }
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		_starpu_fetch_task_from_waiting_list(sched_ctx);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+
 }
 
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id)
@@ -1278,6 +1363,7 @@ void starpu_sched_ctx_notify_hypervisor_exists()
 			flops[i][j] = 0.0;
 			data_size[i][j] = 0;
 		}
+		hyp_actual_start_sample[i] = 0.0;
 	}
 }
 
@@ -1286,6 +1372,11 @@ unsigned starpu_sched_ctx_check_if_hypervisor_exists()
 	return with_hypervisor;
 }
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample)
+{
+	hyp_actual_start_sample[sched_ctx_id] = start_sample;
+}
+
 unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
 {
 	return 1;
@@ -1494,19 +1585,20 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
         struct starpu_worker_collection *workers = sched_ctx->workers;
-        int worker;
-
-	struct starpu_sched_ctx_iterator it;
-
-	workers->init_iterator(workers, &it);
-        while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
-		if(worker == workerid)
-			return 1;
-        }
-
+	if(workers)
+	{
+		int worker;
 
+		struct starpu_sched_ctx_iterator it;
+		
+		workers->init_iterator(workers, &it);
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			if(worker == workerid)
+				return 1;
+		}
+	}
 	return 0;
 }
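
Putting the pieces together, the window is driven purely by the environment. A hedged usage sketch (only STARPU_WINDOW_TIME_SIZE and the dmda policy name come from this commit; the rest is illustrative boilerplate). Note that the waiting-list path is skipped for the initial context (the is_initial_sched checks above), so the window matters for tasks submitted to user-created scheduling contexts:

#include <stdlib.h>
#include <starpu.h>

int main(void)
{
	/* 2-second task window per context; unset means "always push" */
	setenv("STARPU_WINDOW_TIME_SIZE", "2.0", 1);
	/* the window needs a policy providing simulate_push_task,
	   e.g. dmda (see deque_modeling_policy_data_aware.c below) */
	setenv("STARPU_SCHED", "dmda", 1);

	if (starpu_init(NULL) != 0)
		return 1;
	/* ... create contexts and submit tasks: a task whose simulated
	   completion exceeds the window is parked in the context's
	   waiting_tasks list and re-fetched as ready work drains ... */
	starpu_shutdown();
	return 0;
}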
 

+ 7 - 1
src/core/sched_ctx.h

@@ -77,6 +77,12 @@ struct _starpu_sched_ctx
 	/* mutext protecting empty_ctx_tasks list */
 	starpu_pthread_mutex_t empty_ctx_mutex;
 
+	/* ready tasks that couldn't be pushed because the window of tasks was already full */
+	struct starpu_task_list waiting_tasks;
+
+	/* mutex protecting waiting_tasks list */
+	starpu_pthread_mutex_t waiting_tasks_mutex;
+
 	/* min CPUs to execute*/
 	int min_ncpus;
 
@@ -184,7 +190,7 @@ int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 
 /* Return the corresponding index of the workerid in the ctx table */

+ 12 - 1
src/core/sched_policy.c

@@ -351,7 +351,7 @@ int _starpu_push_task(struct _starpu_job *j)
 
 	_STARPU_LOG_IN();
 
-	_starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops);
+	unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task);
 	task->status = STARPU_TASK_READY;
 
 #ifdef HAVE_AYUDAME_H
@@ -384,8 +384,11 @@ int _starpu_push_task(struct _starpu_job *j)
 #endif
 			return 0;
 		}
+
 	}
 
+	if(!can_push)
+		return 0;
 	/* in case there is no codelet associated to the task (that's a control
 	 * task), we directly execute its callback and enforce the
 	 * corresponding dependencies */
@@ -657,6 +660,14 @@ static
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 {
 	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
+	{
+		if(worker->removed_from_ctx[l->sched_ctx] == 1)
+		{
+			return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+		}
+	}
+
 	unsigned are_2_priorities = 0;
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	{

+ 1 - 1
src/core/task.c

@@ -772,7 +772,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
-	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
+	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	for (i=0 ; i<nbuffers ; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);

+ 8 - 4
src/core/workers.c

@@ -98,15 +98,19 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 {
 	int i;
 	_starpu_codelet_check_deprecated_fields(task->cl);
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
-	struct starpu_worker_collection *workers = sched_ctx->workers;
 
+	/* make sure there is a worker on the machine able to execute the
+	   task, independent of the sched_ctx; the latter may receive the
+	   necessary worker later on - the user or the hypervisor should take care that this happens */
+	
+	int check_entire_platform = starpu_get_env_number("STARPU_CHECK_ENTIRE_PLATFORM");
+	struct _starpu_sched_ctx *sched_ctx = check_entire_platform == 1 ? _starpu_get_initial_sched_ctx() : _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	{
-                i = workers->get_next(workers, &it);
+		i = workers->get_next(workers, &it);
 		if (starpu_worker_get_type(i) != arch)
 			continue;
 

+ 0 - 1
src/debug/traces/starpu_fxt.c

@@ -1607,7 +1607,6 @@ static void handle_task_wait_for_all(void)
 static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	char *event = (char*)&ev->param[0];
-
 	/* Add an event in the trace */
 	if (out_paje_file)
 	{

+ 24 - 5
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -673,7 +673,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 	*max_exp_endp = max_exp_end;
 }
 
-static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id)
+static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate)
 {
 	/* find the queue */
 	unsigned worker, worker_ctx = 0;
@@ -791,9 +791,19 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
 	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
 	starpu_task_set_implementation(task, selected_impl);
+
+	if(!simulate)
+	{	
+		/* we should now have the best worker in variable "best" */
+		return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+	}
+	else
+	{
+//		double max_len = (max_exp_end - starpu_timing_now());
+		/* printf("%d: dmda max_exp_end %lf best_exp_end %lf max_len %lf \n", sched_ctx_id, max_exp_end/1000000.0, best_exp_end/1000000.0, max_len/1000000.0);	 */
 	
-	/* we should now have the best worker in variable "best" */
-	return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+		return exp_end[best_in_ctx][selected_impl] ;
+	}
 }
 
 static int dmda_push_sorted_task(struct starpu_task *task)
@@ -801,7 +811,7 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 #ifdef STARPU_DEVEL
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #endif
-	return _dmda_push_task(task, 1, task->sched_ctx);
+	return _dmda_push_task(task, 1, task->sched_ctx, 0);
 }
 
 static int dm_push_task(struct starpu_task *task)
@@ -812,7 +822,12 @@ static int dm_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 {
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 0, task->sched_ctx);
+	return _dmda_push_task(task, 0, task->sched_ctx, 0);
+}
+static double dmda_simulate_push_task(struct starpu_task *task)
+{
+	STARPU_ASSERT(task);
+	return _dmda_push_task(task, 0, task->sched_ctx, 1);
 }
 
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
@@ -1009,6 +1024,7 @@ struct starpu_sched_policy _starpu_sched_dm_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dm_push_task,
+	.simulate_push_task = NULL,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,
@@ -1024,6 +1040,7 @@ struct starpu_sched_policy _starpu_sched_dmda_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1040,6 +1057,7 @@ struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_sorted_task,
+	.simulate_push_task = NULL,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1056,6 +1074,7 @@ struct starpu_sched_policy _starpu_sched_dmda_ready_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
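
With simulate set, _dmda_push_task runs the same performance-model scan but returns exp_end[best_in_ctx][selected_impl] instead of calling push_task_on_best_worker; that value is what _starpu_can_push_task (src/core/sched_ctx.c above) compares against the window. Policies that keep .simulate_push_task = NULL (dm and dmda_sorted here) are therefore never throttled. Caller side, condensed:

/* condensed from _starpu_can_push_task above */
if(!sched_ctx->sched_policy->simulate_push_task)
	return 1; /* no hook: the window cannot be evaluated, always push */
double expected_end = sched_ctx->sched_policy->simulate_push_task(task); /* in us; nothing is queued */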