Browse Source

small fixes done while writing my thesis + task window at context level (before pushing tasks to the scheduler) expressed w.r.t. time and not number of tasks

Andra Hugo 10 years ago
parent
commit
ba3e4d7403

+ 2 - 0
include/starpu_sched_ctx_hypervisor.h

@@ -43,6 +43,8 @@ void starpu_sched_ctx_notify_hypervisor_exists(void);
 
 
 unsigned starpu_sched_ctx_check_if_hypervisor_exists(void);
 unsigned starpu_sched_ctx_check_if_hypervisor_exists(void);
 
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 1 - 0
include/starpu_scheduler.h

@@ -33,6 +33,7 @@ struct starpu_sched_policy
 	void (*deinit_sched)(unsigned sched_ctx_id);
 	void (*deinit_sched)(unsigned sched_ctx_id);
 
 
 	int (*push_task)(struct starpu_task *);
 	int (*push_task)(struct starpu_task *);
+	double (*simulate_push_task)(struct starpu_task *);
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 	struct starpu_task *(*pop_task)(unsigned sched_ctx_id);
 	struct starpu_task *(*pop_task)(unsigned sched_ctx_id);
 	struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id);
 	struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id);

+ 1 - 0
sc_hypervisor/include/sc_hypervisor.h

@@ -144,6 +144,7 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
 double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
 
 
 void sc_hypervisor_print_overhead();
 void sc_hypervisor_print_overhead();
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 2 - 1
sc_hypervisor/src/Makefile.am

@@ -37,7 +37,8 @@ libsc_hypervisor_la_SOURCES = 				\
 	hypervisor_policies/teft_lp_policy.c		\
 	hypervisor_policies/teft_lp_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
 	hypervisor_policies/ispeed_lp_policy.c		\
 	hypervisor_policies/ispeed_lp_policy.c		\
-	hypervisor_policies/throughput_lp_policy.c
+	hypervisor_policies/throughput_lp_policy.c	\
+	hypervisor_policies/hard_coded_policy.c
 
 
 noinst_HEADERS = sc_hypervisor_intern.h		
 noinst_HEADERS = sc_hypervisor_intern.h		
 
 

+ 17 - 8
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -20,7 +20,7 @@
 #include <starpu_config.h>
 #include <starpu_config.h>
 #include <sys/time.h>
 #include <sys/time.h>
 
 
-int resize_no = 0;
+unsigned long resize_no = 0;
 #ifdef STARPU_HAVE_GLPK_H
 #ifdef STARPU_HAVE_GLPK_H
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 {
@@ -53,8 +53,8 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 	
 	
-	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000;
-	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	if(vmax != -1.0)
 	if(vmax != -1.0)
 	{
 	{
 /* 		int nworkers_per_ctx_rounded[ns][nw]; */
 /* 		int nworkers_per_ctx_rounded[ns][nw]; */
@@ -124,9 +124,8 @@ static int _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *fir
 
 
 static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 {
-#ifdef STARPU_USE_FXT
 	starpu_fxt_trace_user_event(resize_no);
 	starpu_fxt_trace_user_event(resize_no);
-#endif
+
 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 	if(nhierarchy_levels > 1)
 	if(nhierarchy_levels > 1)
 	{
 	{
@@ -243,9 +242,19 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 			_resize(NULL, -1, NULL, -1);
 			_resize(NULL, -1, NULL, -1);
 		}
 		}
 	}
 	}
-	else if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+	else 
 	{
 	{
-		_resize(NULL, -1, NULL, -1);
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		{
+
+			_resize(NULL, -1, NULL, -1);
+		}
+		else
+		{
+			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				_resize(NULL, -1, NULL, -1);
+		}
 	}
 	}
 	return;
 	return;
 }
 }
@@ -253,6 +262,7 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
 {
+	if(worker == -2) return;
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
 	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	{
 	{
@@ -320,7 +330,6 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		if(ret != EBUSY)
 		{
 		{
-//			printf("trigger idle \n");
 			_resize_leaves(worker);
 			_resize_leaves(worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 		}

+ 12 - 2
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -119,13 +119,23 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 
 
 
 
 	double w_in_s[ns][nw];
 	double w_in_s[ns][nw];
-//			double flops_on_w[ns][nw];
+
 	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
 	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
 	int i;
 	int i;
 	for(i = 0; i < ns; i++)
 	for(i = 0; i < ns; i++)
 		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
 		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
-	
+
+	struct timeval start_time;
+	struct timeval end_time;
+	gettimeofday(&start_time, NULL);
 	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, curr_sched_ctxs, workers);
 	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, curr_sched_ctxs, workers);
+	gettimeofday(&end_time, NULL);
+	
+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
+	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	/* if we did find at least one solution redistribute the resources */
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	if(found_sol)
 	{
 	{

+ 54 - 48
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -79,7 +79,7 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nt = 0; /* Number of different kinds of tasks */
 	int nt = 0; /* Number of different kinds of tasks */
-	starpu_pthread_mutex_lock(&mutex);
+
 	struct sc_hypervisor_policy_task_pool * tp;
 	struct sc_hypervisor_policy_task_pool * tp;
 	for (tp = task_pools; tp; tp = tp->next)
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
 		nt++;
@@ -105,12 +105,16 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); 
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); 
 	double smallest_tmax = possible_tmax / 3;
 	double smallest_tmax = possible_tmax / 3;
 	double tmax = possible_tmax * ns;
 	double tmax = possible_tmax * ns;
-	double tmin = smallest_tmax;
+	double tmin = 0.0;
+	unsigned found_sol = 0;
+
+	if(nt > 0 && tmax > 0.0)
+	{
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	}
 
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
 
-	starpu_pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	if(found_sol)
 	{
 	{
@@ -147,7 +151,7 @@ static void size_if_required()
 
 
 		if(ready_to_size)
 		if(ready_to_size)
 		{
 		{
-			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
+			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);			
 			sc_hypervisor_free_size_req();
 			sc_hypervisor_free_size_req();
 		}
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -167,6 +171,7 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
 {
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	if(ns < 2) return;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 
 
 	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
@@ -186,7 +191,6 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	for (tp = task_pools; tp; tp = tp->next)
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
 		nt++;
 	
 	
-	
 	double w_in_s[ns][nw];
 	double w_in_s[ns][nw];
 	double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
 	double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
 	int i;
 	int i;
@@ -205,12 +209,27 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	   compute the nr of flops and not the tasks */
 	   compute the nr of flops and not the tasks */
         /*lp computes it in s but it's converted to ms just before return */
         /*lp computes it in s but it's converted to ms just before return */
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
-	double smallest_tmax = 0.0;
-	double tmax = possible_tmax * ns;
+	double smallest_tmax = possible_tmax/2.0;
+	double tmax = possible_tmax + smallest_tmax;
 	double tmin = smallest_tmax;
 	double tmin = smallest_tmax;
+	unsigned found_sol = 0;
 
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	if(nt > 0 && tmax > 0.0)
+	{
+		struct timeval start_time;
+		struct timeval end_time;
+		gettimeofday(&start_time, NULL);
+		
+		
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+		gettimeofday(&end_time, NULL);
+		
+		long diff_s = end_time.tv_sec  - start_time.tv_sec;
+		long diff_us = end_time.tv_usec  - start_time.tv_usec;
+		
+		__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+	}
 //	starpu_pthread_mutex_unlock(&mutex);
 //	starpu_pthread_mutex_unlock(&mutex);
 	
 	
 	/* if we did find at least one solution redistribute the resources */
 	/* if we did find at least one solution redistribute the resources */
@@ -234,61 +253,48 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	free(tasks_per_worker);
 	free(tasks_per_worker);
 }
 }
 
 
-static void teft_lp_handle_poped_task(unsigned sched_ctx, __attribute__((unused))int worker, struct starpu_task *task, uint32_t footprint)
+static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	if(worker > -2)
 	{
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
-		{
-			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_SPEED)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 		{
-			
-			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+			unsigned criteria = sc_hypervisor_get_resize_criteria();
+			if(criteria != SC_NOTHING && criteria == SC_SPEED)
 			{
 			{
-				_try_resizing(NULL, -1, NULL, -1);
+				
+				if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				{
+					_try_resizing(NULL, -1, NULL, -1);
+				}
 			}
 			}
+			
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 		}
-
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	}
 	/* too expensive to take this mutex and correct value of the number of tasks is not compulsory */
 	/* too expensive to take this mutex and correct value of the number of tasks is not compulsory */
-//	starpu_pthread_mutex_lock(&mutex);
+	starpu_pthread_mutex_lock(&mutex);
 	sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools);
 	sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools);
-//	starpu_pthread_mutex_unlock(&mutex);
+	starpu_pthread_mutex_unlock(&mutex);
 
 
 }
 }
 
 
 static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING)// && criteria == SC_IDLE)
 	{
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 		{
+			_try_resizing(NULL, -1, NULL, -1);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_IDLE)
-		{
-			
-			if(sc_hypervisor_check_idle(sched_ctx, worker))
-			{
-				_try_resizing(NULL, -1, NULL, -1);
-			}
 		}
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	}
 	return;
 	return;
 }
 }

+ 58 - 23
sc_hypervisor/src/policies_utils/dichotomy.c

@@ -23,60 +23,95 @@
    still has solutions */
    still has solutions */
 unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
 unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
 					    double tmin, double tmax, double smallest_tmax,
 					    double tmin, double tmax, double smallest_tmax,
-					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
+					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw],
 									     unsigned is_integer, double tmax, void *specifc_data))
 									     unsigned is_integer, double tmax, void *specifc_data))
 {
 {
 	double res = 1.0;
 	double res = 1.0;
 	unsigned has_sol = 0;
 	unsigned has_sol = 0;
-	double old_tmax = 0.0;
+	double tmid = tmax;
 	unsigned found_sol = 0;
 	unsigned found_sol = 0;
-
 	struct timeval start_time;
 	struct timeval start_time;
 	struct timeval end_time;
 	struct timeval end_time;
 	int nd = 0;
 	int nd = 0;
+	double found_tmid = tmax;
+	double potential_tmid = tmid;
+	double threashold = tmax*0.1;
 	gettimeofday(&start_time, NULL);
 	gettimeofday(&start_time, NULL);
-
+	
 	/* we fix tmax and we do not treat it as an unknown
 	/* we fix tmax and we do not treat it as an unknown
 	   we just vary by dichotomy its values*/
 	   we just vary by dichotomy its values*/
-	while(tmax > 1.0)
+	while(1)
 	{
 	{
 		/* find solution and save the values in draft tables
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   only if there is a solution for the system we save them
 		   in the proper table */
 		   in the proper table */
-		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmax, specific_data);
-		if(res != 0.0)
+		printf("solving for tmid %lf \n", tmid);
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmid, specific_data);
+		if(res < 0.0)
+		{
+			printf("timeouted no point in continuing\n");
+			found_sol = 0;
+			break;
+		}
+		else if(res != 0.0)
 		{
 		{
 			has_sol = 1;
 			has_sol = 1;
 			found_sol = 1;
 			found_sol = 1;
+			found_tmid = tmid;
+			printf("found sol for tmid %lf \n", tmid);
 		}
 		}
 		else
 		else
+		{
+			printf("failed for tmid %lf \n", tmid);
+			if(tmid == tmax)
+			{
+				printf("failed for tmid %lf from the first time\n", tmid);
+				break;
+			}
 			has_sol = 0;
 			has_sol = 0;
+		}
 
 
-		/* if we have a solution with this tmax try a smaller value
-		   bigger than the old min */
+		/* if we have a solution with this tmid try a smaller value
+		   bigger than the old one */
 		if(has_sol)
 		if(has_sol)
 		{
 		{
-			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
+			/* if the difference between tmax and tmid is smaller than
+			   a given threashold there is no point in searching more 
+			   precision */
+			tmax = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threashold)
+			{
+				printf("had_sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
 				break;
 				break;
-			old_tmax = tmax;
+			}
+			printf("try for smaller potential tmid %lf \n", potential_tmid);
 		}
 		}
-		else /*else try a bigger one but smaller than the old tmax */
+		else /*else try a bigger one */
 		{
 		{
-			tmin = tmax;
-			if(old_tmax != 0.0)
-				tmax = old_tmax;
+			/* if we previously found a good sol and we keep failing
+			   we stop searching for a better sol */
+			tmin = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threashold)
+			{
+				printf("didn't have sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
+				break;
+			}
+			printf("try for bigger potential tmid %lf \n", potential_tmid);
 		}
 		}
-		if(tmin == tmax) break;
-		tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
 
 
-		if(tmax < smallest_tmax)
-		{
-			tmax = old_tmax;
-			tmin = smallest_tmax;
-			tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
-		}
+		tmid = potential_tmid;
+
 		nd++;
 		nd++;
 	}
 	}
+	printf("solve againd for tmid %lf \n", found_tmid);
+	if(found_sol)
+	{
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, found_tmid, specific_data);
+		found_sol = (res != 0.0);
+	}
+	printf("found sol %d for tmid %lf\n", found_sol, found_tmid);
 	gettimeofday(&end_time, NULL);
 	gettimeofday(&end_time, NULL);
 
 
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;

+ 57 - 58
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -30,6 +30,7 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	int t, w, s;
 	int t, w, s;
 	glp_prob *lp;
 	glp_prob *lp;
 
 
+	
 	lp = glp_create_prob();
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
 	glp_set_obj_dir(lp, GLP_MAX);
@@ -57,12 +58,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				char name[32];
 				char name[32];
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				glp_set_col_name(lp, colnum(w, t), name);
 				glp_set_col_name(lp, colnum(w, t), name);
-/* 				if (integer) */
-/*                                 { */
-/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
-/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
-/*                                 } */
-/* 				else */
+				if (is_integer)
+                                {
+                                        glp_set_col_kind(lp, colnum(w, t), GLP_IV);
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0);
+                                }
+				else
 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
 			}
 			}
 		for(s = 0; s < ns; s++)
 		for(s = 0; s < ns; s++)
@@ -115,7 +116,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 						ia[n] = curr_row_idx+s*nw+w+1;
 						ia[n] = curr_row_idx+s*nw+w+1;
 						ja[n] = colnum(w, t);
 						ja[n] = colnum(w, t);
 						if (isnan(times[w][t]))
 						if (isnan(times[w][t]))
+						{
+							printf("had to insert huge val \n");
 							ar[n] = 1000000000.;
 							ar[n] = 1000000000.;
+						}
 						else
 						else
 							ar[n] = times[w][t];
 							ar[n] = times[w][t];
 						n++;
 						n++;
@@ -126,7 +130,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				ja[n] = nw*nt+s*nw+w+1;
 				ja[n] = nw*nt+s*nw+w+1;
 				ar[n] = (-1) * tmax;
 				ar[n] = (-1) * tmax;
 				n++;
 				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+				if (is_integer)
+                                {
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0, 0);
+                                }
+                                else
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
 			}
 			}
 		}
 		}
 
 
@@ -184,10 +193,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	parm.msg_lev = GLP_MSG_OFF;
 	parm.msg_lev = GLP_MSG_OFF;
 	int ret = glp_simplex(lp, &parm);
 	int ret = glp_simplex(lp, &parm);
 
 
-/* 	char str[50]; */
-/* 	sprintf(str, "outpu_lp_%g", tmax); */
+	/* char str[50]; */
+	/* sprintf(str, "outpu_lp_%g", tmax); */
 
 
-/* 	glp_print_sol(lp, str); */
+	/* glp_print_sol(lp, str); */
 
 
 	if (ret)
 	if (ret)
 	{
 	{
@@ -213,12 +222,15 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
                 glp_iocp iocp;
                 glp_iocp iocp;
                 glp_init_iocp(&iocp);
                 glp_init_iocp(&iocp);
                 iocp.msg_lev = GLP_MSG_OFF;
                 iocp.msg_lev = GLP_MSG_OFF;
+//		iocp.tm_lim = 1000;
 		glp_intopt(lp, &iocp);
 		glp_intopt(lp, &iocp);
 		int stat = glp_mip_status(lp);
 		int stat = glp_mip_status(lp);
 		/* if we don't have a solution return */
 		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
+		if(stat == GLP_NOFEAS || stat == GLP_ETMLIM || stat == GLP_UNDEF)
 		{
 		{
 //			printf("no int sol in tmax = %lf\n", tmax);
 //			printf("no int sol in tmax = %lf\n", tmax);
+			if(stat == GLP_ETMLIM || stat == GLP_UNDEF)
+				printf("timeout \n");
 			glp_delete_prob(lp);
 			glp_delete_prob(lp);
 			lp = NULL;
 			lp = NULL;
 			return 0.0;
 			return 0.0;
@@ -228,12 +240,13 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	double res = glp_get_obj_val(lp);
 	double res = glp_get_obj_val(lp);
 	for (w = 0; w < nw; w++)
 	for (w = 0; w < nw; w++)
 		for (t = 0; t < nt; t++)
 		for (t = 0; t < nt; t++)
-/* 			if (integer) */
-/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
-/*                         else */
+			if (is_integer)
+				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t));
+                        else
 				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
 				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
-	
-//	printf("for tmax %lf\n", tmax);
+
+	/* printf("**********************************************\n"); */
+	/* printf("for tmax %lf\n", tmax); */
 	for(s = 0; s < ns; s++)
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
 		for(w = 0; w < nw; w++)
 		{
 		{
@@ -243,8 +256,8 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
 //			printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]);
 //			printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]);
 		}
 		}
-//	printf("\n");
-
+	/* printf("\n"); */
+	/* printf("**********************************************\n"); */
 	glp_delete_prob(lp);
 	glp_delete_prob(lp);
 	return res;
 	return res;
 }
 }
@@ -286,14 +299,14 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			if (integer)
 			{
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
 				glp_set_col_kind(lp, n, GLP_IV);
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers);
-				}
-				else
+				/* if(sc_w->consider_max) */
+				/* { */
+				/* 	if(config->max_nworkers == 0) */
+				/* 		glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers); */
+				/* 	else */
+				/* 		glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers); */
+				/* } */
+				/* else */
 				{
 				{
 					if(total_nw[w] == 0)
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]);
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]);
@@ -303,17 +316,17 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			}
 			}
 			else
 			else
 			{
 			{
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0);
-#ifdef STARPU_SC_HYPERVISOR_DEBUG
-					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0);
-#endif
-				}
-				else
+/* 				if(sc_w->consider_max) */
+/* 				{ */
+/* 					if(config->max_nworkers == 0) */
+/* 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* 					else */
+/* 						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* #ifdef STARPU_SC_HYPERVISOR_DEBUG */
+/* 					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0); */
+/* #endif */
+/* 				} */
+/* 				else */
 				{
 				{
 					if(total_nw[w] == 0)
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0);
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0);
@@ -418,27 +431,13 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 		n++;
 
 
-//		if(last_vmax == -1.0)
-		{
-			/*sum(all gpus) = 3*/
-			if(w == 0)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[0]);
-			
-			/*sum(all cpus) = 9*/
-			if(w == 1)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
-
-		}
-/* 		else */
-/* 		{ */
-/* 			/\*sum(all gpus) = 3*\/ */
-/* 			if(w == 0) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); */
-			
-/* 			/\*sum(all cpus) = 9*\/ */
-/* 			if(w == 1) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); */
-/* 		} */
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+		
+		/*sum(all cpus) = 9*/
+		if(w == 1)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
 	}
 	}
 
 
 	STARPU_ASSERT(n == ne);
 	STARPU_ASSERT(n == ne);

+ 3 - 2
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -41,7 +41,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 		int w;
 		int w;
 		for(w = 0; w < nw; w++)
 		for(w = 0; w < nw; w++)
-			v[i][w] = 5.0;//sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 
 
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
 		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
@@ -624,6 +624,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 				
 				
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 					   &nw_move, workers_move);
 					   &nw_move, workers_move);
+
 		if(nw_move > 0)
 		if(nw_move > 0)
 			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 	}
 	}
@@ -635,8 +636,8 @@ int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, i
 	int worker;
 	int worker;
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);

+ 15 - 10
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -28,8 +28,8 @@ static int _compute_priority(unsigned sched_ctx)
 	int worker;
 	int worker;
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);
@@ -117,8 +117,8 @@ int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum st
 	int considered = 0;
 	int considered = 0;
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+
 	for(index = 0; index < *nworkers; index++)
 	for(index = 0; index < *nworkers; index++)
 	{
 	{
 		while(workers->has_next(workers, &it))
 		while(workers->has_next(workers, &it))
@@ -183,7 +183,6 @@ int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *confi
 	int worker;
 	int worker;
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
@@ -409,6 +408,9 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 {
 {
         struct sc_hypervisor_policy_task_pool *tp;
         struct sc_hypervisor_policy_task_pool *tp;
         int w, t;
         int w, t;
+	for(w = 0; w < nw; w++)
+		for(t = 0; t < nt; t++)
+			times[w][t] = NAN;
         for (w = 0; w < nw; w++)
         for (w = 0; w < nw; w++)
         {
         {
                 for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
                 for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
@@ -422,7 +424,6 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 			else
 			else
 			{
 			{
                                 times[w][t] = (length / 1000.);
                                 times[w][t] = (length / 1000.);
-
 				double transfer_time = 0.0;
 				double transfer_time = 0.0;
 				unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
 				unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
 				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
 				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
@@ -431,18 +432,21 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 					if(arch == STARPU_CUDA_WORKER)
 					if(arch == STARPU_CUDA_WORKER)
 					{
 					{
 						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
 						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
-						transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+						if(transfer_speed > 0.0)
+							transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+	
 						double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
 						double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
 						transfer_time += latency/1000.;
 						transfer_time += latency/1000.;
-						
-						
+//						transfer_time *=4;
 					}
 					}
 					else if(arch == STARPU_CPU_WORKER)
 					else if(arch == STARPU_CPU_WORKER)
 					{
 					{
 						if(!starpu_sched_ctx_contains_type_of_worker(arch, tp->sched_ctx_id))
 						if(!starpu_sched_ctx_contains_type_of_worker(arch, tp->sched_ctx_id))
 						{
 						{
 							double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
 							double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
-							transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+							if(transfer_speed > 0.0)
+								transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+
 							double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
 							double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
 							transfer_time += latency / 1000.;
 							transfer_time += latency / 1000.;
 						}
 						}
@@ -465,7 +469,7 @@ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 	{
 	{
 		if(sc_w->idle_time[worker] > config->max_idle[worker])
 		if(sc_w->idle_time[worker] > config->max_idle[worker])
 		{
 		{
-//			printf("w%d/ctx%d: current idle %lf all idle %lf max_idle %lf\n", worker, sched_ctx, idle, idle_time, config->max_idle[worker]);
+//			printf("w%d/ctx%d: current idle %lf  max_idle %lf\n", worker, sched_ctx, sc_w->idle_time[worker], config->max_idle[worker]);
 			return 1;
 			return 1;
 		}
 		}
 	}
 	}
@@ -547,7 +551,8 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_
 			
 			
 			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
 			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
-			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
+			double max_vel = _get_max_speed_gap();
+			if(ctx_v != -1.0 && ((ctx_v < (1-max_vel)*optimal_v[i]) || ctx_v > (1+max_vel)*optimal_v[i])) 
 			{
 			{
 				return 1;
 				return 1;
 			}
 			}

+ 3 - 3
sc_hypervisor/src/policies_utils/speed.c

@@ -127,13 +127,12 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		int worker;
 		int worker;
 		
 		
 		struct starpu_sched_ctx_iterator it;
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		
 		double speed = 0.0;
 		double speed = 0.0;
 		unsigned nworkers = 0;
 		unsigned nworkers = 0;
 		double all_workers_flops = 0.0;
 		double all_workers_flops = 0.0;
 		double max_workers_idle_time = 0.0;
 		double max_workers_idle_time = 0.0;
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		while(workers->has_next(workers, &it))
 		{
 		{
 			worker = workers->get_next(workers, &it);
 			worker = workers->get_next(workers, &it);
@@ -168,6 +167,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		
 		
 		if(speed != -1.0)
 		if(speed != -1.0)
 		{
 		{
+
 			/* if ref_speed started being corrupted bc of the old bad distribution
 			/* if ref_speed started being corrupted bc of the old bad distribution
 			   register only the last frame otherwise make the average with the speed 
 			   register only the last frame otherwise make the average with the speed 
 			   behavior of the application until now */
 			   behavior of the application until now */
@@ -233,8 +233,8 @@ double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch)
 		int worker;
 		int worker;
 		
 		
 		struct starpu_sched_ctx_iterator it;
 		struct starpu_sched_ctx_iterator it;
-
 		workers->init_iterator(workers, &it);
 		workers->init_iterator(workers, &it);
+
 		while(workers->has_next(workers, &it))
 		while(workers->has_next(workers, &it))
 		{
 		{
 			worker = workers->get_next(workers, &it);
 			worker = workers->get_next(workers, &it);

+ 1 - 2
sc_hypervisor/src/policies_utils/task_pool.c

@@ -69,8 +69,7 @@ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32
 				free(tp);
 				free(tp);
 				tp = NULL;
 				tp = NULL;
 				
 				
-				if(next_tp)
-					*task_pools = next_tp;
+				*task_pools = next_tp;
 				
 				
 			}
 			}
 			else
 			else

+ 53 - 19
sc_hypervisor/src/sc_hypervisor.c

@@ -42,6 +42,7 @@ extern struct sc_hypervisor_policy ispeed_lp_policy;
 extern struct sc_hypervisor_policy throughput_lp_policy;
 extern struct sc_hypervisor_policy throughput_lp_policy;
 #endif // STARPU_HAVE_GLPK_
 #endif // STARPU_HAVE_GLPK_
 extern struct sc_hypervisor_policy ispeed_policy;
 extern struct sc_hypervisor_policy ispeed_policy;
+extern struct sc_hypervisor_policy hard_coded_policy;
 
 
 
 
 static struct sc_hypervisor_policy *predefined_policies[] =
 static struct sc_hypervisor_policy *predefined_policies[] =
@@ -55,7 +56,8 @@ static struct sc_hypervisor_policy *predefined_policies[] =
 	&throughput_lp_policy,
 	&throughput_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
 	&gflops_rate_policy,
-	&ispeed_policy
+	&ispeed_policy,
+	&hard_coded_policy
 };
 };
 
 
 static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
 static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
@@ -164,7 +166,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
 	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
 
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
-	hypervisor.start_executing_time = starpu_timing_now();
+//	hypervisor.start_executing_time = starpu_timing_now();
 
 
 	int i;
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
@@ -254,8 +256,17 @@ void sc_hypervisor_start_resize(unsigned sched_ctx)
 
 
 static void _print_current_time()
 static void _print_current_time()
 {
 {
-	if(!getenv("SC_HYPERVISOR_STOP_PRINT"))
+	char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT");
+        int sp = stop_print ? atoi(stop_print) : 1;
+
+	if(!sp)
 	{
 	{
+		if(hypervisor.start_executing_time == 0.0)
+		{
+			fprintf(stdout, "Time: %lf\n", -1.0);
+			return;
+		}
+
 		double curr_time = starpu_timing_now();
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
 		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
 		fprintf(stdout, "Time: %lf\n", elapsed_time);
 		fprintf(stdout, "Time: %lf\n", elapsed_time);
@@ -332,7 +343,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 
 
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
-	hypervisor.resize[sched_ctx] = 1;
+	hypervisor.resize[sched_ctx] = 0;//1;
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 }
 
 
@@ -412,6 +423,13 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 }
 
 
+void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now)
+{
+	if(now)
+		hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+	starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now());
+}
+
 
 
 double _get_max_speed_gap()
 double _get_max_speed_gap()
 {
 {
@@ -441,8 +459,8 @@ int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archty
 	int worker;
 	int worker;
 
 
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);
@@ -475,8 +493,8 @@ double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrappe
 	int worker;
 	int worker;
 	
 	
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);
@@ -493,8 +511,8 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 	int worker;
 	int worker;
 	
 	
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);
@@ -981,12 +999,11 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs,
 		int worker;
 		int worker;
 		
 		
 		struct starpu_sched_ctx_iterator it;
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		
 		double elapsed_time_worker[STARPU_NMAXWORKERS];
 		double elapsed_time_worker[STARPU_NMAXWORKERS];
 		double norm_idle_time = 0.0;
 		double norm_idle_time = 0.0;
 		double end_time  = starpu_timing_now();
 		double end_time  = starpu_timing_now();
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		while(workers->has_next(workers, &it))
 		{
 		{
 			double idle_time = 0.0;
 			double idle_time = 0.0;
@@ -1184,6 +1201,7 @@ unsigned choose_ctx_to_steal(int worker)
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 {
 {
+	if(hypervisor.start_executing_time == 0.0) return;
 	struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
 	struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
 	sc_w->current_idle_time[worker] += idle_time;
 	sc_w->current_idle_time[worker] += idle_time;
 	
 	
@@ -1202,7 +1220,7 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 	if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
 	if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
 	{
 	{
 		if(sc_w->hyp_react_start_time == 0.0)
 		if(sc_w->hyp_react_start_time == 0.0)
-			sc_w->hyp_react_start_time = starpu_timing_now();
+			sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 		
 		
 		double curr_time = starpu_timing_now();
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
 		double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
@@ -1236,13 +1254,15 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 			if(idle_everywhere)
 			if(idle_everywhere)
 			{
 			{
 				double hyp_overhead_start = starpu_timing_now();
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 				double hyp_overhead_end = starpu_timing_now();
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
 			}
-
-
-			sc_w->hyp_react_start_time = starpu_timing_now();
 		}
 		}
 	}
 	}
 	return;
 	return;
@@ -1261,6 +1281,11 @@ void _update_real_start_time_hierarchically(unsigned sched_ctx)
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_poped_task(unsigned sched_ctx, int worker)
 static void notify_poped_task(unsigned sched_ctx, int worker)
 {
 {
+	if(hypervisor.start_executing_time == 0.0)
+		hypervisor.start_executing_time = starpu_timing_now();
+	if(!hypervisor.resize[sched_ctx])
+		hypervisor.resize[sched_ctx] = 1;
+
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
 		_update_real_start_time_hierarchically(sched_ctx);
 		_update_real_start_time_hierarchically(sched_ctx);
 
 
@@ -1358,18 +1383,25 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 		if(hypervisor.policy.handle_poped_task)
 		if(hypervisor.policy.handle_poped_task)
 		{	
 		{	
 			if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0)
 			if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0)
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 
 
 			double curr_time = starpu_timing_now();
 			double curr_time = starpu_timing_now();
 			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
 			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
 			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
 			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
 			{
 			{
 				double hyp_overhead_start = starpu_timing_now();
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
 				double hyp_overhead_end = starpu_timing_now();
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
 			}
+			else
+                                /* no need to consider resizing, just remove the task from the pool if the strategy requires it*/
+				hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);
 		}
 		}
 	}
 	}
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
@@ -1434,8 +1466,9 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, s
 	hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops;
 	hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops;
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 
 
-	if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known)
-		hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
+	/* signaled by the user - no need to wait for them */
+	/* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */
+	/* 	hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */
 }
 }
 
 
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
@@ -1650,3 +1683,4 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 	}
 	}
 	return;
 	return;
 }
 }
+

+ 0 - 1
src/common/barrier_counter.c

@@ -96,7 +96,6 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 
 	barrier->reached_start++;
 	barrier->reached_start++;
 	barrier->reached_flops += flops;
 	barrier->reached_flops += flops;
-
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 	return 0;
 }
 }

+ 1 - 1
src/common/fxt.c

@@ -224,7 +224,7 @@ void starpu_fxt_stop_profiling()
 
 
 #endif // STARPU_USE_FXT
 #endif // STARPU_USE_FXT
 
 
-void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED)
+void starpu_fxt_trace_user_event(unsigned long code)
 {
 {
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 	_STARPU_TRACE_USER_EVENT(code);
 	_STARPU_TRACE_USER_EVENT(code);

+ 106 - 14
src/core/sched_ctx.c

@@ -30,6 +30,7 @@ static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
 static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
 static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
 static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
+static double hyp_actual_start_sample[STARPU_NMAX_SCHED_CTXS];
 
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master);
 static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master);
@@ -472,9 +473,12 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	STARPU_ASSERT(nworkers_ctx <= nworkers);
 	STARPU_ASSERT(nworkers_ctx <= nworkers);
 
 
 	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->empty_ctx_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->empty_ctx_mutex, NULL);
-
 	starpu_task_list_init(&sched_ctx->empty_ctx_tasks);
 	starpu_task_list_init(&sched_ctx->empty_ctx_tasks);
 
 
+	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->waiting_tasks_mutex, NULL);
+	starpu_task_list_init(&sched_ctx->waiting_tasks);
+
+
 	sched_ctx->sched_policy = policy ? (struct starpu_sched_policy*)malloc(sizeof(struct starpu_sched_policy)) : NULL;
 	sched_ctx->sched_policy = policy ? (struct starpu_sched_policy*)malloc(sizeof(struct starpu_sched_policy)) : NULL;
 	sched_ctx->is_initial_sched = is_initial_sched;
 	sched_ctx->is_initial_sched = is_initial_sched;
 	sched_ctx->name = sched_ctx_name;
 	sched_ctx->name = sched_ctx_name;
@@ -815,6 +819,7 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 	}
 	}
 
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->empty_ctx_mutex);
 	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->empty_ctx_mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->waiting_tasks_mutex);
 	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
 	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
 	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
@@ -954,6 +959,55 @@ void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx
 	return;
 	return;
 
 
 }
 }
+unsigned _starpu_can_push_task(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	if(sched_ctx->sched_policy->simulate_push_task)
+	{
+		const char *env_window_size = getenv("STARPU_WINDOW_TIME_SIZE");
+		if(!env_window_size) return 1;
+		double window_size = atof(env_window_size);
+		
+		starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
+		STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
+		double expected_end = sched_ctx->sched_policy->simulate_push_task(task);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
+		
+		double expected_len = 0.0; 
+		if(hyp_actual_start_sample[sched_ctx->id] != 0.0)
+			expected_len = expected_end - hyp_actual_start_sample[sched_ctx->id] ;
+		else 
+		{
+			printf("%d: sc start is 0.0\n", sched_ctx->id);
+			expected_len = expected_end - starpu_timing_now();
+		}
+		if(expected_len < 0.0)
+			printf("exp len negative %lf \n", expected_len);
+		expected_len /= 1000000.0;
+//		printf("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len);
+		if(expected_len > (window_size + 0.2*window_size))
+			return 0;
+	}
+	return 1;
+}
+
+void _starpu_fetch_task_from_waiting_list(struct _starpu_sched_ctx *sched_ctx)
+{
+	if(starpu_task_list_empty(&sched_ctx->waiting_tasks))
+		return;
+	struct starpu_task *old_task = starpu_task_list_back(&sched_ctx->waiting_tasks);
+	if(_starpu_can_push_task(sched_ctx, old_task))
+	{
+		old_task = starpu_task_list_pop_back(&sched_ctx->waiting_tasks);
+		int ret =  _starpu_push_task_to_workers(old_task);
+	}
+	return;
+}
+
+void _starpu_push_task_to_waiting_list(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	starpu_task_list_push_front(&sched_ctx->waiting_tasks, task);
+	return;
+}
 
 
 void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
 void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
 {
 {
@@ -1053,6 +1107,7 @@ void starpu_sched_ctx_remove_workers(int *workers_to_remove, int nworkers_to_rem
 		if(n_removed_workers > 0)
 		if(n_removed_workers > 0)
 		{
 		{
 			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0);
 			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0);
+			starpu_sched_ctx_set_priority(removed_workers, n_removed_workers, sched_ctx_id, 1);
 		}
 		}
 
 
 	}
 	}
@@ -1212,16 +1267,46 @@ int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id)
 	return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier);
 	return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier);
 }
 }
 
 
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task)
 {
 {
+	unsigned ret = 1;
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops);
 	_starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		if(!_starpu_can_push_task(sched_ctx, task))
+		{
+			_starpu_push_task_to_waiting_list(sched_ctx, task);
+			ret = 0;
+		}
+
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+	return ret;
 }
 }
 
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		_starpu_fetch_task_from_waiting_list(sched_ctx);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+
 }
 }
 
 
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id)
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id)
@@ -1278,6 +1363,7 @@ void starpu_sched_ctx_notify_hypervisor_exists()
 			flops[i][j] = 0.0;
 			flops[i][j] = 0.0;
 			data_size[i][j] = 0;
 			data_size[i][j] = 0;
 		}
 		}
+		hyp_actual_start_sample[i] = 0.0;
 	}
 	}
 }
 }
 
 
@@ -1286,6 +1372,11 @@ unsigned starpu_sched_ctx_check_if_hypervisor_exists()
 	return with_hypervisor;
 	return with_hypervisor;
 }
 }
 
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample)
+{
+	hyp_actual_start_sample[sched_ctx_id] = start_sample;
+}
+
 unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
 unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
 {
 {
 	return 1;
 	return 1;
@@ -1494,19 +1585,20 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
 
         struct starpu_worker_collection *workers = sched_ctx->workers;
         struct starpu_worker_collection *workers = sched_ctx->workers;
-        int worker;
-
-	struct starpu_sched_ctx_iterator it;
-
-	workers->init_iterator(workers, &it);
-        while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
-		if(worker == workerid)
-			return 1;
-        }
-
+	if(workers)
+	{
+		int worker;
 
 
+		struct starpu_sched_ctx_iterator it;
+		
+		workers->init_iterator(workers, &it);
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			if(worker == workerid)
+				return 1;
+		}
+	}
 	return 0;
 	return 0;
 }
 }
 
 

+ 7 - 1
src/core/sched_ctx.h

@@ -77,6 +77,12 @@ struct _starpu_sched_ctx
 	/* mutext protecting empty_ctx_tasks list */
 	/* mutext protecting empty_ctx_tasks list */
 	starpu_pthread_mutex_t empty_ctx_mutex;
 	starpu_pthread_mutex_t empty_ctx_mutex;
 
 
+	/*ready tasks that couldn't be pushed because the the window of tasks was already full*/
+	struct starpu_task_list waiting_tasks;
+
+	/* mutext protecting waiting_tasks list */
+	starpu_pthread_mutex_t waiting_tasks_mutex;
+
 	/* min CPUs to execute*/
 	/* min CPUs to execute*/
 	int min_ncpus;
 	int min_ncpus;
 
 
@@ -184,7 +190,7 @@ int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 
 
 /* Return the corresponding index of the workerid in the ctx table */
 /* Return the corresponding index of the workerid in the ctx table */

+ 12 - 1
src/core/sched_policy.c

@@ -351,7 +351,7 @@ int _starpu_push_task(struct _starpu_job *j)
 
 
 	_STARPU_LOG_IN();
 	_STARPU_LOG_IN();
 
 
-	_starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops);
+	unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task);
 	task->status = STARPU_TASK_READY;
 	task->status = STARPU_TASK_READY;
 
 
 #ifdef HAVE_AYUDAME_H
 #ifdef HAVE_AYUDAME_H
@@ -384,8 +384,11 @@ int _starpu_push_task(struct _starpu_job *j)
 #endif
 #endif
 			return 0;
 			return 0;
 		}
 		}
+
 	}
 	}
 
 
+	if(!can_push)
+		return 0;
 	/* in case there is no codelet associated to the task (that's a control
 	/* in case there is no codelet associated to the task (that's a control
 	 * task), we directly execute its callback and enforce the
 	 * task), we directly execute its callback and enforce the
 	 * corresponding dependencies */
 	 * corresponding dependencies */
@@ -657,6 +660,14 @@ static
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 {
 {
 	struct _starpu_sched_ctx_list *l = NULL;
 	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
+	{
+		if(worker->removed_from_ctx[l->sched_ctx] == 1)
+		{
+			return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+		}
+	}
+
 	unsigned are_2_priorities = 0;
 	unsigned are_2_priorities = 0;
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	{
 	{

+ 1 - 1
src/core/task.c

@@ -772,7 +772,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
 	j->submitted = 1;
-	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
+	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	for (i=0 ; i<nbuffers ; i++)
 	for (i=0 ; i<nbuffers ; i++)
 	{
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);

+ 8 - 4
src/core/workers.c

@@ -98,15 +98,19 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 {
 {
 	int i;
 	int i;
 	_starpu_codelet_check_deprecated_fields(task->cl);
 	_starpu_codelet_check_deprecated_fields(task->cl);
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
-	struct starpu_worker_collection *workers = sched_ctx->workers;
 
 
+        /* make sure there is a worker on the machine able to execute the 
+	   task, independently of the sched_ctx; the latter may receive later on 
+	   the necessary worker - the user or the hypervisor should take care that this happens */
+	
+	int check_entire_platform = starpu_get_env_number("STARPU_CHECK_ENTIRE_PLATFORM");
+	struct _starpu_sched_ctx *sched_ctx = check_entire_platform == 1 ? _starpu_get_initial_sched_ctx() : _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
-                i = workers->get_next(workers, &it);
+		i = workers->get_next(workers, &it);
 		if (starpu_worker_get_type(i) != arch)
 		if (starpu_worker_get_type(i) != arch)
 			continue;
 			continue;
 
 

+ 0 - 1
src/debug/traces/starpu_fxt.c

@@ -1607,7 +1607,6 @@ static void handle_task_wait_for_all(void)
 static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 {
 	char *event = (char*)&ev->param[0];
 	char *event = (char*)&ev->param[0];
-
 	/* Add an event in the trace */
 	/* Add an event in the trace */
 	if (out_paje_file)
 	if (out_paje_file)
 	{
 	{

+ 24 - 5
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -673,7 +673,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 	*max_exp_endp = max_exp_end;
 	*max_exp_endp = max_exp_end;
 }
 }
 
 
-static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id)
+static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate)
 {
 {
 	/* find the queue */
 	/* find the queue */
 	unsigned worker, worker_ctx = 0;
 	unsigned worker, worker_ctx = 0;
@@ -791,9 +791,19 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
 
 	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
 	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
 	starpu_task_set_implementation(task, selected_impl);
 	starpu_task_set_implementation(task, selected_impl);
+
+	if(!simulate)
+	{	
+		/* we should now have the best worker in variable "best" */
+		return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+	}
+	else
+	{
+//		double max_len = (max_exp_end - starpu_timing_now());
+		/* printf("%d: dmda max_exp_end %lf best_exp_end %lf max_len %lf \n", sched_ctx_id, max_exp_end/1000000.0, best_exp_end/1000000.0, max_len/1000000.0);	 */
 	
 	
-	/* we should now have the best worker in variable "best" */
-	return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+		return exp_end[best_in_ctx][selected_impl] ;
+	}
 }
 }
 
 
 static int dmda_push_sorted_task(struct starpu_task *task)
 static int dmda_push_sorted_task(struct starpu_task *task)
@@ -801,7 +811,7 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #endif
 #endif
-	return _dmda_push_task(task, 1, task->sched_ctx);
+	return _dmda_push_task(task, 1, task->sched_ctx, 0);
 }
 }
 
 
 static int dm_push_task(struct starpu_task *task)
 static int dm_push_task(struct starpu_task *task)
@@ -812,7 +822,12 @@ static int dm_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 {
 {
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 0, task->sched_ctx);
+	return _dmda_push_task(task, 0, task->sched_ctx, 0);
+}
+static double dmda_simulate_push_task(struct starpu_task *task)
+{
+	STARPU_ASSERT(task);
+	return _dmda_push_task(task, 0, task->sched_ctx, 1);
 }
 }
 
 
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
@@ -1009,6 +1024,7 @@ struct starpu_sched_policy _starpu_sched_dm_policy =
 	.add_workers = dmda_add_workers ,
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dm_push_task,
 	.push_task = dm_push_task,
+	.simulate_push_task = NULL,
 	.pop_task = dmda_pop_task,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,
@@ -1024,6 +1040,7 @@ struct starpu_sched_policy _starpu_sched_dmda_policy =
 	.add_workers = dmda_add_workers ,
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_task,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1040,6 +1057,7 @@ struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
 	.add_workers = dmda_add_workers ,
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_sorted_task,
 	.push_task = dmda_push_sorted_task,
+	.simulate_push_task = NULL,
 	.push_task_notify = dmda_push_task_notify,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1056,6 +1074,7 @@ struct starpu_sched_policy _starpu_sched_dmda_ready_policy =
 	.add_workers = dmda_add_workers ,
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.pre_exec_hook = dmda_pre_exec_hook,