
small fixes done while writing my thesis + task window at context level (before pushing tasks to the scheduler) expressed w.r.t. time rather than the number of tasks

Andra Hugo, 10 years ago · commit ba3e4d7403
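
In essence (an overview distilled from the diffs below, not text from the commit itself): before a ready task is handed to the scheduler, the runtime asks the scheduling policy to simulate the push and predict when the task would complete; if the context's expected load already exceeds a time window read from STARPU_WINDOW_TIME_SIZE (in seconds, with a 20% tolerance), the task is parked in a per-context waiting list and re-fetched as ready work drains. The new starpu_sched_ctx_update_start_resizing_sample entry point lets the hypervisor tell the runtime from which timestamp the window is measured. A minimal standalone sketch of the test, with illustrative names:

#include <stdio.h>
#include <stdlib.h>

/* models _starpu_can_push_task from the diff: expected_end and window_start
   are timestamps in microseconds (the starpu_timing_now() scale),
   window_size is in seconds */
static int can_push(double expected_end, double window_start, double window_size)
{
	double expected_len = (expected_end - window_start) / 1000000.0;
	return expected_len <= window_size + 0.2 * window_size;
}

int main(void)
{
	const char *env = getenv("STARPU_WINDOW_TIME_SIZE");
	if (!env)
	{
		puts("no window set: tasks are always pushed");
		return 0;
	}
	/* illustrative: the sample started at t=0 and the load would end 0.9 s later */
	printf("push? %d\n", can_push(900000.0, 0.0, atof(env)));
	return 0;
}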

+ 2 - 0
include/starpu_sched_ctx_hypervisor.h

@@ -43,6 +43,8 @@ void starpu_sched_ctx_notify_hypervisor_exists(void);
 
 unsigned starpu_sched_ctx_check_if_hypervisor_exists(void);
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample);
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 0
include/starpu_scheduler.h

@@ -33,6 +33,7 @@ struct starpu_sched_policy
 	void (*deinit_sched)(unsigned sched_ctx_id);
 
 	int (*push_task)(struct starpu_task *);
+	double (*simulate_push_task)(struct starpu_task *);
 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
 	struct starpu_task *(*pop_task)(unsigned sched_ctx_id);
 	struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id);
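
The hook is optional: policies that leave it NULL are never throttled (see _starpu_can_push_task in src/core/sched_ctx.c below). A hedged sketch of the contract, with hypothetical my_* names (not StarPU API): return the predicted end time of the task on the starpu_timing_now() microsecond scale, without enqueueing anything.

#include <starpu.h>

/* toy push: a real policy would place the task on a worker queue */
static int my_push_task(struct starpu_task *task)
{
	(void)task;
	return 0;
}

static double my_simulate_push_task(struct starpu_task *task)
{
	/* same placement decision as my_push_task would take, but only
	   report the expected completion time; the task is not queued */
	(void)task;
	return starpu_timing_now() + 1000.0; /* toy: done 1 ms from now */
}

static struct starpu_sched_policy my_policy =
{
	.push_task = my_push_task,
	.simulate_push_task = my_simulate_push_task,
	/* ... remaining handlers ... */
};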

+ 1 - 0
sc_hypervisor/include/sc_hypervisor.h

@@ -144,6 +144,7 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
 
 void sc_hypervisor_print_overhead();
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
sc_hypervisor/src/Makefile.am

@@ -37,7 +37,8 @@ libsc_hypervisor_la_SOURCES = 				\
 	hypervisor_policies/teft_lp_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
 	hypervisor_policies/ispeed_lp_policy.c		\
-	hypervisor_policies/throughput_lp_policy.c
+	hypervisor_policies/throughput_lp_policy.c	\
+	hypervisor_policies/hard_coded_policy.c
 
 noinst_HEADERS = sc_hypervisor_intern.h		
 

+ 17 - 8
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -20,7 +20,7 @@
 #include <starpu_config.h>
 #include <sys/time.h>
 
-int resize_no = 0;
+unsigned long resize_no = 0;
 #ifdef STARPU_HAVE_GLPK_H
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
@@ -53,8 +53,8 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
 	long diff_us = end_time.tv_usec  - start_time.tv_usec;
 	
-	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000;
-	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	if(vmax != -1.0)
 	{
 /* 		int nworkers_per_ctx_rounded[ns][nw]; */
@@ -124,9 +124,8 @@ static int _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *fir
 
 static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-#ifdef STARPU_USE_FXT
 	starpu_fxt_trace_user_event(resize_no);
-#endif
+
 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
 	if(nhierarchy_levels > 1)
 	{
@@ -243,9 +242,19 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 			_resize(NULL, -1, NULL, -1);
 		}
 	}
-	else if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+	else 
 	{
-		_resize(NULL, -1, NULL, -1);
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		{
+
+			_resize(NULL, -1, NULL, -1);
+		}
+		else
+		{
+			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				_resize(NULL, -1, NULL, -1);
+		}
 	}
 	return;
 }
@@ -253,6 +262,7 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
+	if(worker == -2) return;
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
 	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	{
@@ -320,7 +330,6 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-//			printf("trigger idle \n");
 			_resize_leaves(worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
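
The new worker == -2 early return pairs with notify_post_exec_task in sc_hypervisor.c further down: when the per-context time sample has not elapsed yet, the hypervisor still invokes the handler but with worker = -2, meaning "bookkeeping only, do not resize". Policies that maintain a task pool (teft_lp) use that call to remove the task from the pool; pure resizing policies such as this one return immediately. Condensed from the sc_hypervisor.c hunk below:

/* condensed from notify_post_exec_task (sc_hypervisor.c) */
if(elapsed_time > config->time_sample)
	hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint); /* may resize */
else
	/* sample not elapsed: bookkeeping only, no resizing */
	hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);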

+ 12 - 2
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -119,13 +119,23 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 
 
 	double w_in_s[ns][nw];
-//			double flops_on_w[ns][nw];
+
 	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
 	int i;
 	for(i = 0; i < ns; i++)
 		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
-	
+
+	struct timeval start_time;
+	struct timeval end_time;
+	gettimeofday(&start_time, NULL);
 	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, curr_sched_ctxs, workers);
+	gettimeofday(&end_time, NULL);
+	
+	long diff_s = end_time.tv_sec  - start_time.tv_sec;
+	long diff_us = end_time.tv_usec  - start_time.tv_usec;
+	
+	__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{

+ 54 - 48
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -79,7 +79,7 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 	int nt = 0; /* Number of different kinds of tasks */
-	starpu_pthread_mutex_lock(&mutex);
+
 	struct sc_hypervisor_policy_task_pool * tp;
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
@@ -105,12 +105,16 @@ static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); 
 	double smallest_tmax = possible_tmax / 3;
 	double tmax = possible_tmax * ns;
-	double tmin = smallest_tmax;
+	double tmin = 0.0;
+	unsigned found_sol = 0;
+
+	if(nt > 0 && tmax > 0.0)
+	{
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	}
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
-	starpu_pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{
@@ -147,7 +151,7 @@ static void size_if_required()
 
 		if(ready_to_size)
 		{
-			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
+			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);			
 			sc_hypervisor_free_size_req();
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -167,6 +171,7 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	if(ns < 2) return;
 	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
 
 	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
@@ -186,7 +191,6 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
 	
-	
 	double w_in_s[ns][nw];
 	double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
 	int i;
@@ -205,12 +209,27 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	   compute the nr of flops and not the tasks */
         /*lp computes it in s but it's converted to ms just before return */
 	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
-	double smallest_tmax = 0.0;
-	double tmax = possible_tmax * ns;
+	double smallest_tmax = possible_tmax/2.0;
+	double tmax = possible_tmax + smallest_tmax;
 	double tmin = smallest_tmax;
+	unsigned found_sol = 0;
 
-	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	if(nt > 0 && tmax > 0.0)
+	{
+		struct timeval start_time;
+		struct timeval end_time;
+		gettimeofday(&start_time, NULL);
+		
+		
+		found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+							       tmin, tmax, smallest_tmax, _compute_workers_distrib);
+		gettimeofday(&end_time, NULL);
+		
+		long diff_s = end_time.tv_sec  - start_time.tv_sec;
+		long diff_us = end_time.tv_usec  - start_time.tv_usec;
+		
+		__attribute__((unused))	float timing = (float)(diff_s*1000000 + diff_us)/1000.0;
+	}
 //	starpu_pthread_mutex_unlock(&mutex);
 	
 	/* if we did find at least one solution redistribute the resources */
@@ -234,61 +253,48 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers,
 	free(tasks_per_worker);
 }
 
-static void teft_lp_handle_poped_task(unsigned sched_ctx, __attribute__((unused))int worker, struct starpu_task *task, uint32_t footprint)
+static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	if(worker > -2)
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
-		{
-			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_SPEED)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
-			
-			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+			unsigned criteria = sc_hypervisor_get_resize_criteria();
+			if(criteria != SC_NOTHING && criteria == SC_SPEED)
 			{
-				_try_resizing(NULL, -1, NULL, -1);
+				
+				if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
+				{
+					_try_resizing(NULL, -1, NULL, -1);
+				}
 			}
+			
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
-
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	/* too expensive to take this mutex and correct value of the number of tasks is not compulsory */
-//	starpu_pthread_mutex_lock(&mutex);
+	starpu_pthread_mutex_lock(&mutex);
 	sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools);
-//	starpu_pthread_mutex_unlock(&mutex);
+	starpu_pthread_mutex_unlock(&mutex);
 
 }
 
 static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING)// && criteria == SC_IDLE)
 	{
-		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+		struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+		
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
+			_try_resizing(NULL, -1, NULL, -1);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-			return;
-		}
-
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_IDLE)
-		{
-			
-			if(sc_hypervisor_check_idle(sched_ctx, worker))
-			{
-				_try_resizing(NULL, -1, NULL, -1);
-			}
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	return;
 }
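
The new bracket starts the dichotomy from the LP's own estimate instead of the previous wide-open range. With illustrative numbers:

/* illustrative: possible_tmax = 100 ms */
double possible_tmax = 100.0;
double smallest_tmax = possible_tmax / 2.0;   /* = 50 ms  */
double tmax = possible_tmax + smallest_tmax;  /* = 150 ms */
double tmin = smallest_tmax;                  /* = 50 ms  */
/* the dichotomy now searches [50 ms, 150 ms] around the estimate,
   instead of [0, ns * possible_tmax] as before */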

+ 58 - 23
sc_hypervisor/src/policies_utils/dichotomy.c

@@ -23,60 +23,95 @@
    still has solutions */
 unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data,
 					    double tmin, double tmax, double smallest_tmax,
-					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
+					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw],
 									     unsigned is_integer, double tmax, void *specifc_data))
 {
 	double res = 1.0;
 	unsigned has_sol = 0;
-	double old_tmax = 0.0;
+	double tmid = tmax;
 	unsigned found_sol = 0;
-
 	struct timeval start_time;
 	struct timeval end_time;
 	int nd = 0;
+	double found_tmid = tmax;
+	double potential_tmid = tmid;
+	double threshold = tmax*0.1;
 	gettimeofday(&start_time, NULL);
-
+	
 	/* we fix tmax and we do not treat it as an unknown
 	   we just vary by dichotomy its values*/
-	while(tmax > 1.0)
+	while(1)
 	{
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   in the proper table */
-		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmax, specific_data);
-		if(res != 0.0)
+		printf("solving for tmid %lf \n", tmid);
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmid, specific_data);
+		if(res < 0.0)
+		{
+			printf("timeouted no point in continuing\n");
+			found_sol = 0;
+			break;
+		}
+		else if(res != 0.0)
 		{
 			has_sol = 1;
 			found_sol = 1;
+			found_tmid = tmid;
+			printf("found sol for tmid %lf \n", tmid);
 		}
 		else
+		{
+			printf("failed for tmid %lf \n", tmid);
+			if(tmid == tmax)
+			{
+				printf("failed for tmid %lf from the first time\n", tmid);
+				break;
+			}
 			has_sol = 0;
+		}
 
-		/* if we have a solution with this tmax try a smaller value
-		   bigger than the old min */
+		/* if we have a solution with this tmid try a smaller value,
+		   still bigger than tmin */
 		if(has_sol)
 		{
-			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
+			/* if the difference between tmax and tmid is smaller than
+			   a given threshold there is no point in searching for
+			   more precision */
+			tmax = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threshold)
+			{
+				printf("had_sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
 				break;
-			old_tmax = tmax;
+			}
+			printf("try for smaller potential tmid %lf \n", potential_tmid);
 		}
-		else /*else try a bigger one but smaller than the old tmax */
+		else /*else try a bigger one */
 		{
-			tmin = tmax;
-			if(old_tmax != 0.0)
-				tmax = old_tmax;
+			/* if we previously found a good sol and we keep failing
+			   we stop searching for a better sol */
+			tmin = tmid;
+			potential_tmid = tmin + ((tmax-tmin)/2.0); 
+			if((tmax - potential_tmid) < threshold)
+			{
+				printf("didn't have sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid);
+				break;
+			}
+			printf("try for bigger potential tmid %lf \n", potential_tmid);
 		}
-		if(tmin == tmax) break;
-		tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
 
-		if(tmax < smallest_tmax)
-		{
-			tmax = old_tmax;
-			tmin = smallest_tmax;
-			tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
-		}
+		tmid = potential_tmid;
+
 		nd++;
 	}
+	printf("solve againd for tmid %lf \n", found_tmid);
+	if(found_sol)
+	{
+		res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, found_tmid, specific_data);
+		found_sol = (res != 0.0);
+	}
+	printf("found sol %d for tmid %lf\n", found_sol, found_tmid);
 	gettimeofday(&end_time, NULL);
 
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
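
The rewritten loop is a plain bisection on tmid with a relative stopping threshold. A minimal standalone model (feasible() stands in for the LP solve; names and numbers are illustrative):

#include <stdio.h>

/* stands in for lp_estimated_distrib_func: 1 if a distribution fits in tmid */
static int feasible(double tmid)
{
	return tmid >= 73.0; /* toy feasibility frontier */
}

static double bisect(double tmin, double tmax)
{
	double threshold = tmax * 0.1; /* same 10% stop rule as the commit */
	double tmid = tmax, found = -1.0;
	while (1)
	{
		if (feasible(tmid))
		{
			found = tmid;
			tmax = tmid; /* try a smaller makespan */
		}
		else
		{
			if (tmid == tmax)
				return -1.0; /* not even the largest tmax works */
			tmin = tmid; /* back off upwards */
		}
		double next = tmin + (tmax - tmin) / 2.0;
		if (tmax - next < threshold)
			return found;
		tmid = next;
	}
}

int main(void)
{
	printf("best tmid: %lf\n", bisect(50.0, 150.0)); /* prints 75.000000 */
	return 0;
}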

+ 57 - 58
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -30,6 +30,7 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	int t, w, s;
 	glp_prob *lp;
 
+	
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
@@ -57,12 +58,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				char name[32];
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				glp_set_col_name(lp, colnum(w, t), name);
-/* 				if (integer) */
-/*                                 { */
-/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
-/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
-/*                                 } */
-/* 				else */
+				if (is_integer)
+                                {
+                                        glp_set_col_kind(lp, colnum(w, t), GLP_IV);
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0);
+                                }
+				else
 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
 			}
 		for(s = 0; s < ns; s++)
@@ -115,7 +116,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 						ia[n] = curr_row_idx+s*nw+w+1;
 						ja[n] = colnum(w, t);
 						if (isnan(times[w][t]))
+						{
+							printf("had to insert huge val \n");
 							ar[n] = 1000000000.;
+						}
 						else
 							ar[n] = times[w][t];
 						n++;
@@ -126,7 +130,12 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				ja[n] = nw*nt+s*nw+w+1;
 				ar[n] = (-1) * tmax;
 				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+				if (is_integer)
+                                {
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0, 0);
+                                }
+                                else
+					glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
 			}
 		}
 
@@ -184,10 +193,10 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	parm.msg_lev = GLP_MSG_OFF;
 	int ret = glp_simplex(lp, &parm);
 
-/* 	char str[50]; */
-/* 	sprintf(str, "outpu_lp_%g", tmax); */
+	/* char str[50]; */
+	/* sprintf(str, "outpu_lp_%g", tmax); */
 
-/* 	glp_print_sol(lp, str); */
+	/* glp_print_sol(lp, str); */
 
 	if (ret)
 	{
@@ -213,12 +222,15 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
                 glp_iocp iocp;
                 glp_init_iocp(&iocp);
                 iocp.msg_lev = GLP_MSG_OFF;
+//		iocp.tm_lim = 1000;
 		glp_intopt(lp, &iocp);
 		int stat = glp_mip_status(lp);
 		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
+		if(stat == GLP_NOFEAS || stat == GLP_ETMLIM || stat == GLP_UNDEF)
 		{
 //			printf("no int sol in tmax = %lf\n", tmax);
+			if(stat == GLP_ETMLIM || stat == GLP_UNDEF)
+				printf("timeout \n");
 			glp_delete_prob(lp);
 			lp = NULL;
 			return 0.0;
@@ -228,12 +240,13 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	double res = glp_get_obj_val(lp);
 	for (w = 0; w < nw; w++)
 		for (t = 0; t < nt; t++)
-/* 			if (integer) */
-/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
-/*                         else */
+			if (is_integer)
+				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t));
+                        else
 				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
-	
-//	printf("for tmax %lf\n", tmax);
+
+	/* printf("**********************************************\n"); */
+	/* printf("for tmax %lf\n", tmax); */
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
 		{
@@ -243,8 +256,8 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
 //			printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]);
 		}
-//	printf("\n");
-
+	/* printf("\n"); */
+	/* printf("**********************************************\n"); */
 	glp_delete_prob(lp);
 	return res;
 }
@@ -286,14 +299,14 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers);
-				}
-				else
+				/* if(sc_w->consider_max) */
+				/* { */
+				/* 	if(config->max_nworkers == 0) */
+				/* 		glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers); */
+				/* 	else */
+				/* 		glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers); */
+				/* } */
+				/* else */
 				{
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]);
@@ -303,17 +316,17 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			}
 			else
 			{
-				if(sc_w->consider_max)
-				{
-					if(config->max_nworkers == 0)
-						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0);
-					else
-						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0);
-#ifdef STARPU_SC_HYPERVISOR_DEBUG
-					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0);
-#endif
-				}
-				else
+/* 				if(sc_w->consider_max) */
+/* 				{ */
+/* 					if(config->max_nworkers == 0) */
+/* 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* 					else */
+/* 						glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0); */
+/* #ifdef STARPU_SC_HYPERVISOR_DEBUG */
+/* 					printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0); */
+/* #endif */
+/* 				} */
+/* 				else */
 				{
 					if(total_nw[w] == 0)
 						glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0);
@@ -418,27 +431,13 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 
-//		if(last_vmax == -1.0)
-		{
-			/*sum(all gpus) = 3*/
-			if(w == 0)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[0]);
-			
-			/*sum(all cpus) = 9*/
-			if(w == 1)
-				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
-
-		}
-/* 		else */
-/* 		{ */
-/* 			/\*sum(all gpus) = 3*\/ */
-/* 			if(w == 0) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); */
-			
-/* 			/\*sum(all cpus) = 9*\/ */
-/* 			if(w == 1) */
-/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); */
-/* 		} */
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+		
+		/*sum(all cpus) = 9*/
+		if(w == 1)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
 	}
 
 	STARPU_ASSERT(n == ne);
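
Two behavioral notes here, relying on GLPK's documented semantics: GLP_FX now pins each worker-type row to exactly total_nw[w] workers, where the previous GLP_UP only imposed an upper cap; and in the integer branch above, glp_mip_status() only ever reports GLP_UNDEF, GLP_FEAS, GLP_NOFEAS or GLP_OPT, so the GLP_ETMLIM comparison never matches (GLP_ETMLIM is a glp_intopt() return code, which fits the commented-out iocp.tm_lim timeout). The isnan(times[w][t]) fallback also pairs with sc_hypervisor_get_tasks_times (policy_tools.c below), which now pre-fills the matrix with NAN. The bound call shapes, side by side:

/* GLPK row bounds: GLP_UP caps the sum, GLP_FX forces it exactly */
glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[w]);            /* old: sum <= total */
glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[w], total_nw[w]);  /* new: sum == total */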

+ 3 - 2
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -41,7 +41,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
 		int w;
 		for(w = 0; w < nw; w++)
-			v[i][w] = 5.0;//sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 
 		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
 		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
@@ -624,6 +624,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 				
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 					   &nw_move, workers_move);
+
 		if(nw_move > 0)
 			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 	}
@@ -635,8 +636,8 @@ int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, i
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);

+ 15 - 10
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -28,8 +28,8 @@ static int _compute_priority(unsigned sched_ctx)
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -117,8 +117,8 @@ int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum st
 	int considered = 0;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	for(index = 0; index < *nworkers; index++)
 	{
 		while(workers->has_next(workers, &it))
@@ -183,7 +183,6 @@ int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *confi
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	{
@@ -409,6 +408,9 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 {
         struct sc_hypervisor_policy_task_pool *tp;
         int w, t;
+	for(w = 0; w < nw; w++)
+		for(t = 0; t < nt; t++)
+			times[w][t] = NAN;
         for (w = 0; w < nw; w++)
         {
                 for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
@@ -422,7 +424,6 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 			else
 			{
                                 times[w][t] = (length / 1000.);
-
 				double transfer_time = 0.0;
 				unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
 				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
@@ -431,18 +432,21 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 					if(arch == STARPU_CUDA_WORKER)
 					{
 						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
-						transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+						if(transfer_speed > 0.0)
+							transfer_time +=  (tp->data_size / transfer_speed) / 1000. ;
+	
 						double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker));
 						transfer_time += latency/1000.;
-						
-						
+//						transfer_time *=4;
 					}
 					else if(arch == STARPU_CPU_WORKER)
 					{
 						if(!starpu_sched_ctx_contains_type_of_worker(arch, tp->sched_ctx_id))
 						{
 							double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
-							transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+							if(transfer_speed > 0.0)
+								transfer_time += (tp->data_size / transfer_speed) / 1000. ;
+
 							double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM);
 							transfer_time += latency / 1000.;
 						}
@@ -465,7 +469,7 @@ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 	{
 		if(sc_w->idle_time[worker] > config->max_idle[worker])
 		{
-//			printf("w%d/ctx%d: current idle %lf all idle %lf max_idle %lf\n", worker, sched_ctx, idle, idle_time, config->max_idle[worker]);
+//			printf("w%d/ctx%d: current idle %lf  max_idle %lf\n", worker, sched_ctx, sc_w->idle_time[worker], config->max_idle[worker]);
 			return 1;
 		}
 	}
@@ -547,7 +551,8 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_
 			
 			double ctx_v = sc_hypervisor_get_ctx_speed(sc_w);
 			ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v;
-			if(ctx_v != -1.0 && ((ctx_v < 0.8*optimal_v[i]) || ctx_v > 1.2*optimal_v[i])) 
+			double max_vel = _get_max_speed_gap();
+			if(ctx_v != -1.0 && ((ctx_v < (1-max_vel)*optimal_v[i]) || ctx_v > (1+max_vel)*optimal_v[i])) 
 			{
 				return 1;
 			}
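
The hard-coded +/-20% band around the optimal speed becomes configurable through _get_max_speed_gap(). For instance (illustrative value), with max_vel = 0.25 a context only triggers resizing when its measured speed leaves [0.75, 1.25] x optimal_v[i]:

/* illustrative: max_vel = 0.25 */
double max_vel = _get_max_speed_gap();
double lo = (1 - max_vel) * optimal_v[i]; /* 0.75 * optimal */
double hi = (1 + max_vel) * optimal_v[i]; /* 1.25 * optimal */
if(ctx_v != -1.0 && (ctx_v < lo || ctx_v > hi))
	return 1; /* speed gap large enough to justify resizing */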

+ 3 - 3
sc_hypervisor/src/policies_utils/speed.c

@@ -127,13 +127,12 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		double speed = 0.0;
 		unsigned nworkers = 0;
 		double all_workers_flops = 0.0;
 		double max_workers_idle_time = 0.0;
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);
@@ -168,6 +167,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		
 		if(speed != -1.0)
 		{
+
 			/* if ref_speed started being corrupted bc of the old bad distribution
 			   register only the last frame otherwise make the average with the speed 
 			   behavior of the application until now */
@@ -233,8 +233,8 @@ double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch)
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
-
 		workers->init_iterator(workers, &it);
+
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);

+ 1 - 2
sc_hypervisor/src/policies_utils/task_pool.c

@@ -69,8 +69,7 @@ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32
 				free(tp);
 				tp = NULL;
 				
-				if(next_tp)
-					*task_pools = next_tp;
+				*task_pools = next_tp;
 				
 			}
 			else

+ 53 - 19
sc_hypervisor/src/sc_hypervisor.c

@@ -42,6 +42,7 @@ extern struct sc_hypervisor_policy ispeed_lp_policy;
 extern struct sc_hypervisor_policy throughput_lp_policy;
 #endif // STARPU_HAVE_GLPK_
 extern struct sc_hypervisor_policy ispeed_policy;
+extern struct sc_hypervisor_policy hard_coded_policy;
 
 
 static struct sc_hypervisor_policy *predefined_policies[] =
@@ -55,7 +56,8 @@ static struct sc_hypervisor_policy *predefined_policies[] =
 	&throughput_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
-	&ispeed_policy
+	&ispeed_policy,
+	&hard_coded_policy
 };
 
 static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
@@ -164,7 +166,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 	hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
-	hypervisor.start_executing_time = starpu_timing_now();
+//	hypervisor.start_executing_time = starpu_timing_now();
 
 	int i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
@@ -254,8 +256,17 @@ void sc_hypervisor_start_resize(unsigned sched_ctx)
 
 static void _print_current_time()
 {
-	if(!getenv("SC_HYPERVISOR_STOP_PRINT"))
+	char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT");
+        int sp = stop_print ? atoi(stop_print) : 1;
+
+	if(!sp)
 	{
+		if(hypervisor.start_executing_time == 0.0)
+		{
+			fprintf(stdout, "Time: %lf\n", -1.0);
+			return;
+		}
+
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
 		fprintf(stdout, "Time: %lf\n", elapsed_time);
@@ -332,7 +343,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
-	hypervisor.resize[sched_ctx] = 1;
+	hypervisor.resize[sched_ctx] = 0;//1;
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -412,6 +423,13 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now)
+{
+	if(now)
+		hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+	starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now());
+}
+
 
 double _get_max_speed_gap()
 {
@@ -441,8 +459,8 @@ int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archty
 	int worker;
 
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -475,8 +493,8 @@ double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrappe
 	int worker;
 	
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -493,8 +511,8 @@ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_
 	int worker;
 	
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
+		
 	while(workers->has_next(workers, &it))
 	{
 		worker = workers->get_next(workers, &it);
@@ -981,12 +999,11 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs,
 		int worker;
 		
 		struct starpu_sched_ctx_iterator it;
+		workers->init_iterator(workers, &it);
 		
 		double elapsed_time_worker[STARPU_NMAXWORKERS];
 		double norm_idle_time = 0.0;
 		double end_time  = starpu_timing_now();
-
-		workers->init_iterator(workers, &it);
 		while(workers->has_next(workers, &it))
 		{
 			double idle_time = 0.0;
@@ -1184,6 +1201,7 @@ unsigned choose_ctx_to_steal(int worker)
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 {
+	if(hypervisor.start_executing_time == 0.0) return;
 	struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
 	sc_w->current_idle_time[worker] += idle_time;
 	
@@ -1202,7 +1220,7 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 	if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
 	{
 		if(sc_w->hyp_react_start_time == 0.0)
-			sc_w->hyp_react_start_time = starpu_timing_now();
+			sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 		
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
@@ -1236,13 +1254,15 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 			if(idle_everywhere)
 			{
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
+				if(elapsed_time > (sc_w->config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
-
-
-			sc_w->hyp_react_start_time = starpu_timing_now();
 		}
 	}
 	return;
@@ -1261,6 +1281,11 @@ void _update_real_start_time_hierarchically(unsigned sched_ctx)
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_poped_task(unsigned sched_ctx, int worker)
 {
+	if(hypervisor.start_executing_time == 0.0)
+		hypervisor.start_executing_time = starpu_timing_now();
+	if(!hypervisor.resize[sched_ctx])
+		hypervisor.resize[sched_ctx] = 1;
+
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
 		_update_real_start_time_hierarchically(sched_ctx);
 
@@ -1358,18 +1383,25 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 		if(hypervisor.policy.handle_poped_task)
 		{	
 			if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0)
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				sc_hypervisor_reset_react_start_time(sched_ctx, 1);
 
 			double curr_time = starpu_timing_now();
 			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
 			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
 			{
 				double hyp_overhead_start = starpu_timing_now();
-				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
 				double hyp_overhead_end = starpu_timing_now();
 				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
-				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
+					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
+				else
+					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
 			}
+			else
+                                /* no need to consider resizing, just remove the task from the pool if the strategy requires it */
+				hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);
 		}
 	}
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
@@ -1434,8 +1466,9 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, s
 	hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops;
 	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 
-	if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known)
-		hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
+	/* signaled by the user - no need to wait for them */
+	/* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */
+	/* 	hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */
 }
 
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
@@ -1650,3 +1683,4 @@ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *l
 	}
 	return;
 }
+

+ 0 - 1
src/common/barrier_counter.c

@@ -96,7 +96,6 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 	barrier->reached_start++;
 	barrier->reached_flops += flops;
-
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 }

+ 1 - 1
src/common/fxt.c

@@ -224,7 +224,7 @@ void starpu_fxt_stop_profiling()
 
 #endif // STARPU_USE_FXT
 
-void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED)
+void starpu_fxt_trace_user_event(unsigned long code)
 {
 #ifdef STARPU_USE_FXT
 	_STARPU_TRACE_USER_EVENT(code);

+ 106 - 14
src/core/sched_ctx.c

@@ -30,6 +30,7 @@ static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
 static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
 static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
+static double hyp_actual_start_sample[STARPU_NMAX_SCHED_CTXS];
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master);
@@ -472,9 +473,12 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	STARPU_ASSERT(nworkers_ctx <= nworkers);
 
 	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->empty_ctx_mutex, NULL);
-
 	starpu_task_list_init(&sched_ctx->empty_ctx_tasks);
 
+	STARPU_PTHREAD_MUTEX_INIT(&sched_ctx->waiting_tasks_mutex, NULL);
+	starpu_task_list_init(&sched_ctx->waiting_tasks);
+
+
 	sched_ctx->sched_policy = policy ? (struct starpu_sched_policy*)malloc(sizeof(struct starpu_sched_policy)) : NULL;
 	sched_ctx->is_initial_sched = is_initial_sched;
 	sched_ctx->name = sched_ctx_name;
@@ -815,6 +819,7 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 	}
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->empty_ctx_mutex);
+	STARPU_PTHREAD_MUTEX_DESTROY(&sched_ctx->waiting_tasks_mutex);
 	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
@@ -954,6 +959,55 @@ void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx
 	return;
 
 }
+unsigned _starpu_can_push_task(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	if(sched_ctx->sched_policy->simulate_push_task)
+	{
+		const char *env_window_size = getenv("STARPU_WINDOW_TIME_SIZE");
+		if(!env_window_size) return 1;
+		double window_size = atof(env_window_size);
+		
+		starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
+		STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
+		double expected_end = sched_ctx->sched_policy->simulate_push_task(task);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
+		
+		double expected_len = 0.0; 
+		if(hyp_actual_start_sample[sched_ctx->id] != 0.0)
+			expected_len = expected_end - hyp_actual_start_sample[sched_ctx->id] ;
+		else 
+		{
+			printf("%d: sc start is 0.0\n", sched_ctx->id);
+			expected_len = expected_end - starpu_timing_now();
+		}
+		if(expected_len < 0.0)
+			printf("exp len negative %lf \n", expected_len);
+		expected_len /= 1000000.0;
+//		printf("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len);
+		if(expected_len > (window_size + 0.2*window_size))
+			return 0;
+	}
+	return 1;
+}
+
+void _starpu_fetch_task_from_waiting_list(struct _starpu_sched_ctx *sched_ctx)
+{
+	if(starpu_task_list_empty(&sched_ctx->waiting_tasks))
+		return;
+	struct starpu_task *old_task = starpu_task_list_back(&sched_ctx->waiting_tasks);
+	if(_starpu_can_push_task(sched_ctx, old_task))
+	{
+		old_task = starpu_task_list_pop_back(&sched_ctx->waiting_tasks);
+		int ret =  _starpu_push_task_to_workers(old_task);
+	}
+	return;
+}
+
+void _starpu_push_task_to_waiting_list(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task)
+{
+	starpu_task_list_push_front(&sched_ctx->waiting_tasks, task);
+	return;
+}
 
 void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
 {
@@ -1053,6 +1107,7 @@ void starpu_sched_ctx_remove_workers(int *workers_to_remove, int nworkers_to_rem
 		if(n_removed_workers > 0)
 		{
 			_starpu_update_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0);
+			starpu_sched_ctx_set_priority(removed_workers, n_removed_workers, sched_ctx_id, 1);
 		}
 
 	}
@@ -1212,16 +1267,46 @@ int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id)
 	return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier);
 }
 
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task)
 {
+	unsigned ret = 1;
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		if(!_starpu_can_push_task(sched_ctx, task))
+		{
+			_starpu_push_task_to_waiting_list(sched_ctx, task);
+			ret = 0;
+		}
+
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+	return ret;
 }
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+
+	if(!sched_ctx->is_initial_sched)
+		STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->waiting_tasks_mutex);
+
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
+
+
+	if(!sched_ctx->is_initial_sched)
+	{
+		_starpu_fetch_task_from_waiting_list(sched_ctx);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->waiting_tasks_mutex);
+	}
+
 }
 
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id)
@@ -1278,6 +1363,7 @@ void starpu_sched_ctx_notify_hypervisor_exists()
 			flops[i][j] = 0.0;
 			data_size[i][j] = 0;
 		}
+		hyp_actual_start_sample[i] = 0.0;
 	}
 }
 
@@ -1286,6 +1372,11 @@ unsigned starpu_sched_ctx_check_if_hypervisor_exists()
 	return with_hypervisor;
 }
 
+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample)
+{
+	hyp_actual_start_sample[sched_ctx_id] = start_sample;
+}
+
 unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
 {
 	return 1;
@@ -1494,19 +1585,20 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
         struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
         struct starpu_worker_collection *workers = sched_ctx->workers;
-        int worker;
-
-	struct starpu_sched_ctx_iterator it;
-
-	workers->init_iterator(workers, &it);
-        while(workers->has_next(workers, &it))
-        {
-                worker = workers->get_next(workers, &it);
-		if(worker == workerid)
-			return 1;
-        }
-
+	if(workers)
+	{
+		int worker;
 
+		struct starpu_sched_ctx_iterator it;
+		
+		workers->init_iterator(workers, &it);
+		while(workers->has_next(workers, &it))
+		{
+			worker = workers->get_next(workers, &it);
+			if(worker == workerid)
+				return 1;
+		}
+	}
 	return 0;
 }
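
Putting the pieces together, the window is driven purely by the environment. A hedged usage sketch (only STARPU_WINDOW_TIME_SIZE and the dmda policy name come from this commit; the rest is illustrative boilerplate). Note that the waiting-list path is skipped for the initial context (the is_initial_sched checks above), so the window matters for tasks submitted to user-created scheduling contexts:

#include <stdlib.h>
#include <starpu.h>

int main(void)
{
	/* 2-second task window per context; unset means "always push" */
	setenv("STARPU_WINDOW_TIME_SIZE", "2.0", 1);
	/* the window needs a policy providing simulate_push_task,
	   e.g. dmda (see deque_modeling_policy_data_aware.c below) */
	setenv("STARPU_SCHED", "dmda", 1);

	if (starpu_init(NULL) != 0)
		return 1;
	/* ... create contexts and submit tasks: a task whose simulated
	   completion exceeds the window is parked in the context's
	   waiting_tasks list and re-fetched as ready work drains ... */
	starpu_shutdown();
	return 0;
}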
 

+ 7 - 1
src/core/sched_ctx.h

@@ -77,6 +77,12 @@ struct _starpu_sched_ctx
 	/* mutext protecting empty_ctx_tasks list */
 	starpu_pthread_mutex_t empty_ctx_mutex;
 
+	/* ready tasks that couldn't be pushed because the window of tasks was already full */
+	struct starpu_task_list waiting_tasks;
+
+	/* mutex protecting waiting_tasks list */
+	starpu_pthread_mutex_t waiting_tasks_mutex;
+
 	/* min CPUs to execute*/
 	int min_ncpus;
 
@@ -184,7 +190,7 @@ int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
-void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
+unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 
 /* Return the corresponding index of the workerid in the ctx table */

+ 12 - 1
src/core/sched_policy.c

@@ -351,7 +351,7 @@ int _starpu_push_task(struct _starpu_job *j)
 
 	_STARPU_LOG_IN();
 
-	_starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops);
+	unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task);
 	task->status = STARPU_TASK_READY;
 
 #ifdef HAVE_AYUDAME_H
@@ -384,8 +384,11 @@ int _starpu_push_task(struct _starpu_job *j)
 #endif
 			return 0;
 		}
+
 	}
 
+	if(!can_push)
+		return 0;
 	/* in case there is no codelet associated to the task (that's a control
 	 * task), we directly execute its callback and enforce the
 	 * corresponding dependencies */
@@ -657,6 +660,14 @@ static
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 {
 	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
+	{
+		if(worker->removed_from_ctx[l->sched_ctx] == 1)
+		{
+			return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+		}
+	}
+
 	unsigned are_2_priorities = 0;
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	{

+ 1 - 1
src/core/task.c

@@ -772,7 +772,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
-	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
+	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	for (i=0 ; i<nbuffers ; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);

+ 8 - 4
src/core/workers.c

@@ -98,15 +98,19 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 {
 	int i;
 	_starpu_codelet_check_deprecated_fields(task->cl);
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
-	struct starpu_worker_collection *workers = sched_ctx->workers;
 
+	/* make sure there is a worker on the machine able to execute the
+	   task, independent of the sched_ctx; the latter may receive the
+	   necessary worker later on - the user or the hypervisor should take care that this happens */
+	
+	int check_entire_platform = starpu_get_env_number("STARPU_CHECK_ENTIRE_PLATFORM");
+	struct _starpu_sched_ctx *sched_ctx = check_entire_platform == 1 ? _starpu_get_initial_sched_ctx() : _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct starpu_sched_ctx_iterator it;
-
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	{
-                i = workers->get_next(workers, &it);
+		i = workers->get_next(workers, &it);
 		if (starpu_worker_get_type(i) != arch)
 			continue;
 

+ 0 - 1
src/debug/traces/starpu_fxt.c

@@ -1607,7 +1607,6 @@ static void handle_task_wait_for_all(void)
 static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	char *event = (char*)&ev->param[0];
-
 	/* Add an event in the trace */
 	if (out_paje_file)
 	{

+ 24 - 5
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -673,7 +673,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 	*max_exp_endp = max_exp_end;
 }
 
-static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id)
+static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate)
 {
 	/* find the queue */
 	unsigned worker, worker_ctx = 0;
@@ -791,9 +791,19 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sch
 
 	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
 	starpu_task_set_implementation(task, selected_impl);
+
+	if(!simulate)
+	{	
+		/* we should now have the best worker in variable "best" */
+		return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+	}
+	else
+	{
+//		double max_len = (max_exp_end - starpu_timing_now());
+		/* printf("%d: dmda max_exp_end %lf best_exp_end %lf max_len %lf \n", sched_ctx_id, max_exp_end/1000000.0, best_exp_end/1000000.0, max_len/1000000.0);	 */
 	
-	/* we should now have the best worker in variable "best" */
-	return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id);
+		return exp_end[best_in_ctx][selected_impl] ;
+	}
 }
 
 static int dmda_push_sorted_task(struct starpu_task *task)
@@ -801,7 +811,7 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 #ifdef STARPU_DEVEL
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #endif
-	return _dmda_push_task(task, 1, task->sched_ctx);
+	return _dmda_push_task(task, 1, task->sched_ctx, 0);
 }
 
 static int dm_push_task(struct starpu_task *task)
@@ -812,7 +822,12 @@ static int dm_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 {
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 0, task->sched_ctx);
+	return _dmda_push_task(task, 0, task->sched_ctx, 0);
+}
+static double dmda_simulate_push_task(struct starpu_task *task)
+{
+	STARPU_ASSERT(task);
+	return _dmda_push_task(task, 0, task->sched_ctx, 1);
 }
 
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
@@ -1009,6 +1024,7 @@ struct starpu_sched_policy _starpu_sched_dm_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dm_push_task,
+	.simulate_push_task = NULL,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,
@@ -1024,6 +1040,7 @@ struct starpu_sched_policy _starpu_sched_dmda_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1040,6 +1057,7 @@ struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_sorted_task,
+	.simulate_push_task = NULL,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
@@ -1056,6 +1074,7 @@ struct starpu_sched_policy _starpu_sched_dmda_ready_policy =
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dmda_push_task,
+	.simulate_push_task = dmda_simulate_push_task,
 	.push_task_notify = dmda_push_task_notify,
 	.pop_task = dmda_pop_ready_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
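
With simulate set, _dmda_push_task runs the same performance-model scan but returns exp_end[best_in_ctx][selected_impl] instead of calling push_task_on_best_worker; that value is what _starpu_can_push_task (src/core/sched_ctx.c above) compares against the window. Policies that keep .simulate_push_task = NULL (dm and dmda_sorted here) are therefore never throttled. Caller side, condensed:

/* condensed from _starpu_can_push_task above */
if(!sched_ctx->sched_policy->simulate_push_task)
	return 1; /* no hook: the window cannot be evaluated, always push */
double expected_end = sched_ctx->sched_policy->simulate_push_task(task); /* in us; nothing is queued */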