
Whenever there are resources that the LP cannot assign, all contexts share these resources

Andra Hugo 11 years ago
parent
commit
1289532233
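
In outline, the patch adds sc_hypervisor_lp_share_remaining_resources() and calls it right after the LP-based distribution, so workers the LP left unassigned are handed to every context. A minimal sketch of the resulting call sequence, mirroring the feft_lp_policy change below (the surrounding variables are placeholders taken from that hunk):

	/* distribute workers according to the rounded LP solution ... */
	sc_hypervisor_lp_distribute_resources_in_ctxs(sched_ctxs, ns, nw, nworkers_per_ctx_rounded,
						      nworkers_per_ctx, workers, nworkers, tw);
	/* ... then give the workers the LP could not place to all ns contexts */
	sc_hypervisor_lp_share_remaining_resources(ns, sched_ctxs, nworkers, workers);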

+ 3 - 0
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -52,6 +52,9 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 /* place resources in contexts depending on whether they already have workers or not */
 void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs, int *workers, unsigned do_size, struct types_of_workers *tw);
 
+/* unused resources are shared among all contexts */
+void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers);
+
 /* dichotomy between t1 & t2 */
 double sc_hypervisor_lp_find_tmax(double t1, double t2);
 

+ 1 - 0
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -56,6 +56,7 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded);
 //		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
 		sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw);
+		sc_hypervisor_lp_share_remaining_resources(ns, curr_sched_ctxs, curr_nworkers, workers);
 	}
 }
 

+ 10 - 10
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -387,7 +387,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 
-		if(last_vmax == -1.0)
+//		if(last_vmax == -1.0)
 		{
 			/*sum(all gpus) = 3*/
 			if(w == 0)
@@ -398,16 +398,16 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
 
 		}
-		else
-		{
-			/*sum(all gpus) = 3*/
-			if(w == 0)
-				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+/* 		else */
+/* 		{ */
+/* 			/\*sum(all gpus) = 3*\/ */
+/* 			if(w == 0) */
+/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); */
 			
-			/*sum(all cpus) = 9*/
-			if(w == 1)
-				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
-		}
+/* 			/\*sum(all cpus) = 9*\/ */
+/* 			if(w == 1) */
+/* 				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); */
+/* 		} */
 	}
 
 	STARPU_ASSERT(n == ne);
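
For reference, this bound change is what can leave workers unassigned: GLP_FX pins the row (total workers of one type) to an exact value, while GLP_UP only caps it. A minimal GLPK sketch of the two variants (illustration only, not part of the patch; row and total are placeholders):

	#include <glpk.h>

	/* row `row` holds the sum of workers of one type over all contexts */
	static void bound_total_workers(glp_prob *lp, int row, int total, int force_all)
	{
		if (force_all)
			glp_set_row_bnds(lp, row, GLP_FX, total, total); /* sum == total: every worker is used */
		else
			glp_set_row_bnds(lp, row, GLP_UP, 0, total);     /* sum <= total: some workers may stay free */
	}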

+ 30 - 4
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -72,24 +72,24 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		double selected_flops[STARPU_NMAX_SCHED_CTXS];
 		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
 		int nselected = 0;
-		int available_cpus = total_nw[0];	
+		int available_cpus = total_nw[0];
 		int used_cpus = 0;
 
 		for(i = 0; i < nsched_ctxs; i++)
 		{
 			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
-			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] != 0.0)
+			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001)
 			{
 				selected_flops[nselected] = flops[i];
 				selected_v[nselected][0] = v[i][0];
 				selected_sched_ctxs[nselected++] = sched_ctxs[i];
 			}
-			 else
+			else
 				available_cpus -= res[i][0];
 			used_cpus += res[i][0];
 		}
 
-		if(used_cpus != total_nw[0])
+		if(used_cpus < 0.8 * total_nw[0])
 		{
 			double old_ret = ret;
 			
@@ -670,6 +670,32 @@ void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][n
 	return;
 }
 
+void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers)
+{
+	int s, w, worker, nw = 0;
+	int remaining_workers[nworkers];
+
+	/* collect the workers that do not belong to any of the ns contexts */
+	for(w = 0; w < nworkers; w++)
+	{
+		worker = workers == NULL ? w : workers[w];
+		unsigned found = 0;
+		for(s = 0; s < ns; s++)
+		{
+			if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s]))
+			{
+				found = 1;
+				break;
+			}
+		}
+		if(!found)
+			remaining_workers[nw++] = worker;
+	}
+
+	/* hand the unassigned workers to every context */
+	if(nw > 0)
+		for(s = 0; s < ns; s++)
+			sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]);
+}
+
 double sc_hypervisor_lp_find_tmax(double t1, double t2)
 {
 	return t1 + ((t2 - t1)/2);

+ 1 - 1
sc_hypervisor/src/policies_utils/speed.c

@@ -147,7 +147,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			}
 		}			
 		
-		if(nworkers != 0 && all_workers_flops != 0.0)
+		if(nworkers != 0 && all_workers_flops > 0.0001)
 		{
 //			elapsed_time -= max_workers_idle_time;
 			speed = (all_workers_flops / elapsed_time) / nworkers;

+ 2 - 2
sc_hypervisor/src/sc_hypervisor.c

@@ -840,7 +840,7 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
 		double norm_idle_time = max_workers_idle_time[i] / elapsed_time;
 
-		if(lrint(norm_idle_time) >= 1)
+		if(norm_idle_time >= 0.9)
 		{
 			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
 /* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
@@ -848,7 +848,7 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		}
 		else
 		{
-			if(max_workers_idle_time[i] < 0.000001)
+			if(norm_idle_time < 0.1)//(max_workers_idle_time[i] < 0.000001)
 				config->max_nworkers = 	workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 			else
 				config->max_nworkers = workers->nworkers;
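
The resize-interval change above replaces the old tests with thresholds on the normalized idle time, i.e. the longest per-worker idle time as a fraction of the elapsed interval. A condensed, self-contained sketch of the new decision (illustration only; names mirror the diff, the hypervisor bookkeeping is omitted):

	#include <math.h>

	/* idle and elapsed are in seconds; nworkers / nready mirror the fields used above */
	static int new_max_nworkers(double idle, double elapsed, int nworkers, int nready)
	{
		double norm_idle_time = idle / elapsed;

		if (norm_idle_time >= 0.9)
			return nworkers - (int) lrint(norm_idle_time); /* mostly idle: shrink the context */
		if (norm_idle_time < 0.1)
			return nworkers + nready - 1;                  /* hardly idle: allow it to grow */
		return nworkers;                                       /* otherwise keep the current size */
	}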