
hypervisor: adapt the LP solution when max_nworkers has an important impact on the solution (execute the LP twice for now; next, contexts will share resources)

Andra Hugo 11 years ago
parent
commit
082dc8501c

+ 2 - 1
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -65,7 +65,8 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 /* linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context st 
    the system ends up in the smallest tmax*/
 double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double speed[nsched_ctxs][ntypes_of_workers], 
-					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], unsigned sched_ctxs[nsched_ctxs]);
+					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], 
+					       unsigned sched_ctxs[nsched_ctxs], double vmax);
 
 /* linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */
 double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
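
Note: the new trailing vmax parameter lets a caller pin the objective of a
second LP run to the value achieved by a first one. A minimal sketch of the
intended two-pass call sequence, assuming two worker types (GPUs and CPUs);
the names ns, nsel, sel_v, sel_flops, sel_res and sel_ctxs are illustrative
placeholders, not identifiers from the source:

    /* first pass: vmax is free; -1.0 marks "no previous solution" */
    double ret = sc_hypervisor_lp_simulate_distrib_flops(ns, 2, v, flops, res,
                                                         total_nw, sched_ctxs, -1.0);
    if (ret != 0.0)
            /* second pass: keep the achieved 1/tmax as a lower bound and
               redistribute the remaining workers among the uncapped contexts */
            ret = sc_hypervisor_lp_simulate_distrib_flops(nsel, 2, sel_v, sel_flops,
                                                          sel_res, total_nw, sel_ctxs, ret);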

+ 30 - 13
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -249,7 +249,8 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	return res;
 }
 
-double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw], unsigned sched_ctxs[ns])
+double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], 
+					       int  total_nw[nw], unsigned sched_ctxs[ns], double last_vmax)
 {
 	int integer = 1;
 	int s, w;
@@ -282,7 +283,6 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
-				printf("ctx %d idx %d min %d max %d \n", sched_ctxs[s], s, config->min_nworkers, config->max_nworkers);
 				if(config->max_nworkers == 0)
 					glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
 				else
@@ -302,7 +302,10 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 	/*1/tmax should belong to the interval [0.0;1.0]*/
 	glp_set_col_name(lp, n, "vmax");
 //	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
-	glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
+	if(last_vmax != -1.0)
+		glp_set_col_bnds(lp, n, GLP_LO, last_vmax, last_vmax);
+	else
+		glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
 	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
 	glp_set_obj_coef(lp, n, 1.0);
 
@@ -384,13 +387,27 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 
-		/*sum(all gpus) = 3*/
-		if(w == 0)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+		if(last_vmax == -1.0)
+		{
+			/*sum(all gpus) = 3*/
+			if(w == 0)
+				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[0]);
+			
+			/*sum(all cpus) = 9*/
+			if(w == 1)
+				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
 
-		/*sum(all cpus) = 9*/
-		if(w == 1)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+		}
+		else
+		{
+			/*sum(all gpus) = 3*/
+			if(w == 0)
+				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+			
+			/*sum(all cpus) = 9*/
+			if(w == 1)
+				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+		}
 	}
 
 	STARPU_ASSERT(n == ne);
@@ -399,7 +416,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 
 	glp_smcp parm;
 	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
+  	parm.msg_lev = GLP_MSG_OFF;
 	int ret = glp_simplex(lp, &parm);
 	if (ret)
         {
@@ -414,7 +431,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
         if(stat == GLP_NOFEAS)
         {
                 glp_delete_prob(lp);
-//              printf("no_sol in tmax = %lf\n", tmax);
+		printf("no_sol\n");
                 lp = NULL;
                 return 0.0;
         }
@@ -430,7 +447,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
                 /* if we don't have a solution return */
                 if(stat == GLP_NOFEAS)
                 {
-//                      printf("no int sol in tmax = %lf\n", tmax);
+			printf("no int sol\n");
                         glp_delete_prob(lp);
                         lp = NULL;
                         return 0.0;
@@ -449,7 +466,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
                                 res[s][w] = (double)glp_mip_col_val(lp, n);
 			else
 				res[s][w] = glp_get_col_prim(lp, n);
-			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
+  			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
 			n++;
 		}
 	}
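
Note: the two passes differ only in the GLPK bounds. On the first pass the
per-type worker rows are upper-bounded (contexts may collectively use fewer
workers than available once the max_nworkers caps bind) and vmax is free; on
the second pass the worker totals are fixed and vmax may only improve on the
previous solution. A condensed sketch of that bound logic, where vmax_col
stands for the column index of the vmax variable (the loop over w is a
simplification of the per-type if statements above):

    if (last_vmax != -1.0)
            /* second pass: 1/tmax must not regress below the first solution */
            glp_set_col_bnds(lp, vmax_col, GLP_LO, last_vmax, last_vmax);
    else
            glp_set_col_bnds(lp, vmax_col, GLP_LO, 0.0, 0.0);

    for (w = 0; w < nw; w++)
    {
            if (last_vmax == -1.0)
                    /* first pass: use at most total_nw[w] workers of type w */
                    glp_set_row_bnds(lp, ns + w + 1, GLP_UP, 0, total_nw[w]);
            else
                    /* second pass: use exactly total_nw[w] workers of type w */
                    glp_set_row_bnds(lp, ns + w + 1, GLP_FX, total_nw[w], total_nw[w]);
    }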

+ 101 - 28
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -60,44 +60,117 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		}
 /* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
 /* 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
+
 	}
-		
-	
-	double vmax = 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw, sched_ctxs);
-	double optimal_v = 0.0;
-	for(i = 0; i < nsched_ctxs; i++)
+
+	double ret = sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw, sched_ctxs, -1.0);
+	double vmax = 0.0;
+	if(ret != 0.0)
 	{
+		/* redo the lp after cleaning out the contexts that got all the max workers required */
+		unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
+		double selected_flops[STARPU_NMAX_SCHED_CTXS];
+		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
+		int nselected = 0;
+		int available_cpus = total_nw[0];	
+		int used_cpus = 0;
+
+		for(i = 0; i < nsched_ctxs; i++)
+		{
+			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
+			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] != 0.0)
+			{
+				selected_flops[nselected] = flops[i];
+				selected_v[nselected][0] = v[i][0];
+				selected_sched_ctxs[nselected++] = sched_ctxs[i];
+			}
+			 else
+				available_cpus -= res[i][0];
+			used_cpus += res[i][0];
+		}
+
+		if(used_cpus != total_nw[0])
+		{
+			double old_ret = ret;
+			
+			if(nselected <= 0 || nselected == nsched_ctxs)
+			{
+				nselected = nsched_ctxs;
+				for(i = 0; i < nsched_ctxs; i++)
+				{
+					selected_flops[i] = flops[i];
+					selected_v[i][0] = v[i][0];
+					selected_sched_ctxs[i] = sched_ctxs[i];
+				}
+			}
+			else
+				total_nw[0] = available_cpus;
+			
+			double selected_res[nselected][ntypes_of_workers];
+			ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret);
+			
+			if(ret != 0)
+			{
+				int j;
+				for(i = 0; i < nsched_ctxs; i++)
+				{
+					for(j = 0; j < nselected; j++)
+					{
+						if(sched_ctxs[i] == selected_sched_ctxs[j])
+						{
+							res[i][0] = selected_res[j][0];
+							v[i][0] = selected_v[i][0];
+						}
+					}
+				}
+			}
+			else
+				ret = old_ret;
+		}
+
+	}
+
+	/* keep the first speed */
+	if(ret != 0.0)
+	{
+		vmax = 1 / ret;
+		double optimal_v = 0.0;
+		for(i = 0; i < nsched_ctxs; i++)
+		{
 #ifdef STARPU_USE_CUDA
-		optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
+			optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
 #else
-		optimal_v = res[i][0] * v[i][0];
+			optimal_v = res[i][0] * v[i][0];
 #endif //STARPU_USE_CUDA
-		int w;
-		unsigned no_workers = 1;
-		for(w = 0; w < nw; w++)
-			if(res[i][w] != 0.0)
+			int w;
+			unsigned no_workers = 1;
+			for(w = 0; w < nw; w++)
 			{
-				no_workers = 0;
-				break;
+				if(res[i][w] != 0.0)
+				{
+					no_workers = 0;
+					break;
+				}
 			}
-
-		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
-
+			
+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+			
 /* if the hypervisor gave 0 workers to a context but the context still 
-has some last flops or a ready task that does not even have any flops
-we give a worker (in shared mode) to the context in order to leave him
-finish its work = we give -1.0 value instead of 0.0 and further on in
-the distribution function we take this into account and revert the variable
-to its 0.0 value */ 
+   has some last flops or a ready task that does not even have any flops
+   we give a worker (in shared mode) to the context in order to leave him
+   finish its work = we give -1.0 value instead of 0.0 and further on in
+   the distribution function we take this into account and revert the variable
+   to its 0.0 value */ 
 //		if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
-		if(no_workers)
-		{
-			for(w = 0; w < nw; w++)
-				res[i][w] = -1.0;
+			if(no_workers)
+			{
+				for(w = 0; w < nw; w++)
+					res[i][w] = -1.0;
+			}
+			
+			if(optimal_v != 0.0)
+				_set_optimal_v(i, optimal_v);
 		}
-
-		if(optimal_v != 0.0)
-			_set_optimal_v(i, optimal_v);
 	}
 
 	return vmax;
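
Note: the second pass only runs when the first one left workers unused
(used_cpus != total_nw[0]). Contexts that already received their
max_nworkers, or that have no flops left, are filtered out and their workers
are subtracted from the pool; the remaining contexts then compete for the
smaller pool under the constraint that 1/tmax cannot regress. As a worked
example (numbers invented): with total_nw[0] = 9 CPUs and three contexts, if
the first pass returns res = {2, 3, 2} because context 0 is capped at
max_nworkers = 2, then used_cpus = 7 != 9; the second pass drops context 0,
re-runs the LP over contexts 1 and 2 with total_nw[0] = 9 - 2 = 7, and copies
the new per-context results back into res. If every context is capped, or
none is, the second pass re-runs over all contexts with the full pool instead.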

+ 1 - 1
sc_hypervisor/src/policies_utils/speed.c

@@ -147,7 +147,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			}
 		}			
 		
-		if(nworkers != 0)
+		if(nworkers != 0 && all_workers_flops != 0.0)
 		{
 //			elapsed_time -= max_workers_idle_time;
 			speed = (all_workers_flops / elapsed_time) / nworkers;
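
Note: the extra all_workers_flops != 0.0 test keeps the function from
reporting a speed of 0 for workers that have been observed but have not yet
executed any flops. A minimal sketch of the guarded computation; the -1.0
"no measurement" sentinel is an assumption about how the surrounding code
initializes speed, not something confirmed by this hunk:

    double speed = -1.0; /* assumed sentinel for "no measurement yet" */
    if (nworkers != 0 && all_workers_flops != 0.0)
            speed = (all_workers_flops / elapsed_time) / nworkers;
    /* callers can then fall back to a reference speed instead of
       mistaking 0 flops/s for a genuine measurement */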

+ 29 - 8
sc_hypervisor/src/sc_hypervisor.c

@@ -437,18 +437,39 @@ static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
 double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
 {
 	double ret_val = 0.0;
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		ret_val += sc_w->elapsed_flops[i];
+
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	int worker;
+	
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+		
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		ret_val += sc_w->elapsed_flops[worker];
+	}
+
 	return ret_val;
 }
 
 double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
 {
 	double ret_val = 0.0;
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		ret_val += sc_w->total_elapsed_flops[i];
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	int worker;
+	
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+		
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		ret_val += sc_w->total_elapsed_flops[worker];
+	}
+
 	return ret_val;
 }
 
@@ -822,8 +843,8 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		if(lrint(norm_idle_time) >= 1)
 		{
 			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
-			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks)
-				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
+/* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
+/* 				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
 		}
 		else
 		{
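
Note: both elapsed-flops accumulators now iterate only over the workers
actually present in the context, via the sched_ctx worker collection,
instead of scanning all STARPU_NMAXWORKERS slots, presumably so that entries
of workers no longer in the context are ignored. The iteration pattern, as
used in the diff:

    struct starpu_worker_collection *workers =
            starpu_sched_ctx_get_worker_collection(sched_ctx);
    struct starpu_sched_ctx_iterator it;
    if (workers->init_iterator)
            workers->init_iterator(workers, &it);
    while (workers->has_next(workers, &it))
    {
            int worker = workers->get_next(workers, &it);
            /* per-worker accumulation goes here */
    }

The clamp of max_nworkers to nready_tasks - 1 is commented out rather than
removed, presumably pending the resource-sharing follow-up mentioned in the
commit message.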