
hypervisor: adapt the LP solution when max_nworkers has an important impact on the solution (execute the LP twice for now; next, contexts will share resources)

Andra Hugo 11 years ago
parent
commit
082dc8501c

+ 2 - 1
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -65,7 +65,8 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 /* linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context st 
    the system ends up in the smallest tmax*/
 double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double speed[nsched_ctxs][ntypes_of_workers], 
-					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], unsigned sched_ctxs[nsched_ctxs]);
+					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], 
+					       unsigned sched_ctxs[nsched_ctxs], double vmax);
 
 /* linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */
 double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
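
Note: the new trailing vmax parameter lets a caller pin the objective of a
second LP run to the value achieved by a first one. A minimal sketch of the
intended two-pass call sequence, assuming two worker types (GPUs and CPUs);
the names ns, nsel, sel_v, sel_flops, sel_res and sel_ctxs are illustrative
placeholders, not identifiers from the source:

    /* first pass: vmax is free; -1.0 marks "no previous solution" */
    double ret = sc_hypervisor_lp_simulate_distrib_flops(ns, 2, v, flops, res,
                                                         total_nw, sched_ctxs, -1.0);
    if (ret != 0.0)
            /* second pass: keep the achieved 1/tmax as a lower bound and
               redistribute the remaining workers among the uncapped contexts */
            ret = sc_hypervisor_lp_simulate_distrib_flops(nsel, 2, sel_v, sel_flops,
                                                          sel_res, total_nw, sel_ctxs, ret);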

+ 30 - 13
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -249,7 +249,8 @@ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_
 	return res;
 }
 
-double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw], unsigned sched_ctxs[ns])
+double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], 
+					       int  total_nw[nw], unsigned sched_ctxs[ns], double last_vmax)
 {
 	int integer = 1;
 	int s, w;
@@ -282,7 +283,6 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 			if (integer)
 			{
 				glp_set_col_kind(lp, n, GLP_IV);
-				printf("ctx %d idx %d min %d max %d \n", sched_ctxs[s], s, config->min_nworkers, config->max_nworkers);
 				if(config->max_nworkers == 0)
 					glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers);
 				else
@@ -302,7 +302,10 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 	/*1/tmax should belong to the interval [0.0;1.0]*/
 	glp_set_col_name(lp, n, "vmax");
 //	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
-	glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
+	if(last_vmax != -1.0)
+		glp_set_col_bnds(lp, n, GLP_LO, last_vmax, last_vmax);
+	else
+		glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
 	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
 	glp_set_obj_coef(lp, n, 1.0);
 
@@ -384,13 +387,27 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 //		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 
-		/*sum(all gpus) = 3*/
-		if(w == 0)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+		if(last_vmax == -1.0)
+		{
+			/*sum(all gpus) = 3*/
+			if(w == 0)
+				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[0]);
+			
+			/*sum(all cpus) = 9*/
+			if(w == 1)
+				glp_set_row_bnds(lp, ns+w+1, GLP_UP, 0, total_nw[1]);
 
-		/*sum(all cpus) = 9*/
-		if(w == 1)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+		}
+		else
+		{
+			/*sum(all gpus) = 3*/
+			if(w == 0)
+				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+			
+			/*sum(all cpus) = 9*/
+			if(w == 1)
+				glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+		}
 	}
 
 	STARPU_ASSERT(n == ne);
@@ -399,7 +416,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
 
 	glp_smcp parm;
 	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
+  	parm.msg_lev = GLP_MSG_OFF;
 	int ret = glp_simplex(lp, &parm);
 	if (ret)
         {
@@ -414,7 +431,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
         if(stat == GLP_NOFEAS)
         {
                 glp_delete_prob(lp);
-//              printf("no_sol in tmax = %lf\n", tmax);
+		printf("no_sol\n");
                 lp = NULL;
                 return 0.0;
         }
@@ -430,7 +447,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
                 /* if we don't have a solution return */
                 if(stat == GLP_NOFEAS)
                 {
-//                      printf("no int sol in tmax = %lf\n", tmax);
+			printf("no int sol\n");
                         glp_delete_prob(lp);
                         lp = NULL;
                         return 0.0;
@@ -449,7 +466,7 @@ double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw],
                                 res[s][w] = (double)glp_mip_col_val(lp, n);
 			else
 				res[s][w] = glp_get_col_prim(lp, n);
-			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
+  			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
 			n++;
 		}
 	}
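
Note: the two passes differ only in the GLPK bounds. On the first pass the
per-type worker rows are upper-bounded (contexts may collectively use fewer
workers than available once the max_nworkers caps bind) and vmax is free; on
the second pass the worker totals are fixed and vmax may only improve on the
previous solution. A condensed sketch of that bound logic, where vmax_col
stands for the column index of the vmax variable (the loop over w is a
simplification of the per-type if statements above):

    if (last_vmax != -1.0)
            /* second pass: 1/tmax must not regress below the first solution */
            glp_set_col_bnds(lp, vmax_col, GLP_LO, last_vmax, last_vmax);
    else
            glp_set_col_bnds(lp, vmax_col, GLP_LO, 0.0, 0.0);

    for (w = 0; w < nw; w++)
    {
            if (last_vmax == -1.0)
                    /* first pass: use at most total_nw[w] workers of type w */
                    glp_set_row_bnds(lp, ns + w + 1, GLP_UP, 0, total_nw[w]);
            else
                    /* second pass: use exactly total_nw[w] workers of type w */
                    glp_set_row_bnds(lp, ns + w + 1, GLP_FX, total_nw[w], total_nw[w]);
    }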

+ 101 - 28
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -60,44 +60,117 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		}
 /* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
 /* 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
+
 	}
-		
-	
-	double vmax = 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw, sched_ctxs);
-	double optimal_v = 0.0;
-	for(i = 0; i < nsched_ctxs; i++)
+
+	double ret = sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw, sched_ctxs, -1.0);
+	double vmax = 0.0;
+	if(ret != 0.0)
 	{
+		/* redo the lp after cleaning out the contexts that got all the max workers required */
+		unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
+		double selected_flops[STARPU_NMAX_SCHED_CTXS];
+		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
+		int nselected = 0;
+		int available_cpus = total_nw[0];	
+		int used_cpus = 0;
+
+		for(i = 0; i < nsched_ctxs; i++)
+		{
+			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
+			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] != 0.0)
+			{
+				selected_flops[nselected] = flops[i];
+				selected_v[nselected][0] = v[i][0];
+				selected_sched_ctxs[nselected++] = sched_ctxs[i];
+			}
+			 else
+				available_cpus -= res[i][0];
+			used_cpus += res[i][0];
+		}
+
+		if(used_cpus != total_nw[0])
+		{
+			double old_ret = ret;
+			
+			if(nselected <= 0 || nselected == nsched_ctxs)
+			{
+				nselected = nsched_ctxs;
+				for(i = 0; i < nsched_ctxs; i++)
+				{
+					selected_flops[i] = flops[i];
+					selected_v[i][0] = v[i][0];
+					selected_sched_ctxs[i] = sched_ctxs[i];
+				}
+			}
+			else
+				total_nw[0] = available_cpus;
+			
+			double selected_res[nselected][ntypes_of_workers];
+			ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret);
+			
+			if(ret != 0)
+			{
+				int j;
+				for(i = 0; i < nsched_ctxs; i++)
+				{
+					for(j = 0; j < nselected; j++)
+					{
+						if(sched_ctxs[i] == selected_sched_ctxs[j])
+						{
+							res[i][0] = selected_res[j][0];
+							v[i][0] = selected_v[i][0];
+						}
+					}
+				}
+			}
+			else
+				ret = old_ret;
+		}
+
+	}
+
+	/* keep the first speed */
+	if(ret != 0.0)
+	{
+		vmax = 1 / ret;
+		double optimal_v = 0.0;
+		for(i = 0; i < nsched_ctxs; i++)
+		{
 #ifdef STARPU_USE_CUDA
-		optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
+			optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
 #else
-		optimal_v = res[i][0] * v[i][0];
+			optimal_v = res[i][0] * v[i][0];
 #endif //STARPU_USE_CUDA
-		int w;
-		unsigned no_workers = 1;
-		for(w = 0; w < nw; w++)
-			if(res[i][w] != 0.0)
+			int w;
+			unsigned no_workers = 1;
+			for(w = 0; w < nw; w++)
 			{
-				no_workers = 0;
-				break;
+				if(res[i][w] != 0.0)
+				{
+					no_workers = 0;
+					break;
+				}
 			}
-
-		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
-
+			
+			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
+			
 /* if the hypervisor gave 0 workers to a context but the context still 
-has some last flops or a ready task that does not even have any flops
-we give a worker (in shared mode) to the context in order to leave him
-finish its work = we give -1.0 value instead of 0.0 and further on in
-the distribution function we take this into account and revert the variable
-to its 0.0 value */ 
+   has some last flops or a ready task that does not even have any flops
+   we give a worker (in shared mode) to the context in order to leave him
+   finish its work = we give -1.0 value instead of 0.0 and further on in
+   the distribution function we take this into account and revert the variable
+   to its 0.0 value */ 
 //		if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
-		if(no_workers)
-		{
-			for(w = 0; w < nw; w++)
-				res[i][w] = -1.0;
+			if(no_workers)
+			{
+				for(w = 0; w < nw; w++)
+					res[i][w] = -1.0;
+			}
+			
+			if(optimal_v != 0.0)
+				_set_optimal_v(i, optimal_v);
 		}
-
-		if(optimal_v != 0.0)
-			_set_optimal_v(i, optimal_v);
 	}
 
 	return vmax;
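
Note: the second pass only runs when the first one left workers unused
(used_cpus != total_nw[0]). Contexts that already received their
max_nworkers, or that have no flops left, are filtered out and their workers
are subtracted from the pool; the remaining contexts then compete for the
smaller pool under the constraint that 1/tmax cannot regress. As a worked
example (numbers invented): with total_nw[0] = 9 CPUs and three contexts, if
the first pass returns res = {2, 3, 2} because context 0 is capped at
max_nworkers = 2, then used_cpus = 7 != 9; the second pass drops context 0,
re-runs the LP over contexts 1 and 2 with total_nw[0] = 9 - 2 = 7, and copies
the new per-context results back into res. If every context is capped, or
none is, the second pass re-runs over all contexts with the full pool instead.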

+ 1 - 1
sc_hypervisor/src/policies_utils/speed.c

@@ -147,7 +147,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 			}
 		}			
 		
-		if(nworkers != 0)
+		if(nworkers != 0 && all_workers_flops != 0.0)
 		{
 //			elapsed_time -= max_workers_idle_time;
 			speed = (all_workers_flops / elapsed_time) / nworkers;
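
Note: the extra all_workers_flops != 0.0 test keeps the function from
reporting a speed of 0 for workers that have been observed but have not yet
executed any flops. A minimal sketch of the guarded computation; the -1.0
"no measurement" sentinel is an assumption about how the surrounding code
initializes speed, not something confirmed by this hunk:

    double speed = -1.0; /* assumed sentinel for "no measurement yet" */
    if (nworkers != 0 && all_workers_flops != 0.0)
            speed = (all_workers_flops / elapsed_time) / nworkers;
    /* callers can then fall back to a reference speed instead of
       mistaking 0 flops/s for a genuine measurement */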

+ 29 - 8
sc_hypervisor/src/sc_hypervisor.c

@@ -437,18 +437,39 @@ static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
 double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
 {
 	double ret_val = 0.0;
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		ret_val += sc_w->elapsed_flops[i];
+
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	int worker;
+	
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+		
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		ret_val += sc_w->elapsed_flops[worker];
+	}
+
 	return ret_val;
 }
 
 double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
 {
 	double ret_val = 0.0;
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		ret_val += sc_w->total_elapsed_flops[i];
+	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+	int worker;
+	
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+		workers->init_iterator(workers, &it);
+		
+	while(workers->has_next(workers, &it))
+	{
+		worker = workers->get_next(workers, &it);
+		ret_val += sc_w->total_elapsed_flops[worker];
+	}
+
 	return ret_val;
 }
 
@@ -822,8 +843,8 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		if(lrint(norm_idle_time) >= 1)
 		{
 			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
-			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks)
-				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
+/* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
+/* 				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
 		}
 		else
 		{
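
Note: both elapsed-flops accumulators now iterate only over the workers
actually present in the context, via the sched_ctx worker collection,
instead of scanning all STARPU_NMAXWORKERS slots, presumably so that entries
of workers no longer in the context are ignored. The iteration pattern, as
used in the diff:

    struct starpu_worker_collection *workers =
            starpu_sched_ctx_get_worker_collection(sched_ctx);
    struct starpu_sched_ctx_iterator it;
    if (workers->init_iterator)
            workers->init_iterator(workers, &it);
    while (workers->has_next(workers, &it))
    {
            int worker = workers->get_next(workers, &it);
            /* per-worker accumulation goes here */
    }

The clamp of max_nworkers to nready_tasks - 1 is commented out rather than
removed, presumably pending the resource-sharing follow-up mentioned in the
commit message.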