
hypervisor lp: max computation for the contexts sharing resources (do not consider shared workers as full workers)

Andra Hugo, 11 years ago
commit 994728b26b

+ 2 - 0
include/starpu_worker.h

@@ -94,6 +94,8 @@ int starpu_worker_get_devid(int id);
 
 int starpu_worker_get_mp_nodeid(int id);
 
+int starpu_worker_get_nsched_ctxs(int workerid);
+
 #ifdef __cplusplus
 }
 #endif
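
The header change exposes the per-worker context count outside of src/: starpu_worker_get_nsched_ctxs() returns how many scheduling contexts a worker is currently registered in. Below is a minimal usage sketch, not part of this commit: count_shared_workers() and its loop over worker ids are illustrative only, and the "global ctx + current ctx = 2" convention is taken from the sc_hypervisor.c hunk further down.

/* Hedged sketch (not in this commit): count how many of the first
 * `nworkers` workers are shared with contexts other than the global
 * one and the caller's own context. */
#include <starpu.h>

static int count_shared_workers(int nworkers)
{
	int w, nshared = 0;
	for (w = 0; w < nworkers; w++)
	{
		/* 2 = the global ctx plus the ctx we are inspecting */
		if (starpu_worker_get_nsched_ctxs(w) > 2)
			nshared++;
	}
	return nshared;
}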

+ 45 - 3
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -63,7 +63,50 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 
 	}
 
-	double ret = sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw, sched_ctxs, -1.0);
+	unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
+	double tmp_flops[STARPU_NMAX_SCHED_CTXS];
+	double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
+	double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
+	int tmp_nsched_ctxs = 0;
+	for(i = 0; i < nsched_ctxs; i++)
+	{
+		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
+		if(config->max_nworkers != 0)
+		{
+			tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i];
+			tmp_flops[tmp_nsched_ctxs] = flops[i];
+			int w;
+			for(w = 0; w < ntypes_of_workers; w++)
+				tmp_v[tmp_nsched_ctxs][w] = v[i][w];
+			tmp_nsched_ctxs++;
+		}
+	}
+	
+	double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0);
+
+	int j;
+	for(i = 0; i < nsched_ctxs; i++)
+	{
+		unsigned found = 0;
+		for(j = 0; j < tmp_nsched_ctxs; j++)
+		{
+			if(sched_ctxs[i] == tmp_sched_ctxs[j])
+			{
+				int w;
+				for(w = 0; w < ntypes_of_workers; w++)
+					res[i][w] = tmp_res[j][w];
+				found = 1;
+				break;
+			}
+		}
+		if(!found)
+		{
+			int w;
+			for(w = 0; w < ntypes_of_workers; w++)
+				res[i][w] = 0.0;
+		}
+	}
+
 	double vmax = 0.0;
 	if(ret != 0.0)
 	{
@@ -89,7 +132,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 			used_cpus += res[i][0];
 		}
 
-		if(used_cpus < 0.8 * total_nw[0])
+		if(used_cpus < 0.8 * total_nw[0] && nselected > 1)
 		{
 			double old_ret = ret;
 			
@@ -119,7 +162,6 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 						if(sched_ctxs[i] == selected_sched_ctxs[j])
 						{
 							res[i][0] = selected_res[j][0];
-							v[i][0] = selected_v[i][0];
 						}
 					}
 				}
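
In lp_tools.c the LP is now fed only the contexts whose max_nworkers is non-zero; the result is then mapped back onto the original sched_ctxs order, and the skipped contexts receive 0 workers. The following self-contained sketch shows that filter / map-back pattern in isolation; solve_lp(), NMAX and NW are invented stand-ins (solve_lp() replaces sc_hypervisor_lp_simulate_distrib_flops() with a trivial proportional split), not StarPU code.

/* Hedged sketch of the filter / map-back pattern used in the hunk above. */
#include <string.h>

#define NMAX 16          /* caller must pass nctxs <= NMAX (assumption) */
#define NW   8.0         /* total number of workers to hand out (assumption) */

/* dummy stand-in for the LP: split workers proportionally to flops */
static void solve_lp(int n, const double flops[], double res[])
{
	double total = 0.0;
	int i;
	for (i = 0; i < n; i++)
		total += flops[i];
	for (i = 0; i < n; i++)
		res[i] = total > 0.0 ? NW * flops[i] / total : 0.0;
}

void distribute(int nctxs, const int max_nworkers[], const double flops[],
		double res[])
{
	double tmp_flops[NMAX], tmp_res[NMAX];
	int map[NMAX];                      /* tmp index -> original index */
	int i, n = 0;

	/* keep only contexts that are allowed to receive workers */
	for (i = 0; i < nctxs; i++)
		if (max_nworkers[i] != 0)
		{
			map[n] = i;
			tmp_flops[n] = flops[i];
			n++;
		}

	solve_lp(n, tmp_flops, tmp_res);    /* LP runs on the kept contexts only */

	/* map the result back; skipped contexts get 0 workers */
	memset(res, 0, nctxs * sizeof res[0]);
	for (i = 0; i < n; i++)
		res[map[i]] = tmp_res[i];
}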

+ 14 - 6
sc_hypervisor/src/sc_hypervisor.c

@@ -820,6 +820,8 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			workers->init_iterator(workers, &it);
 		
 		max_workers_idle_time[i] = 0.0;
+		int nshared_workers = 0;
+		double cpu_used_in_shared = 0.0;
 		while(workers->has_next(workers, &it))
 		{
 			worker = workers->get_next(workers, &it);
@@ -832,7 +834,12 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 				double end_time  = starpu_timing_now();
 				double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */ 
 				max_workers_idle_time[i] += hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] + idle;
-			}				
+			}		
+			/* if the worker is shared with other contexts, count the fraction it spends in them */
+			/* 2 = the 1st one: the global ctx, the 2nd one: the current ctx */
+			int nctxs = starpu_worker_get_nsched_ctxs(worker);
+			if( nctxs > 2)
+				cpu_used_in_shared += (nctxs * 1.0 - 2.0) / (nctxs * 1.0);
 		}			
 
 		
@@ -840,18 +847,19 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
 		double norm_idle_time = max_workers_idle_time[i] / elapsed_time;
 
+		int unused_cpus = lrint(cpu_used_in_shared);
 		if(norm_idle_time >= 0.9)
 		{
-			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
+			config->max_nworkers = 	workers->nworkers - unused_cpus - lrint(norm_idle_time);
 /* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
 /* 				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
 		}
 		else
 		{
 			if(norm_idle_time < 0.1)//(max_workers_idle_time[i] < 0.000001)
-				config->max_nworkers = 	workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
+				config->max_nworkers = workers->nworkers - unused_cpus + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 			else
-				config->max_nworkers = workers->nworkers;
+				config->max_nworkers = workers->nworkers - unused_cpus;
 		}
 		
 		if(config->max_nworkers < 0)
@@ -859,8 +867,8 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		if(config->max_nworkers > max_cpus)
 			config->max_nworkers = max_cpus;
 		
-		printf("%d: ready tasks  %d idle for long %lf norm_idle_time %lf elapsed_time %lf nworkers %d max %d \n", 
-		       sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, workers->nworkers, config->max_nworkers);
+		printf("%d: ready tasks %d idle for long %lf norm_idle_time %lf elapsed_time %lf unused_cpus %d nworkers %d max %d \n", 
+		       sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, unused_cpus, workers->nworkers, config->max_nworkers);
 
 
 		total_max_nworkers += config->max_nworkers;
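
The sc_hypervisor.c hunk above stops counting a shared worker as a full CPU when recomputing max_nworkers: each worker registered in nctxs > 2 contexts adds (nctxs - 2)/nctxs to cpu_used_in_shared, and the rounded sum (unused_cpus) is subtracted from the context's worker count. A small self-contained worked example with invented context counts (not StarPU code):

/* Hedged example of the fractional accounting above; values are invented.
 * A worker in nctxs contexts spends (nctxs - 2)/nctxs of its time in
 * contexts other than the global one and the current one. */
#include <math.h>
#include <stdio.h>

int main(void)
{
	/* number of contexts each of this context's 4 workers belongs to */
	int nctxs_per_worker[4] = { 2, 2, 4, 4 };
	double cpu_used_in_shared = 0.0;
	int w;

	for (w = 0; w < 4; w++)
	{
		int nctxs = nctxs_per_worker[w];
		if (nctxs > 2)  /* shared beyond the global ctx and this ctx */
			cpu_used_in_shared += (nctxs * 1.0 - 2.0) / (nctxs * 1.0);
	}

	/* two workers shared with 2 other ctxs each: 0.5 + 0.5 -> 1 full cpu */
	int unused_cpus = (int) lrint(cpu_used_in_shared);
	printf("unused_cpus = %d\n", unused_cpus);  /* prints 1 */
	return 0;
}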

+ 5 - 0
src/core/workers.c

@@ -1635,6 +1635,11 @@ struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
 	return &config.sched_ctxs[STARPU_GLOBAL_SCHED_CTX];
 }
 
+int starpu_worker_get_nsched_ctxs(int workerid)
+{
+	return config.workers[workerid].nsched_ctxs;
+}
+
 int
 starpu_driver_run(struct starpu_driver *d)
 {