
fix some bugs + print evaluation info

Andra Hugo, 12 years ago
329520f765

+ 6 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -145,6 +145,9 @@ struct sched_ctx_hypervisor_wrapper
 	/* the start time of the resizing sample of this context*/
 	double start_time;
 
+	/* the first time a task was pushed to this context*/
+	double real_start_time;
+
 	/* the workers don't leave the current ctx until the receiver ctx 
 	   doesn't ack the receive of these workers */
 	struct sched_ctx_hypervisor_resize_ack resize_ack;
@@ -224,6 +227,9 @@ void sched_ctx_hypervisor_free_size_req(void);
 
 unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx);
 
+/* compute an average value of the cpu/cuda velocity */
+double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
+
 #ifdef __cplusplus
 }
 #endif
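
The prototype added above is the public counterpart of the per-sample helper in policy_tools.c: it averages velocity over the whole run rather than over the current resizing sample. A minimal usage sketch follows; the wrapper pointer sc_w and the surrounding policy code are assumptions, only the function itself and the STARPU_*_WORKER arch values come from this commit.

/* Hypothetical caller (not part of this commit): query the averaged velocity
 * of each worker type for a context wrapper sc_w; a negative value means no
 * worker of that type has executed any flops yet. */
double cpu_v  = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
double cuda_v = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
if(cpu_v > 0.0 && cuda_v > 0.0)
	printf("ctx %u: cpu %.2lf Gflops/s, cuda %.2lf Gflops/s\n", sc_w->sched_ctx, cpu_v, cuda_v);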

+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -32,7 +32,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/lp_policy.c			\
 	hypervisor_policies/lp2_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
-	hypervisor_policies/ispeed_lp_policy.c
+	hypervisor_policies/ispeed_lp_policy.c		\
+	hypervisor_policies/debit_lp_policy.c
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_tools.h		\

+ 38 - 8
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -21,7 +21,7 @@
 static struct bound_task_pool *task_pools = NULL;
 
 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers);
+static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned integer);
 static double _find_tmax(double t1, double t2);
 static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], int *sched_ctxs, int *workers)
 {
@@ -66,7 +66,7 @@ static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, dou
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   in the proper table */
-		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers);
+		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers, 1);
 		if(res != 0.0)
 		{
 			for(w = 0; w < nw; w++)
@@ -318,7 +318,7 @@ static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *w
  */
 #ifdef STARPU_HAVE_GLPK_H
 #include <glpk.h>
-static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
+static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned integer)
 {
 	struct bound_task_pool * tp;
 	int t, w, s;
@@ -362,7 +362,13 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				char name[32];
 				snprintf(name, sizeof(name), "w%ds%dn", w, s);
 				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
-				glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
+				if (integer)
+                                {
+                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
+                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
+                                }
+                                else
+					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
 			}
 
 		int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
@@ -451,8 +457,10 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				ar[n] = 1;
 				n++;
 			}
-
-			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+			if(integer)
+                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
 		}
 		if(n != ne)
 			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
@@ -472,6 +480,23 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 		return 0.0;
 	}
 
+	if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+		glp_intopt(lp, &iocp);
+		int stat = glp_mip_status(lp);
+		/* if we don't have a solution return */
+		if(stat == GLP_NOFEAS)
+		{
+			glp_delete_prob(lp);
+			lp = NULL;
+			return 0.0;
+		}
+		
+        }
+
 	int stat = glp_get_prim_stat(lp);
 	/* if we don't have a solution return */
 	if(stat == GLP_NOFEAS)
@@ -488,7 +513,12 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
-			w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+		{
+			if (integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
+                        else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+		}
 
 	glp_delete_prob(lp);
 	return res;
@@ -557,7 +587,7 @@ static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
 						else
 						{
 							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.3)
+							if(w_in_s[s][w] > 0.5)
 								nworkers_rounded[s][1]++;
 						}
 					}
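
The changes above let _glp_resolve solve either the continuous relaxation or a 0/1 integer program over the w_in_s variables, depending on the new integer argument. The toy program below, with made-up problem data, isolates the GLPK calls involved (GLP_IV columns, glp_intopt after glp_simplex, glp_mip_col_val instead of glp_get_col_prim); it is only a sketch of the pattern, not code from this commit.

#include <stdio.h>
#include <glpk.h>

/* maximize x1 + x2  s.t.  x1 + 2*x2 <= 3,  0 <= x1, x2 <= 1 */
static double solve_x1(unsigned integer)
{
	glp_prob *lp = glp_create_prob();
	glp_set_obj_dir(lp, GLP_MAX);

	glp_add_cols(lp, 2);
	int j;
	for(j = 1; j <= 2; j++)
	{
		if(integer)
		{
			/* 0/1 decision variable, as for w_in_s[s][w] in the integer case */
			glp_set_col_kind(lp, j, GLP_IV);
			glp_set_col_bnds(lp, j, GLP_DB, 0, 1);
		}
		else
			/* continuous in [0,1], as in the relaxed case */
			glp_set_col_bnds(lp, j, GLP_DB, 0.0, 1.0);
		glp_set_obj_coef(lp, j, 1.0);
	}

	glp_add_rows(lp, 1);
	glp_set_row_bnds(lp, 1, GLP_UP, 0.0, 3.0);
	int ia[3] = {0, 1, 1}, ja[3] = {0, 1, 2};
	double ar[3] = {0.0, 1.0, 2.0};
	glp_load_matrix(lp, 2, ia, ja, ar);

	glp_smcp smcp;
	glp_init_smcp(&smcp);
	smcp.msg_lev = GLP_MSG_OFF;
	glp_simplex(lp, &smcp);

	if(integer)
	{
		/* run the branch-and-cut solver on top of the LP relaxation */
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
		glp_intopt(lp, &iocp);
	}

	/* MIP and LP solutions are read back through different accessors */
	double x1 = integer ? glp_mip_col_val(lp, 1) : glp_get_col_prim(lp, 1);
	glp_delete_prob(lp);
	return x1;
}

int main(void)
{
	printf("relaxed x1 = %g, integer x1 = %g\n", solve_x1(0), solve_x1(1));
	return 0;
}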

+ 5 - 3
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -396,10 +396,12 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 						tmp_nw_move[w] -=  nw_move;
 					}
 
-					double needed = res[s2][w] - nw_ctx2 * 1.0;
-					double x_double = (double)nw_needed;
+					
+					double needed = res[s2][w] - (nw_ctx2 * 1.0);
+					int x = floor(needed);
+					double x_double = (double)x;
 					double diff = needed - x_double;
-					if(diff > 0.3 && tmp_nw_add[w] != 0)
+					if(diff > 0.3 && tmp_nw_add[w] > 0)
 					{
 						nw_add = tmp_nw_add[w];
 						int i = 0;
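
The fix above computes the fractional remainder of the workers a context still needs by flooring `needed` (instead of reusing the unrelated nw_needed) and only tops up when that remainder is significant and a worker is actually available. A tiny standalone restatement of that rule, with a hypothetical helper name, is:

#include <math.h>

/* Hypothetical helper illustrating the rounding fixed above: `needed` is the
 * fractional number of workers of one type that the LP solution still asks
 * for, `navailable` the number of such workers that can still be added. */
static int should_add_extra_worker(double needed, int navailable)
{
	int x = floor(needed);             /* whole workers already covered */
	double diff = needed - (double)x;  /* fractional remainder */
	return diff > 0.3 && navailable > 0;
}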

+ 28 - 31
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -297,31 +297,6 @@ unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
 	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
 }
 
-static double _get_best_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
-{
-	double ret_val = 0.0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
-        int worker;
-
-	struct starpu_sched_ctx_iterator it;
-	if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-        while(workers->has_next(workers, &it))
-	{
-                worker = workers->get_next(workers, &it);
-                enum starpu_archtype arch = starpu_worker_get_type(worker);
-                if(arch == req_arch)
-                {
-			if(sc_w->elapsed_flops[worker] > ret_val)
-				ret_val = sc_w->elapsed_flops[worker];
-			(*npus)++;
-                }
-        }
-
-	return ret_val;
-}
-
 static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype req_arch)
 {
 	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
@@ -487,7 +462,7 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 		}
 			
                 double vel  = (elapsed_flops/elapsed_time);/* in Gflops/s */
-		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 0.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
+		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 1.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
                 return vel;
         }
 
@@ -496,7 +471,32 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
 }
 
-/* compute an average value of the cpu velocity */
+static double _get_best_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
+{
+	double ret_val = 0.0;
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			if(sc_w->elapsed_flops[worker] > ret_val)
+				ret_val = sc_w->elapsed_flops[worker];
+			(*npus)++;
+                }
+        }
+
+	return ret_val;
+}
+
+/* compute an average value of the cpu/cuda velocity */
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
 {
         int npus = 0;
@@ -504,20 +504,17 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
 	if(npus == 0)
 		return -1.0; 
 
-	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000.0;
-
         if( elapsed_flops != 0.0)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
-		double velocity = elapsed_flops/elapsed_time; /* in Gflops/s */
+		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
                 return velocity;
         }
 
         return -1.0;
 }
 
-
 /* check if there is a big velocity gap between the contexts */
 int _velocity_gap_btw_ctxs()
 {
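
Both velocity helpers boil down to the same arithmetic: elapsed flops converted to Gflops, divided by elapsed wall-clock time in seconds, with starpu_timing_now() returning microseconds. The snippet below only spells out that unit conversion, with invented sample values.

/* Invented sample values, only to illustrate the units used above */
double start_time    = 0.0;            /* starpu_timing_now() at the start of the sample, in us */
double curr_time     = 4000000.0;      /* 4 seconds later, in us */
double raw_flops     = 2.5e12;         /* flops executed during the sample */

double elapsed_flops = raw_flops / 1000000000.0;             /* 2500 Gflops */
double elapsed_time  = (curr_time - start_time) / 1000000.0; /* 4.0 s */
double velocity      = elapsed_flops / elapsed_time;         /* 625 Gflops/s */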

+ 70 - 1
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -35,6 +35,7 @@ extern struct sched_ctx_hypervisor_policy gflops_rate_policy;
 extern struct sched_ctx_hypervisor_policy lp_policy;
 extern struct sched_ctx_hypervisor_policy lp2_policy;
 extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;
+extern struct sched_ctx_hypervisor_policy debit_lp_policy;
 #endif // STARPU_HAVE_GLPK_
 extern struct sched_ctx_hypervisor_policy ispeed_policy;
 
@@ -47,6 +48,7 @@ static struct sched_ctx_hypervisor_policy *predefined_policies[] =
 	&lp_policy,
 	&lp2_policy,
 	&ispeed_lp_policy,
+	&debit_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
 	&ispeed_policy
@@ -145,6 +147,7 @@ struct starpu_sched_ctx_performance_counters* sched_ctx_hypervisor_init(struct s
 		hypervisor.sched_ctx_w[i].submitted_flops = 0.0;
 		hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
 		hypervisor.sched_ctx_w[i].start_time = 0.0;
+		hypervisor.sched_ctx_w[i].real_start_time = 0.0;
 		hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
 		hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
 		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
@@ -309,11 +312,68 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+static double _get_best_total_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
+{
+	double ret_val = 0.0;
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			if(sc_w->total_elapsed_flops[worker] > ret_val)
+				ret_val = sc_w->total_elapsed_flops[worker];
+			(*npus)++;
+                }
+        }
+
+	return ret_val;
+}
+
+/* compute an average value of the cpu/cuda velocity */
+double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
+{
+        int npus = 0;
+        double elapsed_flops = _get_best_total_elapsed_flops(sc_w, &npus, arch) / 1000000000.0 ; /* in gflops */
+	if(npus == 0)
+		return -1.0; 
+
+        if( elapsed_flops != 0.0)
+        {
+                double curr_time = starpu_timing_now();
+                double elapsed_time = (curr_time - sc_w->real_start_time) / 1000000.0; /* in seconds */
+		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
+                return velocity;
+        }
+
+        return -1.0;
+}
+
 static void _print_current_time()
 {
 	double curr_time = starpu_timing_now();
 	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
-	printf("Time: %lf \n", elapsed_time);
+	printf("Time: %lf\n", elapsed_time);
+	int i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
+		{
+			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
+
+			double cpu_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
+			double cuda_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
+			printf("%d: cpu_v = %lf cuda_v = %lf\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed);
+		}
+	}
+	return;
 }
 
 static int get_ntasks( int *tasks)
@@ -517,6 +577,12 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 		}
 		else
 		{
+			printf("try to remove from ctx %d: ", sched_ctx);
+			int j;
+			for(j = 0; j < nworkers_to_remove; j++)
+				printf(" %d", workers_to_remove[j]);
+			printf("\n");
+
 			int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 			if(ret != EBUSY)
 			{
@@ -702,6 +768,9 @@ static void notify_pushed_task(unsigned sched_ctx, int worker)
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0)
 		hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
 
+	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
+		hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
+
 	int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 
 	if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx])
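
notify_pushed_task now records two timestamps: start_time, which marks the current resizing sample and feeds the per-sample velocity in policy_tools.c, and the new real_start_time, set only once when the first task reaches the context and used by sched_ctx_hypervisor_get_velocity_per_worker_type for the whole-run average printed by _print_current_time. A hedged restatement of the difference:

/* Illustration only; field and function names are from this commit, the
 * place where start_time is reset for each new sample lives elsewhere in the
 * hypervisor and is assumed here.
 *
 *   start_time      - start of the current resizing sample
 *                     -> _get_velocity_per_worker_type(): velocity of the sample
 *   real_start_time - first time a task was pushed to the context
 *                     -> sched_ctx_hypervisor_get_velocity_per_worker_type():
 *                        velocity over the whole execution so far
 */
if(sc_w->total_flops != 0.0 && sc_w->real_start_time == 0.0)
	sc_w->real_start_time = starpu_timing_now(); /* only the very first push sets it */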