
fix some bugs + print evaluation info

Andra Hugo, 12 years ago
329520f765

+ 6 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -145,6 +145,9 @@ struct sched_ctx_hypervisor_wrapper
 	/* the start time of the resizing sample of this context*/
 	double start_time;
 
+	/* the first time a task was pushed to this context*/
+	double real_start_time;
+
 	/* the workers don't leave the current ctx until the receiver ctx 
 	   doesn't ack the receive of these workers */
 	struct sched_ctx_hypervisor_resize_ack resize_ack;
@@ -224,6 +227,9 @@ void sched_ctx_hypervisor_free_size_req(void);
 
 unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx);
 
+/* compute an average value of the cpu/cuda velocity */
+double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
+
 #ifdef __cplusplus
 }
 #endif
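
The prototype added above is the public counterpart of the per-sample helper in policy_tools.c: it averages velocity over the whole run rather than over the current resizing sample. A minimal usage sketch follows; the wrapper pointer sc_w and the surrounding policy code are assumptions, only the function itself and the STARPU_*_WORKER arch values come from this commit.

/* Hypothetical caller (not part of this commit): query the averaged velocity
 * of each worker type for a context wrapper sc_w; a negative value means no
 * worker of that type has executed any flops yet. */
double cpu_v  = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
double cuda_v = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
if(cpu_v > 0.0 && cuda_v > 0.0)
	printf("ctx %u: cpu %.2lf Gflops/s, cuda %.2lf Gflops/s\n", sc_w->sched_ctx, cpu_v, cuda_v);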

+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -32,7 +32,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/lp_policy.c			\
 	hypervisor_policies/lp2_policy.c		\
 	hypervisor_policies/ispeed_policy.c		\
-	hypervisor_policies/ispeed_lp_policy.c
+	hypervisor_policies/ispeed_lp_policy.c		\
+	hypervisor_policies/debit_lp_policy.c
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_tools.h		\

+ 38 - 8
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -21,7 +21,7 @@
 static struct bound_task_pool *task_pools = NULL;
 
 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers);
+static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned integer);
 static double _find_tmax(double t1, double t2);
 static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], int *sched_ctxs, int *workers)
 {
@@ -66,7 +66,7 @@ static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, dou
 		/* find solution and save the values in draft tables
 		   only if there is a solution for the system we save them
 		   in the proper table */
-		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers);
+		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers, 1);
 		if(res != 0.0)
 		{
 			for(w = 0; w < nw; w++)
@@ -318,7 +318,7 @@ static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *w
  */
 #ifdef STARPU_HAVE_GLPK_H
 #include <glpk.h>
-static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
+static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers, unsigned integer)
 {
 	struct bound_task_pool * tp;
 	int t, w, s;
@@ -362,7 +362,13 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				char name[32];
 				snprintf(name, sizeof(name), "w%ds%dn", w, s);
 				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
-				glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
+				if (integer)
+                                {
+                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
+                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
+                                }
+                                else
+					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
 			}
 
 		int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
@@ -451,8 +457,10 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 				ar[n] = 1;
 				n++;
 			}
-
-			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+			if(integer)
+                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
 		}
 		if(n != ne)
 			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
@@ -472,6 +480,23 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 		return 0.0;
 	}
 
+	if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+		glp_intopt(lp, &iocp);
+		int stat = glp_mip_status(lp);
+		/* if we don't have a solution return */
+		if(stat == GLP_NOFEAS)
+		{
+			glp_delete_prob(lp);
+			lp = NULL;
+			return 0.0;
+		}
+		
+        }
+
 	int stat = glp_get_prim_stat(lp);
 	/* if we don't have a solution return */
 	if(stat == GLP_NOFEAS)
@@ -488,7 +513,12 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
-			w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+		{
+			if (integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
+                        else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+		}
 
 	glp_delete_prob(lp);
 	return res;
@@ -557,7 +587,7 @@ static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
 						else
 						{
 							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.3)
+							if(w_in_s[s][w] > 0.5)
 								nworkers_rounded[s][1]++;
 						}
 					}
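
The changes above let _glp_resolve solve either the continuous relaxation or a 0/1 integer program over the w_in_s variables, depending on the new integer argument. The toy program below, with made-up problem data, isolates the GLPK calls involved (GLP_IV columns, glp_intopt after glp_simplex, glp_mip_col_val instead of glp_get_col_prim); it is only a sketch of the pattern, not code from this commit.

#include <stdio.h>
#include <glpk.h>

/* maximize x1 + x2  s.t.  x1 + 2*x2 <= 3,  0 <= x1, x2 <= 1 */
static double solve_x1(unsigned integer)
{
	glp_prob *lp = glp_create_prob();
	glp_set_obj_dir(lp, GLP_MAX);

	glp_add_cols(lp, 2);
	int j;
	for(j = 1; j <= 2; j++)
	{
		if(integer)
		{
			/* 0/1 decision variable, as for w_in_s[s][w] in the integer case */
			glp_set_col_kind(lp, j, GLP_IV);
			glp_set_col_bnds(lp, j, GLP_DB, 0, 1);
		}
		else
			/* continuous in [0,1], as in the relaxed case */
			glp_set_col_bnds(lp, j, GLP_DB, 0.0, 1.0);
		glp_set_obj_coef(lp, j, 1.0);
	}

	glp_add_rows(lp, 1);
	glp_set_row_bnds(lp, 1, GLP_UP, 0.0, 3.0);
	int ia[3] = {0, 1, 1}, ja[3] = {0, 1, 2};
	double ar[3] = {0.0, 1.0, 2.0};
	glp_load_matrix(lp, 2, ia, ja, ar);

	glp_smcp smcp;
	glp_init_smcp(&smcp);
	smcp.msg_lev = GLP_MSG_OFF;
	glp_simplex(lp, &smcp);

	if(integer)
	{
		/* run the branch-and-cut solver on top of the LP relaxation */
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
		glp_intopt(lp, &iocp);
	}

	/* MIP and LP solutions are read back through different accessors */
	double x1 = integer ? glp_mip_col_val(lp, 1) : glp_get_col_prim(lp, 1);
	glp_delete_prob(lp);
	return x1;
}

int main(void)
{
	printf("relaxed x1 = %g, integer x1 = %g\n", solve_x1(0), solve_x1(1));
	return 0;
}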

+ 5 - 3
sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c

@@ -396,10 +396,12 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 						tmp_nw_move[w] -=  nw_move;
 					}
 
-					double needed = res[s2][w] - nw_ctx2 * 1.0;
-					double x_double = (double)nw_needed;
+					
+					double needed = res[s2][w] - (nw_ctx2 * 1.0);
+					int x = floor(needed);
+					double x_double = (double)x;
 					double diff = needed - x_double;
-					if(diff > 0.3 && tmp_nw_add[w] != 0)
+					if(diff > 0.3 && tmp_nw_add[w] > 0)
 					{
 						nw_add = tmp_nw_add[w];
 						int i = 0;
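
The fix above computes the fractional remainder of the workers a context still needs by flooring `needed` (instead of reusing the unrelated nw_needed) and only tops up when that remainder is significant and a worker is actually available. A tiny standalone restatement of that rule, with a hypothetical helper name, is:

#include <math.h>

/* Hypothetical helper illustrating the rounding fixed above: `needed` is the
 * fractional number of workers of one type that the LP solution still asks
 * for, `navailable` the number of such workers that can still be added. */
static int should_add_extra_worker(double needed, int navailable)
{
	int x = floor(needed);             /* whole workers already covered */
	double diff = needed - (double)x;  /* fractional remainder */
	return diff > 0.3 && navailable > 0;
}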

+ 28 - 31
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -297,31 +297,6 @@ unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
 	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
 }
 
-static double _get_best_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
-{
-	double ret_val = 0.0;
-	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
-        int worker;
-
-	struct starpu_sched_ctx_iterator it;
-	if(workers->init_iterator)
-                workers->init_iterator(workers, &it);
-
-        while(workers->has_next(workers, &it))
-	{
-                worker = workers->get_next(workers, &it);
-                enum starpu_archtype arch = starpu_worker_get_type(worker);
-                if(arch == req_arch)
-                {
-			if(sc_w->elapsed_flops[worker] > ret_val)
-				ret_val = sc_w->elapsed_flops[worker];
-			(*npus)++;
-                }
-        }
-
-	return ret_val;
-}
-
 static double _get_ispeed_sample_for_type_of_worker(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype req_arch)
 {
 	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
@@ -487,7 +462,7 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 		}
 			
                 double vel  = (elapsed_flops/elapsed_time);/* in Gflops/s */
-		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 0.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
+		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 1.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel; 
                 return vel;
         }
 
@@ -496,7 +471,32 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
 }
 
-/* compute an average value of the cpu velocity */
+static double _get_best_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
+{
+	double ret_val = 0.0;
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			if(sc_w->elapsed_flops[worker] > ret_val)
+				ret_val = sc_w->elapsed_flops[worker];
+			(*npus)++;
+                }
+        }
+
+	return ret_val;
+}
+
+/* compute an average value of the cpu/cuda velocity */
 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
 {
         int npus = 0;
@@ -504,20 +504,17 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
 	if(npus == 0)
 		return -1.0; 
 
-	double sample = _get_ispeed_sample_for_type_of_worker(sc_w, arch) / 1000000000.0;
-
         if( elapsed_flops != 0.0)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
-		double velocity = elapsed_flops/elapsed_time; /* in Gflops/s */
+		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
                 return velocity;
         }
 
         return -1.0;
 }
 
-
 /* check if there is a big velocity gap between the contexts */
 int _velocity_gap_btw_ctxs()
 {
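
Both velocity helpers boil down to the same arithmetic: elapsed flops converted to Gflops, divided by elapsed wall-clock time in seconds, with starpu_timing_now() returning microseconds. The snippet below only spells out that unit conversion, with invented sample values.

/* Invented sample values, only to illustrate the units used above */
double start_time    = 0.0;            /* starpu_timing_now() at the start of the sample, in us */
double curr_time     = 4000000.0;      /* 4 seconds later, in us */
double raw_flops     = 2.5e12;         /* flops executed during the sample */

double elapsed_flops = raw_flops / 1000000000.0;             /* 2500 Gflops */
double elapsed_time  = (curr_time - start_time) / 1000000.0; /* 4.0 s */
double velocity      = elapsed_flops / elapsed_time;         /* 625 Gflops/s */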

+ 70 - 1
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -35,6 +35,7 @@ extern struct sched_ctx_hypervisor_policy gflops_rate_policy;
 extern struct sched_ctx_hypervisor_policy lp_policy;
 extern struct sched_ctx_hypervisor_policy lp2_policy;
 extern struct sched_ctx_hypervisor_policy ispeed_lp_policy;
+extern struct sched_ctx_hypervisor_policy debit_lp_policy;
 #endif // STARPU_HAVE_GLPK_
 extern struct sched_ctx_hypervisor_policy ispeed_policy;
 
@@ -47,6 +48,7 @@ static struct sched_ctx_hypervisor_policy *predefined_policies[] =
 	&lp_policy,
 	&lp2_policy,
 	&ispeed_lp_policy,
+	&debit_lp_policy,
 #endif // STARPU_HAVE_GLPK_H
 	&gflops_rate_policy,
 	&ispeed_policy
@@ -145,6 +147,7 @@ struct starpu_sched_ctx_performance_counters* sched_ctx_hypervisor_init(struct s
 		hypervisor.sched_ctx_w[i].submitted_flops = 0.0;
 		hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
 		hypervisor.sched_ctx_w[i].start_time = 0.0;
+		hypervisor.sched_ctx_w[i].real_start_time = 0.0;
 		hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
 		hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
 		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
@@ -309,11 +312,68 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+static double _get_best_total_elapsed_flops(struct sched_ctx_hypervisor_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
+{
+	double ret_val = 0.0;
+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
+        int worker;
+
+	struct starpu_sched_ctx_iterator it;
+	if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+	{
+                worker = workers->get_next(workers, &it);
+                enum starpu_archtype arch = starpu_worker_get_type(worker);
+                if(arch == req_arch)
+                {
+			if(sc_w->total_elapsed_flops[worker] > ret_val)
+				ret_val = sc_w->total_elapsed_flops[worker];
+			(*npus)++;
+                }
+        }
+
+	return ret_val;
+}
+
+/* compute an average value of the cpu/cuda velocity */
+double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
+{
+        int npus = 0;
+        double elapsed_flops = _get_best_total_elapsed_flops(sc_w, &npus, arch) / 1000000000.0 ; /* in gflops */
+	if(npus == 0)
+		return -1.0; 
+
+        if( elapsed_flops != 0.0)
+        {
+                double curr_time = starpu_timing_now();
+                double elapsed_time = (curr_time - sc_w->real_start_time) / 1000000.0; /* in seconds */
+		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
+                return velocity;
+        }
+
+        return -1.0;
+}
+
 static void _print_current_time()
 {
 	double curr_time = starpu_timing_now();
 	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
-	printf("Time: %lf \n", elapsed_time);
+	printf("Time: %lf\n", elapsed_time);
+	int i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
+		{
+			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
+
+			double cpu_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
+			double cuda_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
+			printf("%d: cpu_v = %lf cuda_v = %lf\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed);
+		}
+	}
+	return;
 }
 
 static int get_ntasks( int *tasks)
@@ -517,6 +577,12 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 		}
 		else
 		{
+			printf("try to remove from ctx %d: ", sched_ctx);
+			int j;
+			for(j = 0; j < nworkers_to_remove; j++)
+				printf(" %d", workers_to_remove[j]);
+			printf("\n");
+
 			int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 			if(ret != EBUSY)
 			{
@@ -702,6 +768,9 @@ static void notify_pushed_task(unsigned sched_ctx, int worker)
 	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0)
 		hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
 
+	if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
+		hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
+
 	int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
 
 	if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx])
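
notify_pushed_task now records two timestamps: start_time, which marks the current resizing sample and feeds the per-sample velocity in policy_tools.c, and the new real_start_time, set only once when the first task reaches the context and used by sched_ctx_hypervisor_get_velocity_per_worker_type for the whole-run average printed by _print_current_time. A hedged restatement of the difference:

/* Illustration only; field and function names are from this commit, the
 * place where start_time is reset for each new sample lives elsewhere in the
 * hypervisor and is assumed here.
 *
 *   start_time      - start of the current resizing sample
 *                     -> _get_velocity_per_worker_type(): velocity of the sample
 *   real_start_time - first time a task was pushed to the context
 *                     -> sched_ctx_hypervisor_get_velocity_per_worker_type():
 *                        velocity over the whole execution so far
 */
if(sc_w->total_flops != 0.0 && sc_w->real_start_time == 0.0)
	sc_w->real_start_time = starpu_timing_now(); /* only the very first push sets it */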