Przeglądaj źródła

revert ispeed to fix sample

Andra Hugo 12 lat temu
rodzic
commit
ae9da154e9

+ 12 - 18
sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -32,8 +32,9 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
 	
 	int w,s;
-	struct sched_ctx_hypervisor_wrapper* sc_w = NULL;
 
+	struct sched_ctx_hypervisor_wrapper* sc_w = NULL;
+	double total_flops = 0.0;
 	for(s = 0; s < ns; s++)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
@@ -56,23 +57,18 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
 			}
 			
-			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
+//			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
 		}
 		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
-//		flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */
-		flops[s] = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w)/1000000000.0; // in gflops 
-		printf("%d: elapsed flops %lf\n", sched_ctxs[s], flops[s]);
+		flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */
 	}
-
-
+	
 	/* take the exec time of the slowest ctx 
 	   as starting point and then try to minimize it
 	   as increasing it a little for the faster ctxs */
 	double tmax = _get_slowest_ctx_exec_time();
-/* 	double smallest_tmax = _get_fastest_ctx_exec_time(); //tmax - 0.5*tmax; */
-	double smallest_tmax = tmax - 0.5*tmax;
-
-	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);
+ 	double smallest_tmax = _get_fastest_ctx_exec_time(); //tmax - 0.5*tmax; 
+//	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);
 
 	double res = 1.0;
 	unsigned has_sol = 0;
@@ -154,7 +150,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 	int w, s;
 	glp_prob *lp;
 
-	printf("try with tmax %lf\n", tmax);
+//	printf("try with tmax %lf\n", tmax);
 	lp = glp_create_prob();
 	glp_set_prob_name(lp, "StarPU theoretical bound");
 	glp_set_obj_dir(lp, GLP_MAX);
@@ -336,7 +332,7 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum(w, s));
 			else
 				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
-			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
+//			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
 		}
 
 	glp_delete_prob(lp);
@@ -401,15 +397,13 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker)
 						}
 					}
 				}
-				for(s = 0; s < ns; s++)
-					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0],
-					       nworkers_rounded[s][1], nworkers_rounded[s][0]);
+/* 				for(s = 0; s < ns; s++) */
+/* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
+/* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
 
 				_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
 
 			}
-			else
-				printf("no sol\n");
 		}
 		pthread_mutex_unlock(&act_hypervisor_mutex);
 	}

+ 16 - 23
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -401,7 +401,7 @@ double _get_slowest_ctx_exec_time(void)
 	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
 	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
 
-	double curr_time = starpu_timing_now();
+/* 	double curr_time = starpu_timing_now(); */
 	double slowest_time = 0.0;
 
 	int s;
@@ -410,17 +410,13 @@ double _get_slowest_ctx_exec_time(void)
 	{
 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
 
-		double elapsed_time  = (curr_time - sc_w->start_time)/1000000;
-/* 		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx); */
-/* 		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/_get_ctx_velocity(sc_w); */
-/* 		double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); */
-/* 		double velocity = _get_ctx_velocity(sc_w); */
-/*                 double elapsed_time = (elapsed_flops/1000000000.0)/velocity; */
+//		double elapsed_time  = (curr_time - sc_w->start_time)/1000000;
+		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
+		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/_get_ctx_velocity(sc_w);
 		if(elapsed_time > slowest_time)
 			slowest_time = elapsed_time;
 
         }
-//	return slowest_time / 1000000.0;
 	return slowest_time;
 }
 
@@ -430,7 +426,7 @@ double _get_fastest_ctx_exec_time(void)
 	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
 
 	double curr_time = starpu_timing_now();
-	double fastest_time = curr_time;
+ 	double fastest_time = curr_time;
 
 	int s;
 	struct sched_ctx_hypervisor_wrapper* sc_w;		
@@ -440,15 +436,12 @@ double _get_fastest_ctx_exec_time(void)
 
 		struct sched_ctx_hypervisor_policy_config *config = sched_ctx_hypervisor_get_config(sc_w->sched_ctx);
 		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/_get_ctx_velocity(sc_w);
-/* 		double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);		 */
-/* 		double velocity = _get_ctx_velocity(sc_w); */
-/*                 double elapsed_time = (elapsed_flops/1000000000.0)/velocity; */
 		
 		if(elapsed_time < fastest_time)
 			fastest_time = elapsed_time;
 
         }
-//	return fastest_time / 1000000.0;
+
 	return fastest_time;
 }
 
@@ -483,16 +476,16 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
-/*  		enum starpu_archtype arch = starpu_worker_get_type(worker); */
-/* 		if(arch == STARPU_CUDA_WORKER) */
-/* 		{	 */
-/* 			double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker); */
-/* 			elapsed_time +=  (elapsed_data_used / transfer_velocity) / 1000000 ; */
-/* 			double latency = starpu_get_latency_RAM_CUDA(worker); */
-/* //			printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks); */
-/* 			elapsed_time += (elapsed_tasks * latency)/1000000; */
-/* //			printf("elapsed time after %lf \n", elapsed_time); */
-/* 		} */
+ 		enum starpu_archtype arch = starpu_worker_get_type(worker);
+		if(arch == STARPU_CUDA_WORKER)
+		{
+			double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
+			elapsed_time +=  (elapsed_data_used / transfer_velocity) / 1000000 ;
+			double latency = starpu_get_latency_RAM_CUDA(worker);
+//			printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks);
+			elapsed_time += (elapsed_tasks * latency)/1000000;
+//			printf("elapsed time after %lf \n", elapsed_time);
+		}
 			
                 double vel  = (elapsed_flops/elapsed_time);/* in Gflops/s */
 		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 0.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel;