
fix size_ctxs (do not consider ready tasks when sizing, only when resizing); compute the speed over a fixed elapsed-time window

Andra Hugo 12 years ago
parent
commit
9f20f53ce9

+ 3 - 0
sc_hypervisor/include/sc_hypervisor_monitoring.h

@@ -112,6 +112,9 @@ struct sc_hypervisor_wrapper
 
 	/* the number of ready tasks submitted to a ctx */
 	int nready_tasks;
+
+	/* boolean indicating that a context is being sized */
+	unsigned to_be_sized;
 };
 
 /* return the wrapper of context that saves its monitoring information */
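The new field follows a set-and-consume protocol across the hunks below: it is cleared in sc_hypervisor_init(), set for every context in feft_lp_size_ctxs() just before the sizing linear program runs, and reset the first time sc_hypervisor_lp_get_nworkers_per_ctx() accounts that context's flops. A simplified excerpt (a stand-in struct, not the full header) of the fields involved in that decision:

struct wrapper_excerpt /* stand-in for struct sc_hypervisor_wrapper */
{
	double remaining_flops;  /* flops still to be executed in the ctx */
	double ready_flops;      /* flops of the currently ready tasks */
	int nready_tasks;        /* number of ready tasks submitted to the ctx */
	unsigned to_be_sized;    /* 1 while the ctx is waiting to be sized */
};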

+ 13 - 2
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -19,11 +19,13 @@
 #include <starpu_config.h>
 #include <sys/time.h>
 
+int resize_no = 0;
 #ifdef STARPU_HAVE_GLPK_H
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	/* for vite */
-	starpu_trace_user_event(2);
+	printf("resize_no = %d\n", resize_no);
+	starpu_trace_user_event(resize_no++);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers;
@@ -90,6 +92,14 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 	
 
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+	struct sc_hypervisor_wrapper* sc_w  = NULL;
+	int s = 0;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+		sc_w->to_be_sized = 1;
+	}
+
 	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_type, total_nw, tw);
 	if(vmax != 0.0)
 	{
@@ -127,11 +137,13 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 				break;
 			}
 		}
+
 		if(has_workers)
 			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_type_rounded, nworkers_per_type, curr_sched_ctxs, tw);
 		else
 			sc_hypervisor_lp_distribute_resources_in_ctxs(sched_ctxs, ns, nw, nworkers_per_type_rounded, nworkers_per_type, workers, curr_nworkers, tw);
 	}
+	printf("finished size ctxs\n");
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -168,7 +180,6 @@ static void feft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs ,
 				 return;
 			 }
 		}
-
		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
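The flag set above is consumed in lp_tools.c below, where the flops fed to the linear program are chosen per context. A minimal standalone sketch of that decision, assuming the simplified stand-in struct from the excerpt above (the real code operates on struct sc_hypervisor_wrapper):

#include <stdio.h>

struct wrapper_excerpt { double remaining_flops; double ready_flops; unsigned to_be_sized; };

/* Mirrors the flops selection in sc_hypervisor_lp_get_nworkers_per_ctx():
   a context being sized is accounted with its remaining flops (ready tasks
   are ignored), while a context being resized falls back to its ready flops
   only when the remaining flops are exhausted. */
static double flops_for_lp(struct wrapper_excerpt *w)
{
	if (w->to_be_sized)
	{
		w->to_be_sized = 0;               /* the flag is consumed by the first pass */
		return w->remaining_flops / 1e9;  /* in gflops */
	}
	if (w->remaining_flops < 0.0)
		return w->ready_flops / 1e9;
	if (w->ready_flops / 1e9 <= 0.000002)
		return 0.0;
	return w->remaining_flops / 1e9;
}

int main(void)
{
	struct wrapper_excerpt w = { .remaining_flops = 4e9, .ready_flops = 0.0, .to_be_sized = 1 };
	printf("sizing pass:   %.1f gflops\n", flops_for_lp(&w)); /* 4.0: remaining flops, ready tasks ignored */
	printf("resizing pass: %.1f gflops\n", flops_for_lp(&w)); /* 0.0: no ready work yet */
	return 0;
}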

+ 17 - 16
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -39,17 +39,24 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		for(w = 0; w < nw; w++)
 			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
 		
-//		flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
-		if(sc_w->remaining_flops < 0.0)
-			flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
+		if(sc_w->to_be_sized)
+		{
+			flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
+			sc_w->to_be_sized = 0;
+		}
 		else
 		{
-			if((sc_w->ready_flops/1000000000.0) <= 0.000002)
-				flops[i] = 0.0;
+			if(sc_w->remaining_flops < 0.0)
+				flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
 			else
-				flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
+			{
+				if((sc_w->ready_flops/1000000000.0) <= 0.000002)
+					flops[i] = 0.0;
+				else
+					flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
+			}
 		}
-/* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",  */
+/* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
 /* 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
 	}
 
@@ -78,7 +85,7 @@ has some last flops or a ready task that does not even have any flops
 we give a worker (in shared mode) to the context in order to leave him
 finish its work = we give -1.0 value instead of 0.0 and further on in
 the distribution function we take this into account and revert the variable
-to its 0.0 value */
+to its 0.0 value */ 
 		if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
 		{
 			for(w = 0; w < nw; w++)
@@ -515,17 +522,11 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 				}
 			}
 		}
-		if(nw_add > 0)
-		{
-			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
-		}
+//		sc_hypervisor_start_resize(sched_ctxs[s]);
+		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
 		int workers_remove[STARPU_NMAXWORKERS];
 		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
 		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
-		sc_hypervisor_start_resize(sched_ctxs[s]);
-
-
-//		sc_hypervisor_stop_resize(current_sched_ctxs[s]);
 	}
 }

+ 2 - 2
sc_hypervisor/src/policies_utils/speed.c

@@ -45,7 +45,7 @@ double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w)
 		int n_all_cuda = starpu_cuda_worker_get_count();
 		double th_speed = SC_HYPERVISOR_DEFAULT_CPU_SPEED * n_all_cpus + SC_HYPERVISOR_DEFAULT_CUDA_SPEED * n_all_cuda;
 		double time_sample = 0.1 * ((total_flops/1000000000.0) / th_speed);
-		can_compute_speed = elapsed_time >= time_sample;
+		can_compute_speed = elapsed_time > 1.0;//time_sample;
 	}
 	else
 		can_compute_speed = elapsed_flops >= redim_sample;
@@ -127,7 +127,7 @@ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_
 		double th_speed = SC_HYPERVISOR_DEFAULT_CPU_SPEED * n_all_cpus + SC_HYPERVISOR_DEFAULT_CUDA_SPEED * n_all_cuda;
 		double total_flops = sc_w->total_flops;
 		double time_sample = 0.1 * ((total_flops/1000000000.0) / th_speed);
-		can_compute_speed = elapsed_time >= time_sample;
+		can_compute_speed = elapsed_time > 1.0;
 	}
 	else
 		can_compute_speed = ctx_elapsed_flops > ctx_sample;
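With this change the context speed is considered measurable once a fixed one-second wall-clock window has elapsed, instead of a window proportional to the theoretical execution time of the total flops. A hedged side-by-side sketch of the two criteria (the speed constants below are placeholders, not the real SC_HYPERVISOR_DEFAULT_* values):

#include <stdbool.h>

/* Placeholder theoretical speeds in gflop/s; the real code uses
   SC_HYPERVISOR_DEFAULT_CPU_SPEED and SC_HYPERVISOR_DEFAULT_CUDA_SPEED. */
#define CPU_SPEED_GFLOPS  2.0
#define CUDA_SPEED_GFLOPS 50.0

/* Old criterion: wait until 10% of the theoretical execution time of the
   total flops has elapsed before trusting the measured speed. */
bool can_compute_speed_old(double elapsed_time, double total_flops,
                           int n_all_cpus, int n_all_cuda)
{
	double th_speed = CPU_SPEED_GFLOPS * n_all_cpus + CUDA_SPEED_GFLOPS * n_all_cuda;
	double time_sample = 0.1 * ((total_flops / 1e9) / th_speed);
	return elapsed_time >= time_sample;
}

/* New criterion: a fixed 1 s window, independent of the amount of flops. */
bool can_compute_speed_new(double elapsed_time)
{
	return elapsed_time > 1.0;
}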

+ 2 - 3
sc_hypervisor/src/sc_hypervisor.c

@@ -193,7 +193,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 		hypervisor.sched_ctx_w[i].ready_flops = 0.0;
 		hypervisor.sched_ctx_w[i].total_flops_available = 0;
 		hypervisor.sched_ctx_w[i].nready_tasks = 0;
-
+		hypervisor.sched_ctx_w[i].to_be_sized = 0;
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
 		{
@@ -313,8 +313,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
-	if(strcmp(hypervisor.policy.name, "app_driven") == 0)
-		hypervisor.resize[sched_ctx] = 1;
+	hypervisor.resize[sched_ctx] = 1;
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }