
workers must release the scheduling mutex before taking on the role of hypervisor; fix the resize triggering for the other resizing strategies too

Andra Hugo 12 years ago
parent
commit
6c954c729c
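
The core pattern this commit applies in every policy callback below: a worker volunteers to act as hypervisor only if no other worker already does, by trying the hypervisor mutex instead of blocking on it while it may still hold scheduler state. A minimal sketch with plain pthreads; only act_hypervisor_mutex and _try_resizing are names taken from the diff, the rest is illustrative:

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t act_hypervisor_mutex = PTHREAD_MUTEX_INITIALIZER;

static void _try_resizing(void) { /* recompute and apply the LP solution */ }

/* Called from a worker: become the hypervisor only if nobody else is. */
static void on_scheduling_event(void)
{
	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
	if (ret != EBUSY) /* 0 means we own the lock and play hypervisor */
	{
		_try_resizing();
		pthread_mutex_unlock(&act_hypervisor_mutex);
	}
	/* on EBUSY another worker is already resizing: just keep computing */
}
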

+ 72 - 45
sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -225,67 +225,94 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 }
 
 
-static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void _try_resizing(void)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-	sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	int ns = sc_hypervisor_get_nsched_ctxs();
+	int nw = starpu_worker_get_count(); /* Number of different workers */
+	
+	double w_in_s[ns][nw];
+	unsigned found_sol = _compute_max_velocity(ns, nw,  w_in_s, NULL, NULL);
+	/* if we did find at least one solution redistribute the resources */
+	if(found_sol)
 	{
-		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+		int w, s;
+		double nworkers[ns][2];
+		int nworkers_rounded[ns][2];
+		for(s = 0; s < ns; s++)
 		{
-			int ns = sc_hypervisor_get_nsched_ctxs();
-			int nw = starpu_worker_get_count(); /* Number of different workers */
-
-			double w_in_s[ns][nw];
-			unsigned found_sol = _compute_max_velocity(ns, nw,  w_in_s, NULL, NULL);
-			/* if we did find at least one solution redistribute the resources */
-			if(found_sol)
+			nworkers[s][0] = 0.0;
+			nworkers[s][1] = 0.0;
+			nworkers_rounded[s][0] = 0;
+			nworkers_rounded[s][1] = 0;
+			
+		}
+		
+		for(s = 0; s < ns; s++)
+		{
+			for(w = 0; w < nw; w++)
 			{
-				int w, s;
-				double nworkers[ns][2];
-				int nworkers_rounded[ns][2];
-				for(s = 0; s < ns; s++)
+				enum starpu_worker_archtype arch = starpu_worker_get_type(w);
+				
+				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] = 0.0;
-					nworkers[s][1] = 0.0;
-					nworkers_rounded[s][0] = 0;
-					nworkers_rounded[s][1] = 0;
-
+					nworkers[s][0] += w_in_s[s][w];
+					if(w_in_s[s][w] >= 0.3)
+						nworkers_rounded[s][0]++;
 				}
-
-				for(s = 0; s < ns; s++)
+				else
 				{
-					for(w = 0; w < nw; w++)
-					{
-						enum starpu_worker_archtype arch = starpu_worker_get_type(w);
-
-						if(arch == STARPU_CUDA_WORKER)
-						{
-							nworkers[s][0] += w_in_s[s][w];
-							if(w_in_s[s][w] >= 0.3)
-								nworkers_rounded[s][0]++;
-						}
-						else
-						{
-							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.5)
-								nworkers_rounded[s][1]++;
-						}
-					}
+					nworkers[s][1] += w_in_s[s][w];
+					if(w_in_s[s][w] > 0.5)
+						nworkers_rounded[s][1]++;
 				}
+			}
+		}
 /* 				for(s = 0; s < ns; s++) */
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
+		
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		
+	}
+}
 
-				sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
-
+static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		{
+			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			{
+				_try_resizing();
 			}
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 }
 
+static void debit_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		{
+			if(sc_hypervisor_check_idle(sched_ctx, worker))
+			{
+				_try_resizing();
+/*				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1); */
+			}
+		}
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
 static void debit_lp_end_ctx(unsigned sched_ctx)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -300,7 +327,7 @@ struct sc_hypervisor_policy debit_lp_policy = {
 	.size_ctxs = NULL,
 	.handle_poped_task = debit_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = debit_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

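For reference, the rounding step that _try_resizing performs on the LP output: w_in_s[s][w] is the fraction of worker w assigned to context s, and the thresholds differ per architecture (0.3 for CUDA, 0.5 for everything else), presumably so that even a modest fractional share of a scarce GPU grants the whole device. A standalone sketch with made-up fractions:

#include <stdio.h>

/* Hypothetical LP output: w_in_s[s][w] is the fraction of worker w
   assigned to context s. Thresholds as in _try_resizing: a CUDA worker
   counts from 0.3 up, any other worker only above 0.5. */
int main(void)
{
	double w_in_s[2][3] = { {0.35, 0.60, 0.20},
	                        {0.65, 0.40, 0.80} };
	int is_cuda[3] = {1, 0, 0}; /* worker 0 is the only GPU */
	int s, w;

	for (s = 0; s < 2; s++)
	{
		int ngpus = 0, ncpus = 0;
		for (w = 0; w < 3; w++)
		{
			if (is_cuda[w] && w_in_s[s][w] >= 0.3)
				ngpus++;
			else if (!is_cuda[w] && w_in_s[s][w] > 0.5)
				ncpus++;
		}
		printf("ctx %d gets %d gpu(s) and %d cpu(s)\n", s, ngpus, ncpus);
	}
	return 0;
}
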
+ 83 - 58
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -325,77 +325,102 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	return found_sol;
 }
 
-
-
-static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void _try_resizing(void)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-	sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	int ns = sc_hypervisor_get_nsched_ctxs();
+	int nw = starpu_worker_get_count(); /* Number of different workers */
+	
+	double w_in_s[ns][nw];
+//			double flops_on_w[ns][nw];
+	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
+	int i;
+	for(i = 0; i < ns; i++)
+		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
+	
+	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
+	/* if we did find at least one solution redistribute the resources */
+	if(found_sol)
 	{
-		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+		int w, s;
+		double nworkers[ns][2];
+		int nworkers_rounded[ns][2];
+		for(s = 0; s < ns; s++)
 		{
-			int ns = sc_hypervisor_get_nsched_ctxs();
-			int nw = starpu_worker_get_count(); /* Number of different workers */
-
-			double w_in_s[ns][nw];
-//			double flops_on_w[ns][nw];
-			double **flops_on_w = (double**)malloc(ns*sizeof(double*));
-			int i;
-			for(i = 0; i < ns; i++)
-				flops_on_w[i] = (double*)malloc(nw*sizeof(double));
-
-			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
-			/* if we did find at least one solution redistribute the resources */
-			if(found_sol)
+			nworkers[s][0] = 0.0;
+			nworkers[s][1] = 0.0;
+			nworkers_rounded[s][0] = 0;
+			nworkers_rounded[s][1] = 0;
+			
+		}
+		
+		for(s = 0; s < ns; s++)
+		{
+			for(w = 0; w < nw; w++)
 			{
-				int w, s;
-				double nworkers[ns][2];
-				int nworkers_rounded[ns][2];
-				for(s = 0; s < ns; s++)
+				enum starpu_worker_archtype arch = starpu_worker_get_type(w);
+				
+				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] = 0.0;
-					nworkers[s][1] = 0.0;
-					nworkers_rounded[s][0] = 0;
-					nworkers_rounded[s][1] = 0;
-
+					nworkers[s][0] += w_in_s[s][w];
+					if(w_in_s[s][w] >= 0.3)
+						nworkers_rounded[s][0]++;
 				}
-
-				for(s = 0; s < ns; s++)
+				else
 				{
-					for(w = 0; w < nw; w++)
-					{
-						enum starpu_worker_archtype arch = starpu_worker_get_type(w);
-
-						if(arch == STARPU_CUDA_WORKER)
-						{
-							nworkers[s][0] += w_in_s[s][w];
-							if(w_in_s[s][w] >= 0.3)
-								nworkers_rounded[s][0]++;
-						}
-						else
-						{
-							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.5)
-								nworkers_rounded[s][1]++;
-						}
-					}
+					nworkers[s][1] += w_in_s[s][w];
+					if(w_in_s[s][w] > 0.5)
+						nworkers_rounded[s][1]++;
 				}
+			}
+		}
 /* 				for(s = 0; s < ns; s++) */
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
-
-				sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
-			}
-			for(i = 0; i < ns; i++)
-				free(flops_on_w[i]);
-			free(flops_on_w);
-		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
 	}
+	for(i = 0; i < ns; i++)
+		free(flops_on_w[i]);
+	free(flops_on_w);
+}
+
+static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		{
+			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			{
+				_try_resizing();
+			}
+		}
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
 }
 
+static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		{
+			if(sc_hypervisor_check_idle(sched_ctx, worker))
+			{
+				_try_resizing();
+/*				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1); */
+			}
+		}
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
+
 static void ispeed_lp_end_ctx(unsigned sched_ctx)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -410,7 +435,7 @@ struct sc_hypervisor_policy ispeed_lp_policy = {
 	.size_ctxs = NULL,
 	.handle_poped_task = ispeed_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = ispeed_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

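One difference from the debit policy: flops_on_w is heap-allocated row by row rather than declared as a variable-length array like w_in_s (the commented-out declaration above hints at the switch), plausibly to keep large ns-by-nw matrices off the worker's stack. The allocate/free pattern in isolation, with hypothetical helper names:

#include <stdlib.h>

static double **alloc_flops_matrix(int ns, int nw)
{
	double **m = (double **)malloc(ns * sizeof(double *));
	int i;
	for (i = 0; i < ns; i++)
		m[i] = (double *)calloc(nw, sizeof(double)); /* zeroed rows */
	return m;
}

static void free_flops_matrix(double **m, int ns)
{
	int i;
	for (i = 0; i < ns; i++)
		free(m[i]);
	free(m);
}
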
+ 7 - 2
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -498,6 +498,7 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		}
 	}
 
+/* if an optimal speed has not been computed yet, compute it now */
 	if(!has_opt_v)
 	{
 		int nw = 1;
@@ -533,6 +534,8 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		}
 	}
 
+/* if we have an optimal speed for each type of worker, compare the monitored
+   speed with the theoretical one */
 	if(has_opt_v)
 	{
 		for(i = 0; i < nsched_ctxs; i++)
@@ -553,7 +556,9 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 				return 1;
 		}
 	}
-	else
+	else /* if we could not compute a theoretical velocity, fall back to the env variable
+		SC_MAX_VELOCITY_GAP and compare the speeds of the contexts: whenever the gap
+		between them exceeds the max value the function returns true */
 	{
 		for(i = 0; i < nsched_ctxs; i++)
 		{
@@ -575,7 +580,7 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 						{
 							double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
 							double max_vel = _get_max_velocity_gap();
-							if(gap > max_vel-1 && gap < max_vel+1)
+							if(gap > max_vel)
 								return 1;
 						}
 					}

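The last hunk is a real bug fix: the old test only fired when the gap happened to land inside the window (max_vel-1, max_vel+1), so badly imbalanced contexts were never resized. With made-up speeds and a threshold of 2:

#include <stdio.h>

int main(void)
{
	double ctx_v = 10.0, other_ctx_v = 50.0; /* hypothetical monitored speeds */
	double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
	double max_vel = 2.0; /* hypothetical SC_MAX_VELOCITY_GAP value */

	int old_trigger = (gap > max_vel - 1 && gap < max_vel + 1); /* 0: 5.0 outside (1,3) */
	int new_trigger = (gap > max_vel);                          /* 1: resize fires */

	printf("gap=%.1f old=%d new=%d\n", gap, old_trigger, new_trigger);
	return 0;
}
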
+ 0 - 3
sc_hypervisor/src/policies_utils/speed.c

@@ -155,16 +155,13 @@ double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starp
 {
 
 	double velocity = sc_hypervisor_get_velocity_per_worker_type(sc_w, arch);
-	printf("arch %d vel %lf\n", arch, velocity);
 	if(velocity == -1.0)
 	{
 		velocity = sc_hypervisor_get_ref_velocity_per_worker_type(sc_w, arch);
-		printf("arch %d ref_vel %lf\n", arch, velocity);
 	}
 	if(velocity == -1.0)
 	{
 		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
-		printf("arch %d default_vel %lf\n", arch, velocity);
 	}
        
 	return velocity;

+ 1 - 0
sc_hypervisor/src/sc_hypervisor.c

@@ -632,6 +632,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 					if(sc_w->resize_ack.moved_workers[j] == worker)
 					{
 						only_remove = 1;
+						starpu_pthread_mutex_unlock(&sc_w->mutex);
 						break;
 					}
 			}

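The added unlock in _ack_resize_completed plugs a lock leak: the break left the loop with sc_w->mutex still held whenever the worker was found among the moved ones. The shape of the fix, sketched with pthreads and stand-in names (worker_was_moved, resize_mutex, moved_workers are illustrative, not the real API); the sketch uses a single unlock that dominates every exit, while the diff achieves the same by unlocking just before the break:

#include <pthread.h>

static int worker_was_moved(pthread_mutex_t *resize_mutex,
                            const int *moved_workers, int nmoved, int worker)
{
	int only_remove = 0;
	int j;
	pthread_mutex_lock(resize_mutex);
	for (j = 0; j < nmoved; j++)
		if (moved_workers[j] == worker)
		{
			only_remove = 1;
			break;
		}
	pthread_mutex_unlock(resize_mutex); /* every exit path releases the lock */
	return only_remove;
}
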
+ 1 - 2
src/common/thread.c

@@ -120,8 +120,7 @@ int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
 	file += sizeof(char);
 	_STARPU_TRACE_TRYLOCK_MUTEX(file,__LINE__);
 
-	xbt_mutex_acquire(*mutex);
-	return 0;
+	return xbt_mutex_acquire(*mutex);
 }
 
 static int used_key[MAX_TSD];
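This wrapper change matters for all the handlers above: with the old body, the simgrid build's trylock blocked inside xbt_mutex_acquire and always returned 0, so ret != EBUSY held for every worker, the "one hypervisor at a time" guard was a no-op, and a worker could block on the hypervisor mutex while still inside the scheduler. The native semantics the callers rely on, for reference:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

int main(void)
{
	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&m);              /* somebody already holds the lock */
	int ret = pthread_mutex_trylock(&m); /* must not block */
	printf("trylock on a held mutex -> %s\n",
	       ret == EBUSY ? "EBUSY (caller moves on)" : "acquired");
	pthread_mutex_unlock(&m);
	return 0;
}
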

+ 0 - 21
src/core/sched_policy.c

@@ -638,27 +638,6 @@ pick:
 		}
 	  }
 
-#ifdef STARPU_USE_SC_HYPERVISOR
-	struct _starpu_sched_ctx *sched_ctx = NULL;
-	struct starpu_sched_ctx_performance_counters *perf_counters = NULL;
-	int j;
-	for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
-	{
-		sched_ctx = worker->sched_ctx[j];
-		if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
-		{
-			perf_counters = sched_ctx->perf_counters;
-			if(perf_counters != NULL && perf_counters->notify_idle_cycle && perf_counters->notify_idle_end)
-			{
-				if(!task)
-					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
-				else
-					perf_counters->notify_idle_end(sched_ctx->id, worker->workerid);
-			}
-		}
-	}
-#endif //STARPU_USE_SC_HYPERVISOR
-
 
 	if (!task)
 		return NULL;

+ 27 - 0
src/drivers/driver_common/driver_common.c

@@ -203,11 +203,38 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 
 		STARPU_PTHREAD_MUTEX_UNLOCK(&args->sched_mutex);
 
+#ifdef STARPU_USE_SC_HYPERVISOR
+		struct _starpu_sched_ctx *sched_ctx = NULL;
+		struct starpu_sched_ctx_performance_counters *perf_counters = NULL;
+		int j;
+		for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+		{
+			sched_ctx = args->sched_ctx[j];
+			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+			{
+				perf_counters = sched_ctx->perf_counters;
+				if(perf_counters != NULL && perf_counters->notify_idle_cycle)
+				{
+					perf_counters->notify_idle_cycle(sched_ctx->id, args->workerid, 1.0);
+					
+				}
+			}
+		}
+#endif //STARPU_USE_SC_HYPERVISOR
+
 		return NULL;
 	}
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&args->sched_mutex);
 
+#ifdef STARPU_USE_SC_HYPERVISOR
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
+
+	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_end)
+		perf_counters->notify_idle_end(task->sched_ctx, args->workerid);
+#endif //STARPU_USE_SC_HYPERVISOR
+
 	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING)
 	{
 		_STARPU_TRACE_WORKER_SLEEP_END;
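
Taken together with the sched_policy.c hunk above, these last two hunks implement the commit message: the idle notifications that used to run from the task-picking path now run in _starpu_get_worker_task only after STARPU_PTHREAD_MUTEX_UNLOCK(&args->sched_mutex), so a worker never calls into the hypervisor, which takes act_hypervisor_mutex and per-context locks, while still holding its scheduling mutex. The ordering in a minimal sketch; every name except sched_mutex and act_hypervisor_mutex is illustrative:

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t act_hypervisor_mutex = PTHREAD_MUTEX_INITIALIZER;

static void notify_idle_cycle(void) /* may grab hypervisor-side locks */
{
	pthread_mutex_lock(&act_hypervisor_mutex);
	/* ... resizing decisions ... */
	pthread_mutex_unlock(&act_hypervisor_mutex);
}

static void *get_worker_task(void)
{
	void *task;
	pthread_mutex_lock(&sched_mutex);
	task = NULL; /* stand-in for popping a task from the scheduler */
	pthread_mutex_unlock(&sched_mutex); /* released BEFORE hypervisor work */

	if (task == NULL)
		notify_idle_cycle(); /* safe: no scheduler lock held here */
	return task;
}

Keeping the two lock domains disjoint rules out the inversion where the hypervisor, holding act_hypervisor_mutex, tries to take a worker's scheduling mutex to move it while that worker blocks the other way around.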