Explorar o código

workers must release the scheduling mutex before taking the role of hypervisor, update triggering issues for the other resizing strategies too

Andra Hugo hai 12 anos
pai
achega
6c954c729c

+ 72 - 45
sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -225,67 +225,94 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 }
 
 
-static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void _try_resizing(void)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-	sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	int ns = sc_hypervisor_get_nsched_ctxs();
+	int nw = starpu_worker_get_count(); /* Number of different workers */
+	
+	double w_in_s[ns][nw];
+	unsigned found_sol = _compute_max_velocity(ns, nw,  w_in_s, NULL, NULL);
+	/* if we did find at least one solution redistribute the resources */
+	if(found_sol)
 	{
-		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+		int w, s;
+		double nworkers[ns][2];
+		int nworkers_rounded[ns][2];
+		for(s = 0; s < ns; s++)
 		{
-			int ns = sc_hypervisor_get_nsched_ctxs();
-			int nw = starpu_worker_get_count(); /* Number of different workers */
-
-			double w_in_s[ns][nw];
-			unsigned found_sol = _compute_max_velocity(ns, nw,  w_in_s, NULL, NULL);
-			/* if we did find at least one solution redistribute the resources */
-			if(found_sol)
+			nworkers[s][0] = 0.0;
+			nworkers[s][1] = 0.0;
+			nworkers_rounded[s][0] = 0;
+			nworkers_rounded[s][1] = 0;
+			
+		}
+		
+		for(s = 0; s < ns; s++)
+		{
+			for(w = 0; w < nw; w++)
 			{
-				int w, s;
-				double nworkers[ns][2];
-				int nworkers_rounded[ns][2];
-				for(s = 0; s < ns; s++)
+				enum starpu_worker_archtype arch = starpu_worker_get_type(w);
+				
+				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] = 0.0;
-					nworkers[s][1] = 0.0;
-					nworkers_rounded[s][0] = 0;
-					nworkers_rounded[s][1] = 0;
-
+					nworkers[s][0] += w_in_s[s][w];
+					if(w_in_s[s][w] >= 0.3)
+						nworkers_rounded[s][0]++;
 				}
-
-				for(s = 0; s < ns; s++)
+				else
 				{
-					for(w = 0; w < nw; w++)
-					{
-						enum starpu_worker_archtype arch = starpu_worker_get_type(w);
-
-						if(arch == STARPU_CUDA_WORKER)
-						{
-							nworkers[s][0] += w_in_s[s][w];
-							if(w_in_s[s][w] >= 0.3)
-								nworkers_rounded[s][0]++;
-						}
-						else
-						{
-							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.5)
-								nworkers_rounded[s][1]++;
-						}
-					}
+					nworkers[s][1] += w_in_s[s][w];
+					if(w_in_s[s][w] > 0.5)
+						nworkers_rounded[s][1]++;
 				}
+			}
+		}
 /* 				for(s = 0; s < ns; s++) */
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
+		
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		
+	}
+}
 
-				sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
-
+static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+        if(ret != EBUSY)
+	{
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+		{
+			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+			{
+				_try_resizing();
 			}
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+                starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 }
 
+static void debit_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+        if(ret != EBUSY)
+	{
+                unsigned criteria = sc_hypervisor_get_resize_criteria();
+                if(criteria != SC_NOTHING && criteria == SC_IDLE)
+                {
+
+			if(sc_hypervisor_check_idle(sched_ctx, worker))
+                        {
+                                _try_resizing();
+//                              sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
+                        }
+                }
+                starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+        }
+}
+
 static void debit_lp_end_ctx(unsigned sched_ctx)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -300,7 +327,7 @@ struct sc_hypervisor_policy debit_lp_policy = {
 	.size_ctxs = NULL,
 	.handle_poped_task = debit_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = debit_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

+ 83 - 58
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -325,77 +325,102 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	return found_sol;
 }
 
-
-
-static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void _try_resizing(void)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-	sc_hypervisor_get_velocity_per_worker(sc_w, worker);
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	int ns = sc_hypervisor_get_nsched_ctxs();
+	int nw = starpu_worker_get_count(); /* Number of different workers */
+	
+	double w_in_s[ns][nw];
+//			double flops_on_w[ns][nw];
+	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
+	int i;
+	for(i = 0; i < ns; i++)
+		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
+	
+	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
+	/* if we did find at least one solution redistribute the resources */
+	if(found_sol)
 	{
-		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+		int w, s;
+		double nworkers[ns][2];
+		int nworkers_rounded[ns][2];
+		for(s = 0; s < ns; s++)
 		{
-			int ns = sc_hypervisor_get_nsched_ctxs();
-			int nw = starpu_worker_get_count(); /* Number of different workers */
-
-			double w_in_s[ns][nw];
-//			double flops_on_w[ns][nw];
-			double **flops_on_w = (double**)malloc(ns*sizeof(double*));
-			int i;
-			for(i = 0; i < ns; i++)
-				flops_on_w[i] = (double*)malloc(nw*sizeof(double));
-
-			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
-			/* if we did find at least one solution redistribute the resources */
-			if(found_sol)
+			nworkers[s][0] = 0.0;
+			nworkers[s][1] = 0.0;
+			nworkers_rounded[s][0] = 0;
+			nworkers_rounded[s][1] = 0;
+			
+		}
+		
+		for(s = 0; s < ns; s++)
+		{
+			for(w = 0; w < nw; w++)
 			{
-				int w, s;
-				double nworkers[ns][2];
-				int nworkers_rounded[ns][2];
-				for(s = 0; s < ns; s++)
+				enum starpu_worker_archtype arch = starpu_worker_get_type(w);
+				
+				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] = 0.0;
-					nworkers[s][1] = 0.0;
-					nworkers_rounded[s][0] = 0;
-					nworkers_rounded[s][1] = 0;
-
+					nworkers[s][0] += w_in_s[s][w];
+					if(w_in_s[s][w] >= 0.3)
+						nworkers_rounded[s][0]++;
 				}
-
-				for(s = 0; s < ns; s++)
+				else
 				{
-					for(w = 0; w < nw; w++)
-					{
-						enum starpu_worker_archtype arch = starpu_worker_get_type(w);
-
-						if(arch == STARPU_CUDA_WORKER)
-						{
-							nworkers[s][0] += w_in_s[s][w];
-							if(w_in_s[s][w] >= 0.3)
-								nworkers_rounded[s][0]++;
-						}
-						else
-						{
-							nworkers[s][1] += w_in_s[s][w];
-							if(w_in_s[s][w] > 0.5)
-								nworkers_rounded[s][1]++;
-						}
-					}
+					nworkers[s][1] += w_in_s[s][w];
+					if(w_in_s[s][w] > 0.5)
+						nworkers_rounded[s][1]++;
 				}
+			}
+		}
 /* 				for(s = 0; s < ns; s++) */
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
-
-				sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
-			}
-			for(i = 0; i < ns; i++)
-				free(flops_on_w[i]);
-			free(flops_on_w);
-		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
 	}
+	for(i = 0; i < ns; i++)
+		free(flops_on_w[i]);
+	free(flops_on_w);
+}
+
+static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+{
+        int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+        if(ret != EBUSY)
+        {
+                unsigned criteria = sc_hypervisor_get_resize_criteria();
+                if(criteria != SC_NOTHING && criteria == SC_VELOCITY)
+                {
+                        if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+                        {
+                                _try_resizing();
+                        }
+                }
+                starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+        }
 }
 
+static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+        int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+        if(ret != EBUSY)
+        {
+                unsigned criteria = sc_hypervisor_get_resize_criteria();
+                if(criteria != SC_NOTHING && criteria == SC_IDLE)
+                {
+
+			if(sc_hypervisor_check_idle(sched_ctx, worker))
+                        {
+                                _try_resizing();
+//                              sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);                                                                                                                
+                        }
+                }
+                starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+        }
+}
+
+
 static void ispeed_lp_end_ctx(unsigned sched_ctx)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -410,7 +435,7 @@ struct sc_hypervisor_policy ispeed_lp_policy = {
 	.size_ctxs = NULL,
 	.handle_poped_task = ispeed_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = ispeed_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

+ 7 - 2
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -498,6 +498,7 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		}
 	}
 
+/*if an optimal speed has not been computed yet do it now */
 	if(!has_opt_v)
 	{
 		int nw = 1;
@@ -533,6 +534,8 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 		}
 	}
 
+/* if we have an optimal speed for each type of worker compare the monitored one with the 
+   theoretical one */
 	if(has_opt_v)
 	{
 		for(i = 0; i < nsched_ctxs; i++)
@@ -553,7 +556,9 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 				return 1;
 		}
 	}
-	else
+	else /* if we have not been able to compute a theoretical velocity consider the env variable
+		SC_MAX_VELOCITY_GAP and compare the speed of the contexts, whenever the difference
+		btw them is greater than the max value the function returns true */
 	{
 		for(i = 0; i < nsched_ctxs; i++)
 		{
@@ -575,7 +580,7 @@ unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 						{
 							double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
 							double max_vel = _get_max_velocity_gap();
-							if(gap > max_vel-1 && gap < max_vel+1)
+							if(gap > max_vel)
 								return 1;
 						}
 					}

+ 0 - 3
sc_hypervisor/src/policies_utils/speed.c

@@ -155,16 +155,13 @@ double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starp
 {
 
 	double velocity = sc_hypervisor_get_velocity_per_worker_type(sc_w, arch);
-	printf("arch %d vel %lf\n", arch, velocity);
 	if(velocity == -1.0)
 	{
 		velocity = sc_hypervisor_get_ref_velocity_per_worker_type(sc_w, arch);
-		printf("arch %d ref_vel %lf\n", arch, velocity);
 	}
 	if(velocity == -1.0)
 	{
 		velocity = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
-		printf("arch %d default_vel %lf\n", arch, velocity);
 	}
        
 	return velocity;

+ 1 - 0
sc_hypervisor/src/sc_hypervisor.c

@@ -632,6 +632,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 					if(sc_w->resize_ack.moved_workers[j] == worker)
 					{
 						only_remove = 1;
+						starpu_pthread_mutex_unlock(&sc_w->mutex);
 						break;
 					}
 			}

+ 1 - 2
src/common/thread.c

@@ -120,8 +120,7 @@ int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
 	file += sizeof(char);
 	_STARPU_TRACE_TRYLOCK_MUTEX(file,__LINE__);
 
-	xbt_mutex_acquire(*mutex);
-	return 0;
+	return xbt_mutex_acquire(*mutex);
 }
 
 static int used_key[MAX_TSD];

+ 0 - 21
src/core/sched_policy.c

@@ -638,27 +638,6 @@ pick:
 		}
 	  }
 
-#ifdef STARPU_USE_SC_HYPERVISOR
-	struct _starpu_sched_ctx *sched_ctx = NULL;
-	struct starpu_sched_ctx_performance_counters *perf_counters = NULL;
-	int j;
-	for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
-	{
-		sched_ctx = worker->sched_ctx[j];
-		if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
-		{
-			perf_counters = sched_ctx->perf_counters;
-			if(perf_counters != NULL && perf_counters->notify_idle_cycle && perf_counters->notify_idle_end)
-			{
-				if(!task)
-					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
-				else
-					perf_counters->notify_idle_end(sched_ctx->id, worker->workerid);
-			}
-		}
-	}
-#endif //STARPU_USE_SC_HYPERVISOR
-
 
 	if (!task)
 		return NULL;

+ 27 - 0
src/drivers/driver_common/driver_common.c

@@ -203,11 +203,38 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 
 		STARPU_PTHREAD_MUTEX_UNLOCK(&args->sched_mutex);
 
+#ifdef STARPU_USE_SC_HYPERVISOR
+		struct _starpu_sched_ctx *sched_ctx = NULL;
+		struct starpu_sched_ctx_performance_counters *perf_counters = NULL;
+		int j;
+		for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+		{
+			sched_ctx = args->sched_ctx[j];
+			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+			{
+				perf_counters = sched_ctx->perf_counters;
+				if(perf_counters != NULL && perf_counters->notify_idle_cycle)
+				{
+					perf_counters->notify_idle_cycle(sched_ctx->id, args->workerid, 1.0);
+					
+				}
+			}
+		}
+#endif //STARPU_USE_SC_HYPERVISOR
+
 		return NULL;
 	}
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&args->sched_mutex);
 
+#ifdef STARPU_USE_SC_HYPERVISOR
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
+
+	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_end)
+		perf_counters->notify_idle_end(task->sched_ctx, args->workerid);
+#endif //STARPU_USE_SC_HYPERVISOR
+
 	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING)
 	{
 		_STARPU_TRACE_WORKER_SLEEP_END;