Browse code

fix hierarchical resizing of contexts

Andra Hugo, 11 years ago
parent
commit
491a6c6124

+ 1 - 0
include/starpu_worker.h

@@ -107,6 +107,7 @@ int starpu_worker_get_mp_nodeid(int id);
 
 struct starpu_tree* starpu_workers_get_tree(void);
 
+unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
 #ifdef __cplusplus
 }
 #endif

+ 89 - 15
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -25,10 +25,10 @@ int resize_no = 0;
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	/* for vite */
-	printf("resize_no = %d\n", resize_no);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	printf("resize_no = %d %d ctxs\n", resize_no, ns);
 
-//	if(ns <= 1) return;
+	if(ns <= 0) return;
 
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers;
@@ -39,7 +39,8 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 
 	int total_nw[nw];
 	sc_hypervisor_group_workers_by_type(tw, total_nw);
-	
+	unsigned can_redistrib = 0;
+
 	
 	struct timeval start_time;
 	struct timeval end_time;
@@ -55,7 +56,7 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 	
 	if(vmax != 0.0)
 	{
-		int nworkers_per_ctx_rounded[nsched_ctxs][nw];
+		int nworkers_per_ctx_rounded[ns][nw];
 		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded);
 //		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
 		sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw);
@@ -85,6 +86,7 @@ static void _try_resizing_hierarchically(unsigned levels, unsigned current_level
 			_try_resizing_hierarchically(levels-1, current_level+1, sched_ctxs_child, nsched_ctxs_child, pus_father, npus_father);
 
 			free(pus_father);
+			free(sched_ctxs_child);
 		}
 	}
 	return;
@@ -152,6 +154,12 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 	{
 		
 		unsigned current_level = starpu_sched_ctx_get_hierarchy_level(sched_ctx);
+		if(current_level == 0)
+		{
+			_resize(NULL, -1, NULL, -1);			
+			return;
+		}
+
 		unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
 		int level;
 		int *pus_father_old = NULL;
@@ -163,27 +171,44 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 
 		for(level = current_level ; level >= 0; level--)
 		{
-			int *pus_father;
-			unsigned npus_father = 0;
-			npus_father = starpu_sched_ctx_get_workers_list(father, &pus_father);
+			int *pus_father = NULL;
+			int npus_father = -1;
+			if(level > 0)
+				npus_father = starpu_sched_ctx_get_workers_list(father, &pus_father);
 			
 			
-			unsigned *sched_ctxs;
-			int nsched_ctxs;
+			unsigned *sched_ctxs = NULL;
+			int nsched_ctxs = 0;
 			is_speed_diff = sc_hypervisor_check_speed_gap_btw_ctxs_on_level(level, pus_father, npus_father, father, &sched_ctxs, &nsched_ctxs);
 			if(!is_speed_diff)
 			{
 				if(level == current_level)
+				{
+					if(pus_father)
+						free(pus_father);
+					if(sched_ctxs)
+						free(sched_ctxs);
+					pus_father = NULL;
+					sched_ctxs = NULL;
 					break;
+				}
 				else
 				{
 					_resize(sched_ctxs_old, nsched_ctxs_old, pus_father_old, npus_father_old);
-					free(pus_father_old);
-					free(sched_ctxs_old);
+
+					if(pus_father_old)
+						free(pus_father_old);
+					if(sched_ctxs_old)
+						free(sched_ctxs_old);
 					pus_father_old = NULL;
 					sched_ctxs_old = NULL;
-					free(pus_father);
-					free(sched_ctxs);
+
+					if(pus_father)
+						free(pus_father);
+					if(nsched_ctxs > 0)
+						free(sched_ctxs);
+					pus_father = NULL;
+					sched_ctxs = NULL;
 					break;
 				}
 			}	
@@ -191,15 +216,21 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 				free(pus_father_old);
 			if(sched_ctxs_old)
 				free(sched_ctxs_old);
+			
 			pus_father_old = pus_father;
 			sched_ctxs_old = sched_ctxs;
 			npus_father_old = npus_father;
 			nsched_ctxs_old = nsched_ctxs;
 			
-			father = starpu_sched_ctx_get_inheritor(father);
+			father = level > 1 ? starpu_sched_ctx_get_inheritor(father) : STARPU_NMAX_SCHED_CTXS;
 		}
 		if(is_speed_diff)
 		{
+			if(pus_father_old)
+				free(pus_father_old);
+			if(sched_ctxs_old)
+				free(sched_ctxs_old);
+
 			_resize(NULL, -1, NULL, -1);
 		}
 	}
@@ -207,6 +238,7 @@ static void _resize_if_speed_diff(unsigned sched_ctx, int worker)
 	{
 		_resize(NULL, -1, NULL, -1);
 	}
+	return;
 }
 
 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, 
@@ -242,6 +274,48 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+static void _resize_leaves(int worker)
+{
+	unsigned s, s2;
+	unsigned *sched_ctxs = NULL;
+	unsigned nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs);
+       	unsigned workers_sched_ctxs[nsched_ctxs];
+	unsigned nworkers_sched_ctxs = 0;
+
+	struct sc_hypervisor_wrapper *sc_w = NULL;
+	for(s = 0; s < nsched_ctxs; s++)
+	{
+		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+		if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS)
+		{
+			workers_sched_ctxs[nworkers_sched_ctxs++] = sched_ctxs[s];
+		}
+	}
+
+	free(sched_ctxs);
+
+	unsigned leaves[nsched_ctxs];
+	unsigned nleaves = 0;
+	for(s = 0; s < nworkers_sched_ctxs; s++)
+	{
+		unsigned is_someones_father = 0;
+		for(s2 = 0; s2 < nworkers_sched_ctxs; s2++)
+		{
+			unsigned father = starpu_sched_ctx_get_inheritor(workers_sched_ctxs[s2]);
+			if(workers_sched_ctxs[s] == father)
+			{
+				is_someones_father = 1;
+				break;
+			}
+		}
+		if(!is_someones_father)
+			leaves[nleaves++] = workers_sched_ctxs[s];
+	}
+
+	for(s = 0; s < nleaves; s++)
+		_resize_if_speed_diff(leaves[s], worker);
+}
+
 static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
 	unsigned criteria = sc_hypervisor_get_resize_criteria();
@@ -251,7 +325,7 @@ static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 		if(ret != EBUSY)
 		{
 //			printf("trigger idle \n");
-			_resize_if_speed_diff(sched_ctx, worker);
+			_resize_leaves(worker);
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 	}

+ 16 - 0
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -77,6 +77,22 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 
 	}
 
+	if(nsched_ctxs == 1)
+	{
+		int w;
+		for(w = 0; w < nw; w++)
+			res[0][w] = total_nw[w];
+		double optimal_v = 0.0;
+#ifdef STARPU_USE_CUDA
+		optimal_v = res[0][0] * v[0][0] + res[0][1]* v[0][1];
+#else
+		optimal_v = res[0][0] * v[0][0];
+#endif //STARPU_USE_CUDA
+		_set_optimal_v(sched_ctxs[0], optimal_v);
+		return 1.0;
+	}
+
+
 	unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
 	double tmp_flops[STARPU_NMAX_SCHED_CTXS];
 	double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];

+ 1 - 2
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -594,8 +594,7 @@ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_
 unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs)
 {
 	sc_hypervisor_get_ctxs_on_level(sched_ctxs, nsched_ctxs, level, father_sched_ctx_id);
-	
-	
+
 	if(*nsched_ctxs  > 0)
 		return sc_hypervisor_check_speed_gap_btw_ctxs(*sched_ctxs, *nsched_ctxs, workers_in, nworkers_in);
 	return 0;	

+ 26 - 33
sc_hypervisor/src/sc_hypervisor.c

@@ -352,8 +352,17 @@ static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs)
 /* unregistered contexts will no longer be resized */
 void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 {
+	printf("unregister ctx %d with remaining flops %lf \n", hypervisor.sched_ctx_w[sched_ctx].sched_ctx, hypervisor.sched_ctx_w[sched_ctx].remaining_flops);
 	if(hypervisor.policy.end_ctx)
 		hypervisor.policy.end_ctx(sched_ctx);
+
+	unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
+	int *pus;
+	unsigned npus = starpu_sched_ctx_get_workers_list(sched_ctx, &pus);
+
+	starpu_sched_ctx_set_priority(pus, npus, father, 1);
+	free(pus);
+
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned i;
 	for(i = 0; i < hypervisor.nsched_ctxs; i++)
@@ -612,7 +621,6 @@ void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworke
 			printf(" %d", workers_to_add[j]);
 		printf("\n");
 		starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx);
-		starpu_sched_ctx_set_priority(workers_to_add, nworkers_to_add, sched_ctx, 1);
 		struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx);
 		unsigned i;
 		for(i = 0; i < nworkers_to_add; i++)
@@ -1012,47 +1020,32 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 	{
 		if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && sc_w->hyp_react_start_time != 0.0)
 		{
-			if(sc_hypervisor_check_idle(sched_ctx, worker))
+			unsigned idle_everywhere = 0;
+			int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+			if(ret != EBUSY)
 			{
-				unsigned sched_ctxs_idle[STARPU_NMAX_SCHED_CTXS];
-				unsigned nsched_ctxs_idle = 0;
-				unsigned idle_everywhere = 1;
-				int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-				if(ret != EBUSY)
+				if(sc_hypervisor_check_idle(sched_ctx, worker))
 				{
-					int j;
-					int ns = hypervisor.nsched_ctxs;
-					for(j = 0; j < ns; j++)
+					idle_everywhere = 1;
+				
+					unsigned *sched_ctxs = NULL;
+					unsigned nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs);
+					int s;
+					for(s = 0; s < nsched_ctxs; s++)
 					{
-						if(hypervisor.sched_ctxs[j] != STARPU_NMAX_SCHED_CTXS && 
-						   starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[j]))
+						if(hypervisor.sched_ctx_w[sched_ctxs[s]].sched_ctx != STARPU_NMAX_SCHED_CTXS)
 						{
-							if(!sc_hypervisor_check_idle(hypervisor.sched_ctxs[j], worker))
+							if(!sc_hypervisor_check_idle(sched_ctxs[s], worker))
 								idle_everywhere = 0;
-							else
-								sched_ctxs_idle[nsched_ctxs_idle++] = hypervisor.sched_ctxs[j];
 						}
 					}
-					if(idle_everywhere)
-					{
-/* 						unsigned other_ctx = choose_ctx_to_steal(worker); */
-/* 						if(other_ctx != STARPU_NMAX_SCHED_CTXS) */
-/* 						{ */
-/* 							sc_hypervisor_add_workers_to_sched_ctx(&worker, 1, other_ctx); */
-/* 							starpu_sched_ctx_set_priority(&worker, 1, other_ctx, 0); */
-/* 							_sc_hypervisor_allow_compute_idle(other_ctx, worker, 0); */
-/* 						} */
-					}
-					starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+					free(sched_ctxs);
 				}
-				if(idle_everywhere)
-				{
-					unsigned s;
-					for(s = 0; s < nsched_ctxs_idle; s++)
-						hypervisor.policy.handle_idle_cycle(sched_ctxs_idle[s], worker);
-				}
-//				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+				starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 			}
+			
+			if(idle_everywhere)
+				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 		}
 	}
 	return;

+ 15 - 0
src/core/sched_ctx.c

@@ -605,6 +605,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 	   !(nworkers_ctx == nworkers && nworkers_ctx == inheritor_sched_ctx->workers->nworkers))
 	{
 		starpu_sched_ctx_add_workers(workerids, nworkers_ctx, inheritor_sched_ctx_id);
+		starpu_sched_ctx_set_priority(workerids, nworkers_ctx, inheritor_sched_ctx_id, 1);
 	}
 
 	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
@@ -698,6 +699,18 @@ void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx
 	return;
 
 }
+
/* Propagate the given priority for a set of workers to every ancestor
 * (inheritor) context above sched_ctx in the hierarchy, stopping once the
 * root level (hierarchy level 0) is reached. */
static void _set_priority_hierarchically(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority)
{
	unsigned current = sched_ctx;
	/* iterative walk up the hierarchy, equivalent to the recursive climb */
	while(starpu_sched_ctx_get_hierarchy_level(current) > 0)
	{
		unsigned father = starpu_sched_ctx_get_inheritor(current);
		starpu_sched_ctx_set_priority(workers_to_add, nworkers_to_add, father, priority);
		current = father;
	}
}
+
 void starpu_sched_ctx_add_workers(int *workers_to_add, int nworkers_to_add, unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
@@ -720,11 +733,13 @@ void starpu_sched_ctx_add_workers(int *workers_to_add, int nworkers_to_add, unsi
 		{
 			_starpu_update_workers_with_ctx(added_workers, n_added_workers, sched_ctx->id);
 		}
+		_set_priority_hierarchically(workers_to_add, nworkers_to_add, sched_ctx_id, 0);
 
 	}
 
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&changing_ctx_mutex[sched_ctx_id]);
 
+
 	_starpu_relock_mutex_if_prev_locked();
 
 	if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS)

+ 14 - 0
src/core/workers.c

@@ -1863,3 +1863,17 @@ unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_
 	struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
 	return &w->sched_mutex == mutex;
 }
+
+unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
+{
+	unsigned s = 0;
+	unsigned nsched_ctxs = _starpu_worker_get_nsched_ctxs(workerid);
+	*sched_ctxs = (unsigned*)malloc(nsched_ctxs*sizeof(unsigned));
+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+	struct _starpu_sched_ctx_list *l = NULL;
+	for (l = worker->sched_ctx_list; l; l = l->next)
+	{
+		(*sched_ctxs)[s++] = l->sched_ctx;
+	}
+	return nsched_ctxs;
+}

+ 1 - 1
src/sched_policies/eager_central_policy.c

@@ -35,7 +35,7 @@ struct _starpu_eager_center_policy_data
 
 static void initialize_eager_center_policy(unsigned sched_ctx_id)
 {
-	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
+	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_TREE);
 
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)malloc(sizeof(struct _starpu_eager_center_policy_data));