Browse Source

refactoring hypervisor policies

Andra Hugo 13 years ago
parent
commit
384fc510d9

+ 4 - 1
sched_ctx_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c

@@ -219,7 +219,10 @@ void start_2ndbench(void (*bench)(float*, unsigned, unsigned))
 
 void construct_contexts(void (*bench)(float*, unsigned, unsigned))
 {
-	struct starpu_performance_counters *perf_counters = sched_ctx_hypervisor_init(IDLE_POLICY);
+	struct hypervisor_policy policy;
+	policy.custom = 0;
+	policy.name = "idle";
+	struct starpu_performance_counters *perf_counters = sched_ctx_hypervisor_init(&policy);
 	int nworkers1 = cpu1 + gpu + gpu1;
 	int nworkers2 = cpu2 + gpu + gpu2;
 	unsigned n_all_gpus = gpu + gpu1 + gpu2;

+ 11 - 13
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -71,8 +71,18 @@ struct sched_ctx_wrapper {
 	struct resize_ack resize_ack;
 };
 
+struct hypervisor_policy {
+	const char* name;
+	unsigned custom;
+	void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
+	void (*handle_pushed_task)(unsigned sched_ctx, int worker);
+	void (*handle_poped_task)(unsigned sched_ctx, int worker);
+	void (*handle_idle_end)(unsigned sched_ctx, int worker);
+	void (*handle_post_exec_hook)(unsigned sched_ctx, struct starpu_htbl32_node_s* resize_requests, int task_tag);
+};
+
 
-struct starpu_performance_counters* sched_ctx_hypervisor_init(int type);
+struct starpu_performance_counters* sched_ctx_hypervisor_init(struct hypervisor_policy* policy);
 
 void sched_ctx_hypervisor_shutdown(void);
 
@@ -103,15 +113,3 @@ int sched_ctx_hypervisor_get_nsched_ctxs();
 struct sched_ctx_wrapper* sched_ctx_hypervisor_get_wrapper(unsigned sched_ctx);
 
 double sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(struct sched_ctx_wrapper* sc_w);
-/* hypervisor policies */
-#define IDLE_POLICY 1
-#define APP_DRIVEN_POLICY 2
-#define GFLOPS_RATE_POLICY 3
-
-struct hypervisor_policy {
-	void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
-	void (*handle_pushed_task)(unsigned sched_ctx, int worker);
-	void (*handle_poped_task)(unsigned sched_ctx, int worker);
-	void (*handle_idle_end)(unsigned sched_ctx, int worker);
-	void (*handle_post_exec_hook)(unsigned sched_ctx, struct starpu_htbl32_node_s* resize_requests, int task_tag);
-};

+ 3 - 1
sched_ctx_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -16,5 +16,7 @@ struct hypervisor_policy app_driven_policy = {
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
-	.handle_post_exec_hook = app_driven_handle_post_exec_hook
+	.handle_post_exec_hook = app_driven_handle_post_exec_hook,
+	.custom = 0,
+	.name = "app_driven"
 };

+ 3 - 1
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -327,5 +327,7 @@ struct hypervisor_policy gflops_rate_policy = {
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
-	.handle_post_exec_hook = NULL
+	.handle_post_exec_hook = NULL,
+	.custom = 0,
+	.name = "gflops_rate"
 };

+ 3 - 1
sched_ctx_hypervisor/src/hypervisor_policies/idle_policy.c

@@ -13,5 +13,7 @@ struct hypervisor_policy idle_policy = {
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = idle_handle_idle_cycle,
 	.handle_idle_end = NULL,
-	.handle_post_exec_hook = NULL
+	.handle_post_exec_hook = NULL,
+	.custom = 0,
+	.name = "idle"
 };

+ 90 - 80
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -3,44 +3,101 @@
 unsigned imposed_resize = 0;
 struct starpu_performance_counters* perf_counters = NULL;
 
-extern struct hypervisor_policy idle_policy;
-extern struct hypervisor_policy app_driven_policy;
-extern struct hypervisor_policy gflops_rate_policy;
-
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_poped_task(unsigned sched_ctx, int worker, double flops);
 static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
 
-static void _load_hypervisor_policy(int type)
-{
-	struct hypervisor_policy *policy = NULL;
 
-	switch(type)
-	{
-	case IDLE_POLICY:
-		policy = &idle_policy;
-		break;
-	case APP_DRIVEN_POLICY:
-		policy = &app_driven_policy;
-		break;
-	case GFLOPS_RATE_POLICY:
-		policy = &gflops_rate_policy;
-		break;
+extern struct hypervisor_policy idle_policy;
+extern struct hypervisor_policy app_driven_policy;
+extern struct hypervisor_policy gflops_rate_policy;
 
-	}
+
+static struct hypervisor_policy *predefined_policies[] = {
+        &idle_policy,
+	&app_driven_policy,
+	&gflops_rate_policy
+};
+
+static void _load_hypervisor_policy(struct hypervisor_policy *policy)
+{
+        STARPU_ASSERT(policy);
+
+#ifdef STARPU_VERBOSE
+        if (policy->name)
+        {
+		_STARPU_DEBUG("Use %s hypervisor policy \n", policy->name);
+        }
+#endif
 
 	hypervisor.policy.handle_poped_task = policy->handle_poped_task;
 	hypervisor.policy.handle_pushed_task = policy->handle_pushed_task;
 	hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
 	hypervisor.policy.handle_idle_end = policy->handle_idle_end;
 	hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook;
+}
+
+
+static struct hypervisor_policy *_find_hypervisor_policy_from_name(const char *policy_name)
+{
+
+        if (!policy_name)
+                return NULL;
+
+        unsigned i;
+        for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
+        {
+                struct hypervisor_policy *p;
+                p = predefined_policies[i];
+                if (p->name)
+                {
+                        if (strcmp(policy_name, p->name) == 0) {
+                                /* we found a policy with the requested name */
+                                return p;
+                        }
+                }
+        }
+        fprintf(stderr, "Warning: hypervisor policy \"%s\" was not found, try \"help\" to get a list\n", policy_name);
 
+        /* nothing was found */
+        return NULL;
 }
 
+static struct hypervisor_policy *_select_hypervisor_policy(struct hypervisor_policy* hypervisor_policy)
+{
+	struct hypervisor_policy *selected_policy = NULL;
+
+	if(hypervisor_policy && hypervisor_policy->custom)
+		return hypervisor_policy;
+
+        /* we look if the application specified the name of a policy to load */
+        const char *policy_name;
+        if (hypervisor_policy && hypervisor_policy->name)
+        {
+                policy_name = hypervisor_policy->name;
+        }
+        else 
+	{
+                policy_name = getenv("HYPERVISOR_POLICY");
+        }
+
+        if (policy_name)
+                selected_policy = _find_hypervisor_policy_from_name(policy_name);
+
+        /* Perhaps there was no policy that matched the name */
+        if (selected_policy)
+                return selected_policy;
+
+        /* If no policy was specified, we use the idle policy as a default */
+
+        return &idle_policy;
+}
+
+
 /* initializez the performance counters that starpu will use to retrive hints for resizing */
-struct starpu_performance_counters** sched_ctx_hypervisor_init(int type)
+struct starpu_performance_counters** sched_ctx_hypervisor_init(struct hypervisor_policy *hypervisor_policy)
 {
 	hypervisor.min_tasks = 0;
 	hypervisor.nsched_ctxs = 0;
@@ -72,7 +129,8 @@ struct starpu_performance_counters** sched_ctx_hypervisor_init(int type)
 		}
 	}
 
-	_load_hypervisor_policy(type);
+	struct hypervisor_policy *selected_hypervisor_policy = _select_hypervisor_policy(hypervisor_policy);
+	_load_hypervisor_policy(selected_hypervisor_policy);
 
 	perf_counters = (struct starpu_performance_counters*)malloc(sizeof(struct starpu_performance_counters));
 	perf_counters->notify_idle_cycle = notify_idle_cycle;
@@ -219,7 +277,7 @@ static void _get_cpus(int *workers, int nworkers, int *cpus, int *ncpus)
 /* forbids another resize request before this one is take into account */
 void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move)
 {
-	if(nworkers_to_move > 0)
+	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])
 	{
 		int j;
 		printf("resize ctx %d with", sender_sched_ctx);
@@ -305,44 +363,14 @@ void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
 	_starpu_htbl_insert_32(&hypervisor.resize_requests[sched_ctx], (uint32_t)task_tag, (void*)sched_ctx);	
 }
 
-void get_overage_workers(unsigned sched_ctx, int *workerids, int nworkers, int *overage_workers, int *noverage_workers)
-{
-	struct worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
-	int worker, i, found = -1;
-
-	if(workers->init_cursor)
-		workers->init_cursor(workers);
-
-	while(workers->has_next(workers))
-	{
-		worker = workers->get_next(workers);
-		for(i = 0; i < nworkers; i++)
-			if(workerids[i] == worker)
-			{
-				found = worker;
-				break;
-			}
-		if(found == -1)
-			overage_workers[(*noverage_workers)++]  = worker;
-		found = -1;
-	}
-
-	if(workers->init_cursor)
-		workers->deinit_cursor(workers);
-}
-
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_idle_end(unsigned sched_ctx, int worker)
 {
 	if(hypervisor.resize[sched_ctx])
 		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
 
-	if(idle_policy.handle_idle_end)
-		idle_policy.handle_idle_end(sched_ctx, worker);
-	if(app_driven_policy.handle_idle_end)
-		app_driven_policy.handle_idle_end(sched_ctx, worker);
-	if(gflops_rate_policy.handle_idle_end)
-		gflops_rate_policy.handle_idle_end(sched_ctx, worker);
+	if(hypervisor.policy.handle_idle_end)
+		hypervisor.policy.handle_idle_end(sched_ctx, worker);
 }
 
 /* notifies the hypervisor that the worker spent another cycle in idle time */
@@ -355,13 +383,8 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 		{
 			sc_w->current_idle_time[worker] += idle_time;
 
-/* 			if(idle_policy.handle_idle_cycle) */
-/* 				idle_policy.handle_idle_cycle(sched_ctx, worker); */
-			if(app_driven_policy.handle_idle_cycle)
-				app_driven_policy.handle_idle_cycle(sched_ctx, worker);
-			if(gflops_rate_policy.handle_idle_cycle)
-				gflops_rate_policy.handle_idle_cycle(sched_ctx, worker);
-
+			if(hypervisor.policy.handle_idle_cycle)
+				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 		}		
 		else if(sc_w->resize_ack.receiver_sched_ctx != -1)
 		{
@@ -384,13 +407,8 @@ static void notify_pushed_task(unsigned sched_ctx, int worker)
 	if(!imposed_resize && ntasks == hypervisor.min_tasks)
 		hypervisor.resize[sched_ctx] = 1;
 
-	if(idle_policy.handle_pushed_task)
-		idle_policy.handle_pushed_task(sched_ctx, worker);
-	if(app_driven_policy.handle_pushed_task)
-		app_driven_policy.handle_pushed_task(sched_ctx, worker);
-	if(gflops_rate_policy.handle_pushed_task)
-		gflops_rate_policy.handle_pushed_task(sched_ctx, worker);
-
+	if(hypervisor.policy.handle_pushed_task)
+		hypervisor.policy.handle_pushed_task(sched_ctx, worker);
 }
 
 /* notifies the hypervisor that a task was poped from the queue of the worker */
@@ -405,12 +423,8 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 		struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
 		if(hypervisor.resize[sched_ctx])
 		{
-			if(idle_policy.handle_poped_task)
-				idle_policy.handle_poped_task(sched_ctx, worker);
-			if(app_driven_policy.handle_poped_task)
-				app_driven_policy.handle_poped_task(sched_ctx, worker);
-/* 			if(gflops_rate_policy.handle_poped_task) */
-/* 				gflops_rate_policy.handle_poped_task(sched_ctx, worker); */
+			if(hypervisor.policy.handle_poped_task)
+				hypervisor.policy.handle_poped_task(sched_ctx, worker);
 		}
 		else if(sc_w->resize_ack.receiver_sched_ctx != -1)
 		{
@@ -447,12 +461,8 @@ static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
 		{
 			struct starpu_htbl32_node_s* resize_requests = hypervisor.resize_requests[sched_ctx];
 
-			if(idle_policy.handle_post_exec_hook)
-				idle_policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
-			if(app_driven_policy.handle_post_exec_hook)
-				app_driven_policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
-			if(gflops_rate_policy.handle_post_exec_hook)
-				gflops_rate_policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
+			if(hypervisor.policy.handle_post_exec_hook)
+				hypervisor.policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
 		}
 		else if(sc_w->resize_ack.receiver_sched_ctx != -1)
 		{