
fixes for triggering the resize

Andra Hugo 12 years ago
parent
commit
76282f234b

+ 13 - 1
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -1,4 +1,4 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2012  INRIA
  *
@@ -27,6 +27,9 @@ extern "C"
 
 #define HYPERVISOR_REDIM_SAMPLE 0.02
 #define HYPERVISOR_START_REDIM_SAMPLE 0.1
+#define SC_NOTHING 0
+#define SC_IDLE 1
+#define SC_VELOCITY 2
 
 struct sc_hypervisor_policy_task_pool
 {
@@ -94,6 +97,15 @@ void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_
 /* check if we trigger resizing or not */
 unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker);
 
+/* check if the worker has been idle long enough */
+unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker);
+
+/* check if there is a velocity gap between contexts */
+unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void);
+
+/* check what triggers resizing (idle, velocity, etc.) */
+unsigned sc_hypervisor_get_resize_criteria(void);
+
 #ifdef __cplusplus
 }
 #endif
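
The header now exposes the two trigger checks separately, selected at runtime through sc_hypervisor_get_resize_criteria() (which reports the criteria chosen via the HYPERVISOR_TRIGGER_RESIZE environment variable, see sc_hypervisor.c below). A minimal sketch of how a policy is expected to wire them up (not part of the commit; _try_resizing() is a placeholder for the policy's own LP-based resizing step, as defined in the policies further down):

	/* Sketch only: SC_VELOCITY is checked on the task-pop path,
	   SC_IDLE on the idle-cycle path. */
	static void _try_resizing(void); /* placeholder for the policy's resizing code */

	static void my_handle_poped_task(unsigned sched_ctx, int worker)
	{
		if(sc_hypervisor_get_resize_criteria() == SC_VELOCITY
		   && sc_hypervisor_check_velocity_gap_btw_ctxs())
			_try_resizing();
	}

	static int my_handle_idle_cycle(unsigned sched_ctx, int worker)
	{
		if(sc_hypervisor_get_resize_criteria() == SC_IDLE
		   && sc_hypervisor_check_idle(sched_ctx, worker))
			_try_resizing();
		return 0;
	}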

+ 59 - 34
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -15,49 +15,61 @@
  */
 
 #include "sc_hypervisor_lp.h"
+#include "sc_hypervisor_policy.h"
 #include <starpu_config.h>
 #include <sys/time.h>
 
 #ifdef STARPU_HAVE_GLPK_H
-static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void _try_resizing(void)
 {
-	if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
+	
+	double nworkers[nsched_ctxs][2];
+	
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
 	{
-		int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
-
-		double nworkers[nsched_ctxs][2];
-
-		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-		if(ret != EBUSY)
-		{
-			int nw = 1;
+		starpu_trace_user_event(2);
+		int nw = 1;
 #ifdef STARPU_USE_CUDA
-			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
-			nw = ncuda != 0 ? 2 : 1;
+		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
+		nw = ncuda != 0 ? 2 : 1;
 #endif
-			int total_nw[nw];
-			sc_hypervisor_group_workers_by_type(NULL, -1, nw, total_nw);
-
-
-			struct timeval start_time;
-			struct timeval end_time;
-			gettimeofday(&start_time, NULL);
-
-			double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
-			gettimeofday(&end_time, NULL);
+		int total_nw[nw];
+		sc_hypervisor_group_workers_by_type(NULL, -1, nw, total_nw);
+		
+		
+		struct timeval start_time;
+		struct timeval end_time;
+		gettimeofday(&start_time, NULL);
+		
+		double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
+		gettimeofday(&end_time, NULL);
+		
+		long diff_s = end_time.tv_sec  - start_time.tv_sec;
+		long diff_us = end_time.tv_usec  - start_time.tv_usec;
+		
+		float timing = (float)(diff_s*1000000 + diff_us)/1000;
 
-			long diff_s = end_time.tv_sec  - start_time.tv_sec;
-			long diff_us = end_time.tv_usec  - start_time.tv_usec;
-
-			float timing = (float)(diff_s*1000000 + diff_us)/1000;
+		if(vmax != 0.0)
+		{
+			int nworkers_rounded[nsched_ctxs][nw];
+			sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
+			sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
+		}
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
+static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+{
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria == SC_VELOCITY)
+	{
 
-			if(vmax != 0.0)
-			{
-				int nworkers_rounded[nsched_ctxs][nw];
-				sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
-				sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
-			}
-			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		if(sc_hypervisor_check_velocity_gap_btw_ctxs())
+		{
+			_try_resizing();
 		}
 	}
 }
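
Note the locking discipline in _try_resizing(): the hypervisor mutex is only try-locked, so a pop that races with an ongoing resize simply skips the LP solve instead of blocking the worker. The guard in isolation (a sketch; as in the code above, any return value other than EBUSY, including other error codes, is treated as "lock acquired"):

	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		/* ... solve the linear program and redistribute workers ... */
		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
	}
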
@@ -122,11 +134,24 @@ static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworker
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
+static int feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria == SC_IDLE)
+	{
+		if(sc_hypervisor_check_idle(sched_ctx, worker))
+		{
+			_try_resizing();
+		}
+	}
+	return 0;
+}
+
 struct sc_hypervisor_policy feft_lp_policy = {
 	.size_ctxs = feft_lp_size_ctxs,
 	.handle_poped_task = feft_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = feft_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = NULL,

+ 0 - 1
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -346,7 +346,6 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct s
 			for(i = 0; i < ns; i++)
 				flops_on_w[i] = (double*)malloc(nw*sizeof(double));
 
-			printf("ns = %d nw = %d\n", ns, nw);
 			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
 			/* if we did find at least one solution redistribute the resources */
 			if(found_sol)

+ 103 - 61
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -135,12 +135,16 @@ static void size_if_required()
 		for(s = 0; s < nsched_ctxs; s++)
 		{
 			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
-			if(sc_w->submitted_flops < sc_w->total_flops)
+			/* wait until at least 90% of the total flops have been submitted */
+			if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
 				ready_to_size = 0;
 		}
 
 		if(ready_to_size)
+		{
 			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
+			sc_hypervisor_free_size_req();
+		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 }
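
The new readiness test is equivalent to requiring that at least 90% of the context's total flops have been submitted. A named helper spelling out the arithmetic (a sketch, not part of the commit):

	/* submitted + 0.1*total < total  <=>  submitted < 0.9*total */
	static inline unsigned _mostly_submitted(struct sc_hypervisor_wrapper *sc_w)
	{
		return sc_w->submitted_flops + 0.1 * sc_w->total_flops >= sc_w->total_flops;
	}
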
@@ -155,6 +159,69 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 	size_if_required();
 }
 
+static void _try_resizing(void)
+{
+	starpu_trace_user_event(2);
+	int ns = sc_hypervisor_get_nsched_ctxs();
+	int nw = starpu_worker_get_count(); /* Number of different workers */
+	int nt = 0; /* Number of different kinds of tasks */
+	
+	/* we do not take the mutex because an exact count of the tasks is
+	   not required; instead we copy the task pool to make sure the
+	   linear program cannot segfault if the list of submitted tasks
+	   changes during execution */
+	
+	struct sc_hypervisor_policy_task_pool *tp = NULL;
+	struct sc_hypervisor_policy_task_pool *tmp_task_pools = sc_hypervisor_policy_clone_task_pool(task_pools);
+	
+	for (tp = task_pools; tp; tp = tp->next)
+		nt++;
+	
+	double w_in_s[ns][nw];
+	/* heap-allocated rather than a variable-length 2D array, since nt is not bounded */
+	double **tasks_per_worker = (double**)malloc(nw*sizeof(double*));
+	int i;
+	for(i = 0; i < nw; i++)
+		tasks_per_worker[i] = (double*)malloc(nt*sizeof(double));
+	
+	struct teft_lp_data specific_data;
+	specific_data.nt = nt;
+	specific_data.tasks = tasks_per_worker;
+	specific_data.in_sched_ctxs = NULL;
+	specific_data.workers = NULL;
+	specific_data.tmp_task_pools = tmp_task_pools;
+	specific_data.size_ctxs = 0;
+
+	/* smallest possible tmax, difficult to obtain as we compute
+	   the number of flops and not the number of tasks */
+	double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
+	double smallest_tmax = possible_tmax / 3;
+	double tmax = possible_tmax * ns;
+	double tmin = smallest_tmax;
+	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
+	
+	/* if we did find at least one solution redistribute the resources */
+	if(found_sol)
+		sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, NULL, NULL, 0);
+	
+	struct sc_hypervisor_policy_task_pool *next = NULL;
+	struct sc_hypervisor_policy_task_pool *tmp_tp = tmp_task_pools;
+	while(tmp_task_pools)
+	{
+		next = tmp_tp->next;
+		free(tmp_tp);
+		tmp_tp = next;
+		tmp_task_pools = next;
+	}
+	for(i = 0; i < nw; i++)
+		free(tasks_per_worker[i]);
+	free(tasks_per_worker);
+}
+
 static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -162,74 +229,22 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 	if(ret != EBUSY)
 	{
-		if(sc_w->submitted_flops < sc_w->total_flops)
+		/* wait until at least 90% of the total flops have been submitted */
+		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
 		{
 			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 			return;
 		}
 
-		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria == SC_VELOCITY)
 		{
-			int ns = sc_hypervisor_get_nsched_ctxs();
-			int nw = starpu_worker_get_count(); /* Number of different workers */
-			int nt = 0; /* Number of different kinds of tasks */
-
-//			starpu_pthread_mutex_lock(&mutex);
-
-			/* we don't take the mutex bc a correct value of the number of tasks is
-			   not required but we do a copy in order to be sure
-			   that the linear progr won't segfault if the list of 
-			   submitted task will change during the exec */
-
-			struct sc_hypervisor_policy_task_pool *tp = NULL;
-			struct sc_hypervisor_policy_task_pool *tmp_task_pools = sc_hypervisor_policy_clone_task_pool(task_pools);
-
-			for (tp = task_pools; tp; tp = tp->next)
-				nt++;
-
-
-			double w_in_s[ns][nw];
-//			double tasks_per_worker[nw][nt];
-			double **tasks_per_worker=(double**)malloc(nw*sizeof(double*));
-			int i;
-			for(i = 0; i < nw; i++)
-				tasks_per_worker[i] = (double*)malloc(nt*sizeof(double));
-
-			struct teft_lp_data specific_data;
-			specific_data.nt = nt;
-			specific_data.tasks = tasks_per_worker;
-			specific_data.in_sched_ctxs = NULL;
-			specific_data.workers = NULL;
-			specific_data.tmp_task_pools = tmp_task_pools;
-			specific_data.size_ctxs = 0;
-
-			/* smallest possible tmax, difficult to obtain as we
-			   compute the nr of flops and not the tasks */
-			double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL);
-			double smallest_tmax = possible_tmax / 3;
-			double tmax = possible_tmax * ns;
-			double tmin = smallest_tmax;
-			unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _compute_workers_distrib);
-//			starpu_pthread_mutex_unlock(&mutex);
-
-			/* if we did find at least one solution redistribute the resources */
-			if(found_sol)
-				sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, NULL, NULL, 0);
-
-			struct sc_hypervisor_policy_task_pool *next = NULL;
-			struct sc_hypervisor_policy_task_pool *tmp_tp = tmp_task_pools;
-			while(tmp_task_pools)
+			
+			if(sc_hypervisor_check_velocity_gap_btw_ctxs())
 			{
-				next = tmp_tp->next;
-				free(tmp_tp);
-				tmp_tp = next;
-				tmp_task_pools = next;
+				_try_resizing();
 			}
-			for(i = 0; i < nw; i++)
-				free(tasks_per_worker[i]);
-			free(tasks_per_worker);
 		}
+
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 	/* too expensive to take this mutex and correct value of the number of tasks is not compulsory */
@@ -239,6 +254,33 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 
 }
 
+static int teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+{
+	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
+
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		/* wait until at least 90% of the total flops have been submitted */
+		if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+		{
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+			return 0;
+		}
+
+		unsigned criteria = sc_hypervisor_get_resize_criteria();
+		if(criteria == SC_IDLE)
+		{
+			if(sc_hypervisor_check_idle(sched_ctx, worker))
+			{
+				_try_resizing();
+			}
+		}
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+	return 0;
+}
 
 static void teft_lp_size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
@@ -249,7 +291,7 @@ struct sc_hypervisor_policy teft_lp_policy = {
 	.size_ctxs = teft_lp_size_ctxs,
 	.handle_poped_task = teft_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
-	.handle_idle_cycle = NULL,
+	.handle_idle_cycle = teft_lp_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
 	.handle_submitted_job = teft_lp_handle_submitted_job,

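_try_resizing() brackets the LP deadline between tmin = possible_tmax / 3 and tmax = possible_tmax * ns and lets sc_hypervisor_lp_execute_dichotomy() narrow the interval. Assuming the helper bisects on feasibility (a sketch of the general shape only; the real implementation lives in the LP utilities):

	static unsigned _dichotomy_sketch(double tmin, double tmax, double eps,
					  unsigned (*feasible)(double tmax))
	{
		unsigned found = 0;
		while(tmax - tmin > eps)
		{
			double t = (tmin + tmax) / 2.0;
			if(feasible(t))
			{
				found = 1;
				tmax = t; /* feasible: try a tighter deadline */
			}
			else
				tmin = t; /* infeasible: relax the deadline */
		}
		return found;
	}
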
+ 11 - 11
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -352,8 +352,9 @@ double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w)
 {
 	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
         double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
-	double sample = _get_ispeed_sample_for_sched_ctx(sc_w->sched_ctx);
-
+	double sample = config->ispeed_ctx_sample;
+
 /* 	double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w); */
 /* 	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops; */
 /* 	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample :  */
@@ -527,7 +528,7 @@ double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper*
 
 
 /* check if there is a big velocity gap between the contexts */
-unsigned _check_velocity_gap_btw_ctxs()
+unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
 {
 	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
@@ -629,17 +630,16 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
         }
 }
 
-static unsigned _check_idle(unsigned sched_ctx, int worker)
+unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
 	struct sc_hypervisor_policy_config *config = sc_w->config;
 	if(config != NULL)
 	{
-		int j;
-		for(j = 0; j < STARPU_NMAXWORKERS; j++)
+		if(sc_w->current_idle_time[worker] > config->max_idle[worker])
 		{
-			if(sc_w->current_idle_time[j] > config->max_idle[j])
-				return 1;
+			sc_w->current_idle_time[worker] = 0.0; /* reset so one idle period triggers at most one resize */
+			return 1;
 		}
 	}
 
@@ -648,13 +648,13 @@ static unsigned _check_idle(unsigned sched_ctx, int worker)
 
 unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
 {
-	unsigned criteria = _get_resize_criteria();
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
 	if(criteria != SC_NOTHING)
 	{
 		if(criteria == SC_IDLE)
-			return _check_idle(sched_ctx, worker);
+			return sc_hypervisor_check_idle(sched_ctx, worker);
 		else
-			return _check_velocity_gap_btw_ctxs();
+			return sc_hypervisor_check_velocity_gap_btw_ctxs();
 	}
 	else
 		return 0;

+ 4 - 3
sc_hypervisor/src/sc_hypervisor.c

@@ -15,6 +15,7 @@
  */
 
 #include <sc_hypervisor_intern.h>
+#include <sc_hypervisor_policy.h>
 #include <common/uthash.h>
 #include <starpu_config.h>
 
@@ -136,7 +137,7 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 	char* vel_gap = getenv("MAX_VELOCITY_GAP");
 	hypervisor.max_velocity_gap = vel_gap ? atof(vel_gap) : SC_VELOCITY_MAX_GAP_DEFAULT;
 	char* crit =  getenv("HYPERVISOR_TRIGGER_RESIZE");
-	hypervisor.resize_criteria = strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
+	/* guard against an unset variable: strcmp(NULL, ...) would crash */
+	hypervisor.resize_criteria = !crit ? SC_NOTHING : (strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_VELOCITY : SC_NOTHING));
 
 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
 	hypervisor.start_executing_time = starpu_timing_now();
@@ -377,7 +378,7 @@ double _get_max_velocity_gap()
 	return hypervisor.max_velocity_gap;
 }
 
-unsigned _get_resize_criteria()
+unsigned sc_hypervisor_get_resize_criteria(void)
 {
 	return hypervisor.resize_criteria;
 }
@@ -534,7 +535,7 @@ void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sch
 		for(j = 0; j < nworkers_to_move; j++)
 			printf(" %d", workers_to_move[j]);
 		printf("\n");
-
+		starpu_trace_user_event(1);
 		hypervisor.allow_remove[receiver_sched_ctx] = 0;
 		starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx);
 

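With the NULL guard above, the trigger is chosen entirely through the environment before sc_hypervisor_init() runs. For example (a sketch; select_trigger() is a hypothetical application helper, not part of the commit):

	#include <stdlib.h>

	/* "idle" -> SC_IDLE, "speed" -> SC_VELOCITY; unset or any other
	   value -> SC_NOTHING, i.e. resizing is never triggered. */
	static void select_trigger(const char *criteria)
	{
		setenv("HYPERVISOR_TRIGGER_RESIZE", criteria, 1);
	}

Calling, e.g., select_trigger("idle") before sc_hypervisor_init() enables the per-worker idle trigger.
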
+ 1 - 4
sc_hypervisor/src/sc_hypervisor_intern.h

@@ -18,9 +18,7 @@
 #include <common/uthash.h>
 
 #define SC_VELOCITY_MAX_GAP_DEFAULT 50
-#define SC_NOTHING 0
-#define SC_IDLE 1
-#define SC_SPEED 2
+
 struct size_request
 {
 	int *workers;
@@ -102,4 +100,3 @@ void _remove_config(unsigned sched_ctx);
 
 double _get_max_velocity_gap();
 
-unsigned _get_resize_criteria();