Просмотр исходного кода

allow calling the resizing linear programs from the appl

Andra Hugo, 12 лет назад
Родитель
Commit
054afc9556

+ 2 - 1
sc_hypervisor/examples/Makefile.am

@@ -20,7 +20,8 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_GLPK_LDFLA
 
 noinst_PROGRAMS =				\
 	app_driven_test/app_driven_test		\
-	lp_test/lp_test
+	lp_test/lp_test				\
+	lp_test/lp_resize_test
 
 if !NO_BLAS_LIB
 noinst_PROGRAMS +=				\

+ 5 - 4
sc_hypervisor/examples/app_driven_test/app_driven_test.c

@@ -37,7 +37,7 @@ pthread_mutex_t mut[2];
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
 
-void cpu_func(void *buffers[], void *cl_arg)
+void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg)
 {
 	struct params *params = (struct params *) cl_arg;
 
@@ -88,7 +88,7 @@ void* submit_tasks_thread(void *arg)
 			printf("require resize for sched_ctx %d at tag %d\n", sched_ctx, tag);
 			/* specify that the contexts should be resized when the task having this
 			   particular tag will finish executing */
-			sc_hypervisor_resize(sched_ctx, tag);
+			sc_hypervisor_post_resize_request(sched_ctx, tag);
 		}
 
 		params[i].sched_ctx = sched_ctx;
@@ -97,11 +97,12 @@ void* submit_tasks_thread(void *arg)
 		task[i]->cl_arg = &params[i];
 		task[i]->cl_arg_size = sizeof(params);
 
-		starpu_task_submit(task[i]);
+		int ret = starpu_task_submit(task[i]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
 	starpu_task_wait_for_all();
-	return;
+	return NULL;
 }
 
 int main()

+ 137 - 0
sc_hypervisor/examples/lp_test/lp_resize_test.c

@@ -0,0 +1,137 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+#include <sc_hypervisor.h>
+
+#define NTASKS 1000
+#define NINCR 10
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+
+unsigned val[2];
+pthread_mutex_t mut[2];
+
+/* Every implementation of a codelet must have this prototype, the first
+ * argument (buffers) describes the buffers/streams that are managed by the
+ * DSM; the second arguments references read-only data that is passed as an
+ * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
+ * are no data input/output managed by the DSM (cl.nbuffers = 0) */
+
+void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg)
+{
+	/* cl_arg carries the id of the context the task was submitted to;
+	   contexts are assumed to be numbered 1 and 2 (see main), hence the
+	   "- 1" when indexing the per-context counter and mutex below */
+	unsigned sched_ctx = *((unsigned *) cl_arg);
+
+	int i;
+	/* increment the context's counter NINCR times, under the mutex since
+	   several workers may execute tasks of the same context concurrently */
+	for(i = 0; i < NINCR; i++)
+	{
+		pthread_mutex_lock(&mut[sched_ctx - 1]);
+		val[sched_ctx - 1]++;
+		pthread_mutex_unlock(&mut[sched_ctx - 1]);
+	}
+}
+
+struct starpu_codelet cl = {0};
+
+/* Submit NTASKS tasks to the context received through 'arg'; half-way
+   through the submission, ask the hypervisor to reevaluate the resource
+   distribution over all registered contexts. Returns NULL (pthread start
+   routines must return a void* value). */
+void* submit_tasks_thread(void *arg)
+{
+	unsigned sched_ctx = *((unsigned*)arg);
+	starpu_sched_ctx_set_context(&sched_ctx);
+
+	struct starpu_task *task[NTASKS];
+	int i;
+	for(i = 0; i < NTASKS; i++)
+	{
+		task[i] = starpu_task_create();
+		cl.cpu_funcs[0] = cpu_func;
+		cl.nbuffers = 0;
+
+		task[i]->cl = &cl;
+
+		task[i]->cl_arg = &sched_ctx;
+		task[i]->cl_arg_size = sizeof(unsigned);
+
+		task[i]->flops = NINCR*1000000000.0;
+		int ret = starpu_task_submit(task[i]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		/* trigger an explicit resize of all contexts (NULL/-1 means
+		   "all registered contexts / all workers") */
+		if(i == NTASKS/2)
+			sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1);
+	}
+
+	starpu_task_wait_for_all();
+	/* a bare "return;" is invalid in a function returning void* */
+	return NULL;
+}
+
+int main()
+{
+	int ret = starpu_init(NULL);
+
+	/* 77 tells the test harness the test was skipped (no device available) */
+	if (ret == -ENODEV)
+		return 77;
+
+	/* create contexts */
+	unsigned sched_ctx1 = starpu_sched_ctx_create("dmda", NULL, 0, "sched_ctx1");
+	unsigned sched_ctx2 = starpu_sched_ctx_create("dmda", NULL, 0, "sched_ctx2");
+
+	/* initialize the hypervisor */
+	struct sc_hypervisor_policy policy;
+	policy.custom = 0;
+	/* indicate which strategy to use: feft_lp solves a linear program to
+	   distribute the workers over the contexts, and supports explicit
+	   resize requests from the application (sc_hypervisor_resize_ctxs) */
+	policy.name = "feft_lp";
+	void *perf_counters = sc_hypervisor_init(&policy);
+
+	/* let starpu know which performance counters should use 
+	   to inform the hypervisor how the application and the resources are executing */
+	starpu_sched_ctx_set_perf_counters(sched_ctx1, (struct starpu_sched_ctx_performance_counters*)perf_counters);
+	starpu_sched_ctx_set_perf_counters(sched_ctx2, (struct starpu_sched_ctx_performance_counters*)perf_counters);
+
+	double flops1 = NTASKS*NINCR*1000000000.0;
+	double flops2 = NTASKS*NINCR*1000000000.0;
+	/* register the contexts that should be managed by the hypervisor
+	   and indicate the approximate amount of workload (in flops) of each,
+	   known here from the per-task flops set in the submission loop */
+	sc_hypervisor_register_ctx(sched_ctx1, flops1);
+	sc_hypervisor_register_ctx(sched_ctx2, flops2);
+	/* lp strategy allows sizing the contexts because we know the total number of flops
+	   to be executed */
+	sc_hypervisor_size_ctxs(NULL, -1, NULL, -1);
+
+	starpu_pthread_t tid[2];
+
+	val[0] = 0;
+	val[1] = 0;
+	pthread_mutex_init(&mut[0], NULL);
+	pthread_mutex_init(&mut[1], NULL);
+
+	/* we create two threads to simulate simultaneous submission of tasks */
+	starpu_pthread_create(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1);
+	starpu_pthread_create(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2);
+
+	starpu_pthread_join(tid[0], NULL);
+	starpu_pthread_join(tid[1], NULL);
+
+	/* free starpu and hypervisor data
+	   NOTE(review): the hypervisor is shut down after starpu here — confirm
+	   this ordering is intended (it mirrors the sibling examples) */
+	starpu_shutdown();
+	sc_hypervisor_shutdown();
+
+	/* %u matches the unsigned context ids and counters (%d would be UB) */
+	FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR);
+	FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR);
+	return 0;
+}

+ 4 - 2
sc_hypervisor/examples/lp_test/lp_test.c

@@ -32,7 +32,7 @@ pthread_mutex_t mut[2];
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
 
-void cpu_func(void *buffers[], void *cl_arg)
+void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg)
 {
 	unsigned sched_ctx = *((unsigned *) cl_arg);
 
@@ -66,7 +66,9 @@ void* submit_tasks_thread(void *arg)
 		task[i]->cl_arg_size = sizeof(unsigned);
 
 		task[i]->flops = NINCR*1000000000.0;
-		starpu_task_submit(task[i]);
+		int ret = starpu_task_submit(task[i]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
 	}
 
 	starpu_task_wait_for_all();

+ 8 - 3
sc_hypervisor/include/sc_hypervisor.h

@@ -45,10 +45,12 @@ struct sc_hypervisor_policy
 	/* indicate if it is a policiy create by the user or not */
 	unsigned custom;
 
-	/* if knwing the future the hypervisor can find the good 
-	   distribution of workers on contexts even at the begining of the program */
+	/* Distribute workers to contexts even at the begining of the program */
 	void (*size_ctxs)(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
 
+	/* Require explicit resizing */
+	void (*resize_ctxs)(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
+
 	/* the hypervisor takes a decision when the worker was idle for another cyle in this ctx */
 	void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
 
@@ -84,7 +86,10 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops);
 void sc_hypervisor_unregister_ctx(unsigned sched_ctx);
 
 /* submit a requirement of resizing when a task taged with task_tag is executed */
-void sc_hypervisor_resize(unsigned sched_ctx, int task_tag);
+void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag);
+
+/* reevaluate the distribution of the resources and eventually resize if needed */
+void sc_hypervisor_resize_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
 
 /* don't allow the hypervisor to resize a context */
 void sc_hypervisor_stop_resize(unsigned sched_ctx);

+ 1 - 1
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -44,7 +44,7 @@ double sc_hypervisor_lp_get_tmax(int nw, int *workers);
 void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw]);
 
 /* redistribute the ressource in contexts by assigning the first x available ressources to each one */
-void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw]);
+void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *sched_ctxs);
 
 /* make the first distribution of ressource in contexts by assigning the first x available ressources to each one */
 void sc_hypervisor_lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers);

+ 30 - 17
sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c

@@ -225,25 +225,27 @@ static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[n
 }
 
 
-static void _try_resizing(void)
+static void _try_resizing(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
-	int ns = sc_hypervisor_get_nsched_ctxs();
-	int nw = starpu_worker_get_count(); /* Number of different workers */
+	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	int nw = workers == NULL ? starpu_worker_get_count() : nworkers; /* Number of different workers */
 	
+	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
+
 	double w_in_s[ns][nw];
-	unsigned found_sol = _compute_max_speed(ns, nw,  w_in_s, NULL, NULL);
+	unsigned found_sol = _compute_max_speed(ns, nw,  w_in_s, sched_ctxs, workers);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{
 		int w, s;
-		double nworkers[ns][2];
-		int nworkers_rounded[ns][2];
+		double nworkers_per_ctx[ns][2];
+		int nworkers_per_ctx_rounded[ns][2];
 		for(s = 0; s < ns; s++)
 		{
-			nworkers[s][0] = 0.0;
-			nworkers[s][1] = 0.0;
-			nworkers_rounded[s][0] = 0;
-			nworkers_rounded[s][1] = 0;
+			nworkers_per_ctx[s][0] = 0.0;
+			nworkers_per_ctx[s][1] = 0.0;
+			nworkers_per_ctx_rounded[s][0] = 0;
+			nworkers_per_ctx_rounded[s][1] = 0;
 			
 		}
 		
@@ -255,15 +257,15 @@ static void _try_resizing(void)
 				
 				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] += w_in_s[s][w];
+					nworkers_per_ctx[s][0] += w_in_s[s][w];
 					if(w_in_s[s][w] >= 0.3)
-						nworkers_rounded[s][0]++;
+						nworkers_per_ctx_rounded[s][0]++;
 				}
 				else
 				{
-					nworkers[s][1] += w_in_s[s][w];
+					nworkers_per_ctx[s][1] += w_in_s[s][w];
 					if(w_in_s[s][w] > 0.5)
-						nworkers_rounded[s][1]++;
+						nworkers_per_ctx_rounded[s][1]++;
 				}
 			}
 		}
@@ -271,7 +273,7 @@ static void _try_resizing(void)
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
 		
-		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs);
 		
 	}
 }
@@ -286,7 +288,7 @@ static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct st
 		{
 			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
-				_try_resizing();
+				_try_resizing(NULL, -1, NULL, -1);
 			}
 		}
                 starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
@@ -304,7 +306,7 @@ static debit_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
                         {
-                                _try_resizing();
+                                _try_resizing(NULL, -1, NULL, -1);
 //                              sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);                                                                                                               \
                                                                                                                                                                                                                     
                         }
@@ -313,6 +315,16 @@ static debit_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
         }
 }
 
+/* Entry point of the policy for explicit resize requests coming from the
+   application (via sc_hypervisor_resize_ctxs); skips the request rather
+   than blocking when the hypervisor is already busy resizing */
+static void debit_lp_resize_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
 static void debit_lp_end_ctx(unsigned sched_ctx)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -325,6 +337,7 @@ static void debit_lp_end_ctx(unsigned sched_ctx)
 
 struct sc_hypervisor_policy debit_lp_policy = {
 	.size_ctxs = NULL,
+	.resize_ctxs = debit_lp_resize_ctxs,
 	.handle_poped_task = debit_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = debit_lp_handle_idle_cycle,

+ 47 - 20
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -20,13 +20,14 @@
 #include <sys/time.h>
 
 #ifdef STARPU_HAVE_GLPK_H
-static void _try_resizing(void)
+static void _try_resizing(int *sched_ctxs, int nsched_ctxs)
 {
 	/* for vite */
 	starpu_trace_user_event(2);
+	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 
-	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
-	double nworkers[nsched_ctxs][2];
+	double nworkers_per_ctx[ns][2];
 	int nw = 1;
 #ifdef STARPU_USE_CUDA
 	int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
@@ -40,7 +41,7 @@ static void _try_resizing(void)
 	struct timeval end_time;
 	gettimeofday(&start_time, NULL);
 	
-	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers, total_nw);
+	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw);
 	gettimeofday(&end_time, NULL);
 	
 	long diff_s = end_time.tv_sec  - start_time.tv_sec;
@@ -50,13 +51,14 @@ static void _try_resizing(void)
 	
 	if(vmax != 0.0)
 	{
-		int nworkers_rounded[nsched_ctxs][nw];
-		sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers, nworkers_rounded);
-		sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_rounded, nworkers);
+		int nworkers_per_ctx_rounded[nsched_ctxs][nw];
+		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded);
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs);
 	}
-	
 }
-static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+
+static void feft_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, 
+				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 	if(ret != EBUSY)
@@ -66,27 +68,27 @@ static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 		{
 			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
-				_try_resizing();
+				_try_resizing(NULL, -1);
 			}
 		}
 		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 
 }
-static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
+static void feft_lp_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-	int nsched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : ns;
+	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	int nw = 1;
 #ifdef STARPU_USE_CUDA
 	int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
 	nw = ncuda != 0 ? 2 : 1;
 #endif
-	double nworkers_per_type[nsched_ctxs][nw];
+	double nworkers_per_type[ns][nw];
 	int total_nw[nw];
 	sc_hypervisor_group_workers_by_type(workers, nworkers, nw, total_nw);
 
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, nworkers_per_type, total_nw);
+	double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_type, total_nw);
 	if(vmax != 0.0)
 	{
 // 		printf("********size\n");
@@ -100,8 +102,8 @@ static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworker
 /* 				printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]); */
 /* #endif */
 /* 		} */
-		int nworkers_per_type_rounded[nsched_ctxs][nw];
-		sc_hypervisor_lp_round_double_to_int(nsched_ctxs, nw, nworkers_per_type, nworkers_per_type_rounded);
+		int nworkers_per_type_rounded[ns][nw];
+		sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_type, nworkers_per_type_rounded);
 /*       	for( i = 0; i < nsched_ctxs; i++) */
 /* 		{ */
 /* 			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]); */
@@ -125,14 +127,14 @@ static void feft_lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworker
 			}
 		}
 		if(has_workers)
-			sc_hypervisor_lp_redistribute_resources_in_ctxs(nsched_ctxs, nw, nworkers_per_type_rounded, nworkers_per_type);
+			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_type_rounded, nworkers_per_type, current_sched_ctxs);
 		else
-			sc_hypervisor_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, nw, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
+			sc_hypervisor_lp_distribute_resources_in_ctxs(sched_ctxs, ns, nw, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
 	}
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
-static feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
 	if(ret != EBUSY)
@@ -143,7 +145,7 @@ static feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 			
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 			{
-				_try_resizing();
+				_try_resizing(NULL, -1);
 //				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
 			}
 		}
@@ -151,8 +153,33 @@ static feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 	}
 }
 
+/* Entry point of the policy for explicit resize requests coming from the
+   application (via sc_hypervisor_resize_ctxs); skips the request rather
+   than blocking when the hypervisor is already busy resizing */
+static void feft_lp_resize_ctxs(int *sched_ctxs, int nsched_ctxs , 
+				__attribute__((unused))int *workers, __attribute__((unused))int nworkers)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		struct sc_hypervisor_wrapper* sc_w  = NULL;
+		int s = 0;
+		/* give up if any listed context still has more than ~10% of its
+		   flops left to submit: its speed estimate is not settled yet
+		   (loop is skipped when the caller passed NULL/-1 for "all") */
+		for(s = 0; s < nsched_ctxs; s++)
+		{
+			 sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+			
+			 if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+			 {
+				 starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+				 return;
+			 }
+		}
+
+		_try_resizing(sched_ctxs, nsched_ctxs);
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
 struct sc_hypervisor_policy feft_lp_policy = {
 	.size_ctxs = feft_lp_size_ctxs,
+	.resize_ctxs = feft_lp_resize_ctxs,
 	.handle_poped_task = feft_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = feft_lp_handle_idle_cycle, //NULL,

+ 34 - 23
sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c

@@ -42,7 +42,6 @@ static double _glp_resolve (int ns, int nw, double final_w_in_s[ns][nw],
 	double *flops = sd->flops;
 	
 	double **final_flops_on_w = sd->flops_on_w;
-        int *workers = sd->workers;
 	
 	double w_in_s[ns][nw];
 	double flops_on_w[ns][nw];
@@ -269,7 +268,6 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	int w,s;
 
 	struct sc_hypervisor_wrapper* sc_w = NULL;
-	double total_flops = 0.0;
 	for(s = 0; s < ns; s++)
 	{
 		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
@@ -325,10 +323,12 @@ static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_i
 	return found_sol;
 }
 
-static void _try_resizing(void)
+static void _try_resizing(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
-	int ns = sc_hypervisor_get_nsched_ctxs();
-	int nw = starpu_worker_get_count(); /* Number of different workers */
+	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	int nw = nworkers == -1 ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
+
+	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	
 	double w_in_s[ns][nw];
 //			double flops_on_w[ns][nw];
@@ -337,19 +337,19 @@ static void _try_resizing(void)
 	for(i = 0; i < ns; i++)
 		flops_on_w[i] = (double*)malloc(nw*sizeof(double));
 	
-	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, NULL, NULL);
+	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw,  w_in_s, flops_on_w, sched_ctxs, workers);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 	{
 		int w, s;
-		double nworkers[ns][2];
-		int nworkers_rounded[ns][2];
+		double nworkers_per_ctx[ns][2];
+		int nworkers_per_ctx_rounded[ns][2];
 		for(s = 0; s < ns; s++)
 		{
-			nworkers[s][0] = 0.0;
-			nworkers[s][1] = 0.0;
-			nworkers_rounded[s][0] = 0;
-			nworkers_rounded[s][1] = 0;
+			nworkers_per_ctx[s][0] = 0.0;
+			nworkers_per_ctx[s][1] = 0.0;
+			nworkers_per_ctx_rounded[s][0] = 0;
+			nworkers_per_ctx_rounded[s][1] = 0;
 			
 		}
 		
@@ -361,15 +361,15 @@ static void _try_resizing(void)
 				
 				if(arch == STARPU_CUDA_WORKER)
 				{
-					nworkers[s][0] += w_in_s[s][w];
+					nworkers_per_ctx[s][0] += w_in_s[s][w];
 					if(w_in_s[s][w] >= 0.3)
-						nworkers_rounded[s][0]++;
+						nworkers_per_ctx_rounded[s][0]++;
 				}
 				else
 				{
-					nworkers[s][1] += w_in_s[s][w];
+					nworkers_per_ctx[s][1] += w_in_s[s][w];
 					if(w_in_s[s][w] > 0.5)
-						nworkers_rounded[s][1]++;
+						nworkers_per_ctx_rounded[s][1]++;
 				}
 			}
 		}
@@ -377,14 +377,15 @@ static void _try_resizing(void)
 /* 					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
 /* 					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */
 		
-		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs);
 	}
 	for(i = 0; i < ns; i++)
 		free(flops_on_w[i]);
 	free(flops_on_w);
 }
 
-static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
+static void ispeed_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, 
+					__attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
         int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
         if(ret != EBUSY)
@@ -394,14 +395,14 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct s
                 {
                         if(sc_hypervisor_check_speed_gap_btw_ctxs())
                         {
-                                _try_resizing();
+                                _try_resizing(NULL, -1, NULL, -1);
                         }
                 }
                 starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
         }
 }
 
-static ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
+static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
         int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
         if(ret != EBUSY)
@@ -412,7 +413,7 @@ static ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
                         {
-                                _try_resizing();
+                                _try_resizing(NULL, -1, NULL, -1);
 //                              sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);                                                                                                                
                         }
                 }
@@ -420,11 +421,20 @@ static ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
         }
 }
 
+/* Entry point of the policy for explicit resize requests coming from the
+   application (via sc_hypervisor_resize_ctxs); skips the request rather
+   than blocking when the hypervisor is already busy resizing */
+static void ispeed_lp_resize_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
 
 static void ispeed_lp_end_ctx(unsigned sched_ctx)
 {
-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
-	int worker;
+/* 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */
+/* 	int worker; */
 /* 	for(worker = 0; worker < 12; worker++) */
 /* 		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */
 
@@ -433,6 +443,7 @@ static void ispeed_lp_end_ctx(unsigned sched_ctx)
 
 struct sc_hypervisor_policy ispeed_lp_policy = {
 	.size_ctxs = NULL,
+	.resize_ctxs = ispeed_lp_resize_ctxs,
 	.handle_poped_task = ispeed_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = ispeed_lp_handle_idle_cycle,

+ 35 - 6
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -161,11 +161,14 @@ static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sch
 	size_if_required();
 }
 
-static void _try_resizing(void)
+static void _try_resizing(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
 {
 	starpu_trace_user_event(2);
-	int ns = sc_hypervisor_get_nsched_ctxs();
-	int nw = starpu_worker_get_count(); /* Number of different workers */
+	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
+	int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
+
+	sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
+
 	int nt = 0; /* Number of different kinds of tasks */
 	
 //			starpu_pthread_mutex_lock(&mutex);
@@ -211,7 +214,7 @@ static void _try_resizing(void)
 	
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
-		sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, NULL, NULL, 0);
+		sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 0);
 	
 	struct sc_hypervisor_policy_task_pool *next = NULL;
 	struct sc_hypervisor_policy_task_pool *tmp_tp = tmp_task_pools;
@@ -226,6 +229,7 @@ static void _try_resizing(void)
 		free(tasks_per_worker[i]);
 	free(tasks_per_worker);
 }
+
 static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 {
 	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
@@ -245,7 +249,7 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 			
 			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
-				_try_resizing();
+				_try_resizing(NULL, -1, NULL, -1);
 			}
 		}
 
@@ -278,7 +282,7 @@ static int teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 			
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 			{
-				_try_resizing();
+				_try_resizing(NULL, -1, NULL, -1);
 //				sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
 			}
 		}
@@ -292,8 +296,33 @@ static void teft_lp_size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, i
 	sc_hypervisor_save_size_req(sched_ctxs, nsched_ctxs, workers, nworkers);
 }
 
+/* Entry point of the policy for explicit resize requests coming from the
+   application (via sc_hypervisor_resize_ctxs); skips the request rather
+   than blocking when the hypervisor is already busy resizing */
+static void teft_lp_resize_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
+{
+	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+	if(ret != EBUSY)
+	{
+		struct sc_hypervisor_wrapper* sc_w  = NULL;
+		int s = 0;
+		/* give up if any listed context still has more than ~10% of its
+		   flops left to submit: its speed estimate is not settled yet
+		   (loop is skipped when the caller passed NULL/-1 for "all") */
+		for(s = 0; s < nsched_ctxs; s++)
+		{
+			 sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
+			
+			if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops)
+			{
+				starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+				return;
+			}
+		}
+
+
+		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
+		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+	}
+}
+
 struct sc_hypervisor_policy teft_lp_policy = {
 	.size_ctxs = teft_lp_size_ctxs,
+	.resize_ctxs = teft_lp_resize_ctxs,
 	.handle_poped_task = teft_lp_handle_poped_task,
 	.handle_pushed_task = NULL,
 	.handle_idle_cycle = teft_lp_handle_idle_cycle,

+ 3 - 4
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -299,9 +299,8 @@ void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_mov
 	}
 }
 
-void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
+void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *sched_ctxs)
 {
-	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
 	int s, s2, w;
 	for(s = 0; s < ns; s++)
 	{
@@ -490,7 +489,7 @@ void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][n
 	}
 	
 	if(!do_size)
-		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers, sched_ctxs_input);
 	else
 	{
 		int *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;
@@ -507,7 +506,7 @@ void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][n
 			}
 		}
 		if(has_workers)
-			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
+			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers, current_sched_ctxs);
 		else
 			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, 2, nworkers_rounded, nworkers, workers_input, nw);
 	}

+ 8 - 1
sc_hypervisor/src/sc_hypervisor.c

@@ -63,6 +63,7 @@ static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
 
 	hypervisor.policy.name = policy->name;
 	hypervisor.policy.size_ctxs = policy->size_ctxs;
+	hypervisor.policy.resize_ctxs = policy->resize_ctxs;
 	hypervisor.policy.handle_poped_task = policy->handle_poped_task;
 	hypervisor.policy.handle_pushed_task = policy->handle_pushed_task;
 	hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
@@ -718,7 +719,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
 /* Enqueue a resize request for 'sched_ctx', to be executed when the
  * 'task_tag' tasks of 'sched_ctx' complete.  */
-void sc_hypervisor_resize(unsigned sched_ctx, int task_tag)
+void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag)
 {
 	struct resize_request_entry *entry;
 
@@ -733,6 +734,12 @@ void sc_hypervisor_resize(unsigned sched_ctx, int task_tag)
 	starpu_pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
 }
 
+/* Public API: let the application explicitly request a reevaluation of the
+   resource distribution; dispatches to the loaded policy's resize_ctxs hook
+   and is a no-op for policies that do not implement it */
+void sc_hypervisor_resize_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
+{
+	if(hypervisor.policy.resize_ctxs)
+		hypervisor.policy.resize_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
+}
+
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_idle_end(unsigned sched_ctx, int worker)
 {