
linear programming strategy for the hypervisor taking into account the type of tasks

Andra Hugo 13 years ago
parent
commit
5e367ff87f
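The optimization problem built by the new "lp2" policy (lp2_policy.c below) can be summarized as the following linear program; this is a reconstruction from the code, and the notation is ours, not StarPU's:

	\min\; t_{max} \quad \text{s.t.} \quad
	\sum_{t} time_{w,t}\, n_{w,t} \le t_{max} \;\; \forall w, \qquad
	\sum_{w} n_{w,t} = N_t \;\; \forall t, \qquad
	\sum_{t \in s} n_{w,t} \le 0.3\, t_{max} \;\; \forall w, s, \qquad
	n_{w,t} \ge 0

where n_{w,t} is the number of tasks of kind t (same codelet and footprint) assigned to worker w, time_{w,t} is the execution time predicted by the history-based performance model, N_t is the number of submitted tasks of kind t, and s ranges over the scheduling contexts. After solving, a context keeps a worker if at least one task of that context is assigned to it (res >= 1.0) and loses it otherwise.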

+ 2 - 1
include/starpu.h

@@ -123,7 +123,8 @@ enum starpu_archtype
 	STARPU_CPU_WORKER, /* CPU core */
 	STARPU_CUDA_WORKER, /* NVIDIA CUDA device */
 	STARPU_OPENCL_WORKER, /* OpenCL CUDA device */
-	STARPU_GORDON_WORKER /* Cell SPU */
+	STARPU_GORDON_WORKER, /* Cell SPU */
+	STARPU_ALL
 };
 
 /* This function returns the type of worker associated to an identifier (as

+ 1 - 1
include/starpu_perfmodel.h

@@ -208,7 +208,7 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl);
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen, unsigned nimpl);
 int starpu_list_models(FILE *output);
-
+double starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned footprint);
 void starpu_force_bus_sampling(void);
 void starpu_bus_print_bandwidth(FILE *f);
 

+ 1 - 0
include/starpu_scheduler.h

@@ -155,6 +155,7 @@ struct starpu_performance_counters {
 	void (*notify_pushed_task)(unsigned sched_ctx, int worker);
 	void (*notify_poped_task)(unsigned sched_ctx, int worker, double flops);
 	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid);
+	void (*notify_submitted_job)(struct starpu_task *task, unsigned footprint);
 };
 
 #ifdef STARPU_BUILD_SCHED_CTX_HYPERVISOR

+ 7 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -96,6 +96,7 @@ struct hypervisor_policy {
 	void (*handle_poped_task)(unsigned sched_ctx, int worker);
 	void (*handle_idle_end)(unsigned sched_ctx, int worker);
 	void (*handle_post_exec_hook)(unsigned sched_ctx, struct starpu_htbl32_node* resize_requests, int task_tag);
+	void (*handle_submitted_job)(struct starpu_task *task, unsigned footprint);
 };
 
 
@@ -125,8 +126,14 @@ int* sched_ctx_hypervisor_get_sched_ctxs();
 
 int sched_ctx_hypervisor_get_nsched_ctxs();
 
+int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch);
+
 struct sched_ctx_wrapper* sched_ctx_hypervisor_get_wrapper(unsigned sched_ctx);
 
 double sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(struct sched_ctx_wrapper* sc_w);
 
 char* sched_ctx_hypervisor_get_policy();
+
+void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
+
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx);

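To illustrate the extended interface, a custom policy can hook the new callback as sketched below; the counting_policy example and its name are hypothetical, only the struct layout and the callback signature come from this header:

	#include <sched_ctx_hypervisor.h>

	/* Hypothetical policy: just count submitted tasks. A real policy would
	   pool them by (codelet, footprint), as lp2_policy.c does below, and
	   would need a mutex, since tasks may be submitted concurrently. */
	static unsigned long nsubmitted = 0;

	static void counting_handle_submitted_job(struct starpu_task *task, unsigned footprint)
	{
		(void)task;
		(void)footprint;
		nsubmitted++;
	}

	struct hypervisor_policy counting_policy = {
		.handle_poped_task = NULL,
		.handle_pushed_task = NULL,
		.handle_idle_cycle = NULL,
		.handle_idle_end = NULL,
		.handle_post_exec_hook = NULL,
		.handle_submitted_job = counting_handle_submitted_job,
		.custom = 1,
		.name = "counting"
	};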
+ 2 - 1
sched_ctx_hypervisor/src/Makefile.am

@@ -30,7 +30,8 @@ libsched_ctx_hypervisor_la_SOURCES = 			\
 	hypervisor_policies/idle_policy.c		\
 	hypervisor_policies/app_driven_policy.c		\
 	hypervisor_policies/gflops_rate_policy.c	\
-	hypervisor_policies/lp_policy.c	
+	hypervisor_policies/lp_policy.c			\
+	hypervisor_policies/lp2_policy.c	
 
 noinst_HEADERS = sched_ctx_hypervisor_intern.h		\
 	hypervisor_policies/policy_utils.h

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -33,6 +33,7 @@ struct hypervisor_policy app_driven_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = app_driven_handle_post_exec_hook,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "app_driven"
 };

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -294,6 +294,7 @@ struct hypervisor_policy gflops_rate_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "gflops_rate"
 };

+ 1 - 0
sched_ctx_hypervisor/src/hypervisor_policies/idle_policy.c

@@ -30,6 +30,7 @@ struct hypervisor_policy idle_policy = {
 	.handle_idle_cycle = idle_handle_idle_cycle,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "idle"
 };

+ 368 - 0
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -0,0 +1,368 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "policy_utils.h"
+#include <math.h>
+struct bound_task_pool
+{
+	/* Which codelet has been executed */
+	struct starpu_codelet *cl;
+	/* Task footprint key */
+	uint32_t footprint;
+	/* Context the task belongs to */
+	unsigned sched_ctx_id;
+	/* Number of tasks of this kind */
+	unsigned long n;
+	/* Other task kinds */
+	struct bound_task_pool *next;
+};
+
+
+static struct bound_task_pool *task_pools, *last;
+
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void lp2_handle_submitted_job(struct starpu_task *task, unsigned footprint)
+{
+	pthread_mutex_lock(&mutex);
+	struct bound_task_pool *tp;
+	
+	if (last && last->cl == task->cl && last->footprint == footprint && last->sched_ctx_id == task->sched_ctx)
+		tp = last;
+	else
+		for (tp = task_pools; tp; tp = tp->next)
+			if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
+					break;
+	
+	if (!tp)
+	{
+		tp = (struct bound_task_pool *) malloc(sizeof(*tp));
+		tp->cl = task->cl;
+		tp->footprint = footprint;
+		tp->sched_ctx_id = task->sched_ctx;
+		tp->n = 0;
+		tp->next = task_pools;
+		task_pools = tp;
+	}
+	
+	/* One more task of this kind; remember it for the fast-path check above */
+	tp->n++;
+	last = tp;
+	pthread_mutex_unlock(&mutex);
+}
+
+static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt])
+{
+        struct bound_task_pool *tp;
+        int w, t;
+        for (w = 0; w < nw; w++)
+        {
+                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+                {
+                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
+                        double length = starpu_history_based_job_expected_perf(tp->cl->model, arch, tp->footprint);
+
+                        if (isnan(length))
+                                times[w][t] = NAN;
+                        else
+                                times[w][t] = length / 1000.;
+                }
+        }
+}
+
+int _get_idx_sched_ctx(int sched_ctx_id)
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+	int i;
+	for(i = 0; i < nsched_ctxs; i++)
+		if(sched_ctxs[i] == sched_ctx_id)
+			return i;
+	return -1;
+}
+
+/*
+ * GNU Linear Programming Kit backend
+ */
+#ifdef HAVE_GLPK_H
+#include <glpk.h>
+static void _glp_resolve(int ns, int nw, int nt, double res[ns][nw][nt], int integer)
+{
+	struct bound_task_pool * tp;
+	int t, w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MIN);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		double times[nw][nt];
+		int ne =
+			nw * (nt+1)	/* worker execution time */
+			+ nt * nw	/* task completion */
+			+ nw * (nt+ns)	/* per-context share of each worker */
+			+ 1; /* glp dumbness */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+		_starpu_get_tasks_times(nw, nt, times);
+
+		/* Variables: number of tasks i assigned to worker j, and tmax */
+		glp_add_cols(lp, nw*nt+1);
+#define colnum(w, t) ((t)*nw+(w)+1)
+		glp_set_obj_coef(lp, nw*nt+1, 1.);
+
+		for (w = 0; w < nw; w++)
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%dt%dn", w, t);
+				glp_set_col_name(lp, colnum(w, t), name);
+				if (integer)
+					glp_set_col_kind(lp, colnum(w, t), GLP_IV);
+				glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
+			}
+		glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
+
+		/* Total worker execution time */
+		glp_add_rows(lp, nw);
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
+			int someone = 0;
+			for (w = 0; w < nw; w++)
+				if (!isnan(times[w][t]))
+					someone = 1;
+			if (!someone)
+			{
+				/* This task does not have any performance model at all, abort */
+				glp_delete_prob(lp);
+				return;
+			}
+		}
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "worker %s", name);
+			glp_set_row_name(lp, w+1, title);
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				ia[n] = w+1;
+				ja[n] = colnum(w, t);
+				if (isnan(times[w][t]))
+					ar[n] = 1000000000.;
+				else
+					ar[n] = times[w][t];
+				n++;
+			}
+			/* tmax */
+			ia[n] = w+1;
+			ja[n] = nw*nt+1;
+			ar[n] = -1;
+			n++;
+			glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
+		}
+
+		/* Total task completion */
+		glp_add_rows(lp, nt);
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
+			char title[64];
+			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
+			glp_set_row_name(lp, nw+t+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = nw+t+1;
+				ja[n] = colnum(w, t);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
+		}
+
+		int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+		/* For each worker w and context s: (number of tasks of s assigned to w) - 0.3 * tmax <= 0 */
+		glp_add_rows(lp, nw*ns);
+		for (w = 0; w < nw; w++)
+		{
+			for(s = 0; s < ns; s++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %x ctx %x limit", w, s);
+				glp_set_row_name(lp, nw+nt+w+(s*nw)+1, title);
+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+				{
+					if(tp->sched_ctx_id == sched_ctxs[s])
+					{
+						ia[n] = nw+nt+w+(s*nw)+1;
+						ja[n] = colnum(w, t);
+						ar[n] = 1;
+						n++;
+					}
+				}
+
+				/* tmax */
+				ia[n] = nw+nt+w+(s*nw)+1;
+				ja[n] = nw*nt+1;
+				ar[n] = -0.3;
+				n++;
+
+				glp_set_row_bnds(lp, nw+nt+w+(s*nw)+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+	{
+		glp_delete_prob(lp);
+		return;
+	}
+
+        if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+        }
+
+
+	double tmax = glp_get_obj_val(lp);
+
+        printf("Theoretical minimum execution time: %f ms\n", tmax);
+
+	for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+	{
+		for (w = 0; w < nw; w++)
+		{
+			s = _get_idx_sched_ctx(tp->sched_ctx_id);
+			res[s][w][t] = glp_get_col_prim(lp, colnum(w, t));
+		}
+	}
+
+	glp_delete_prob(lp);
+}
+
+void _redistribute_resources_in_ctxs2(int ns, int nw, int nt, double res[ns][nw][nt])
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+        struct bound_task_pool * tp;
+	int s, s2, w, t;
+
+	for(s = 0; s < ns; s++)
+	{
+		int workers_to_add[nw], workers_to_remove[nw];
+		for(w = 0; w < nw; w++)
+		{
+			workers_to_add[w] = -1;
+			workers_to_remove[w] = -1;
+		}
+
+		int nadd = 0, nremove = 0;
+
+		for(w = 0; w < nw; w++)
+		{
+			int found = 0;
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
+				if(tp->sched_ctx_id == sched_ctxs[s])
+				{
+					if(res[s][w][t] >= 1.0)
+					{
+						workers_to_add[nadd++] = w;
+						found = 1;
+						break;
+					}
+				}
+			}
+			if(!found)
+				workers_to_remove[nremove++] = w;
+		}
+
+		
+		unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], STARPU_ALL);
+		if(nadd != nworkers_ctx)
+		{
+			printf("%d: add %d \n", sched_ctxs[s], nadd);
+			printf("%d: remove %d \n", sched_ctxs[s], nremove);
+			sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+
+			struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
+			int i;
+			for(i = 0; i < nadd; i++)
+				new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
+		}
+	}
+}
+
+void lp2_handle_poped_task(unsigned sched_ctx, int worker)
+{
+	if(_velocity_gap_btw_ctxs())
+	{
+		int ns = sched_ctx_hypervisor_get_nsched_ctxs();
+		int nw = starpu_worker_get_count(); /* Number of different workers */
+		int nt = 0; /* Number of different kinds of tasks */
+		struct bound_task_pool * tp;
+		for (tp = task_pools; tp; tp = tp->next)
+			nt++;
+		
+       		double res[ns][nw][nt];
+
+		int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
+		{
+			_glp_resolve(ns, nw, nt, res, 0);
+/* 			int i, j, k; */
+/* 			for( i = 0; i < ns; i++) */
+/* 				for(j = 0; j < nw; j++) */
+/* 					for(k = 0; k < nt; k++) */
+/* 					{ */
+/* 						printf("ctx %d/worker %d/task type %d: res = %lf \n", i, j, k, res[i][j][k]); */
+/* 					} */
+		
+			_redistribute_resources_in_ctxs2(ns, nw, nt, res);
+			pthread_mutex_unlock(&act_hypervisor_mutex);
+		}
+	}		
+}
+
+struct hypervisor_policy lp2_policy = {
+	.handle_poped_task = lp2_handle_poped_task,
+	.handle_pushed_task = NULL,
+	.handle_idle_cycle = NULL,
+	.handle_idle_end = NULL,
+	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = lp2_handle_submitted_job,
+	.custom = 0,
+	.name = "lp2"
+};
+	
+#endif /* HAVE_GLPK_H */
+
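The policy is only compiled and registered when GLPK is available (HAVE_GLPK_H, see sched_ctx_hypervisor.c below). Assuming the hypervisor resolves predefined policies by name when a non-custom policy is passed to sched_ctx_hypervisor_init (that lookup is an assumption; the commit only shows lp2_policy being added to predefined_policies), selecting it would look like:

	struct hypervisor_policy policy = { .name = "lp2", .custom = 0 };
	struct starpu_performance_counters *perf_counters = sched_ctx_hypervisor_init(&policy);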

+ 201 - 36
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -22,14 +22,13 @@
  */
 #ifdef HAVE_GLPK_H
 #include <glpk.h>
-static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
+static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw])
 {
 	int s, w;
 	glp_prob *lp;
 	
 	int ne =
-		ns * (nw+1)     /* worker execution time */
-		+ ns * nw
+		(ns*nw+1)*(ns+nw)
 		+ 1; /* glp dumbness */
 	int n = 1;
 	int ia[ne], ja[ne];
@@ -41,13 +40,9 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 	glp_set_obj_dir(lp, GLP_MAX);
         glp_set_obj_name(lp, "max speed");
 
-
-#define colnum(s, w) ((w)*ns+(s)+1)
 	/* we add nw*ns columns one for each type of worker in each context 
 	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
 	glp_add_cols(lp, nw*ns+1);
-	/* Z = 1/tmax -> 1/vmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
-	glp_set_obj_coef(lp, nw*ns+1, 1.);
 
 	for(s = 0; s < ns; s++)
 	{
@@ -55,13 +50,19 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		{
 			char name[32];
 			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
-			glp_set_col_name(lp, colnum(s,w), name);
-			glp_set_col_bnds(lp, colnum(s,w), GLP_LO, 0.0, 0.0);
+			glp_set_col_name(lp, n, name);
+			glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0);
+			n++;
 		}
 	}
-	glp_set_col_bnds(lp, nw*ns+1, GLP_DB, 0.0, 1.0);
 
+	/*1/tmax should belong to the interval [0.0;1.0]*/
+	glp_set_col_name(lp, n, "vmax");
+	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
+	glp_set_obj_coef(lp, n, 1.0);
 
+	n = 1;
 	/* one row corresponds to one ctx*/
 	glp_add_rows(lp, ns);
 
@@ -71,23 +72,38 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		snprintf(name, sizeof(name), "ctx%d", s);
 		glp_set_row_name(lp, s+1, name);
 		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
+
 		for(w = 0; w < nw; w++)
 		{
-			ia[n] = s+1;
-			ja[n] = colnum(s, w);
-			ar[n] = v[s][w];
-			printf("v[%d][%d] = %lf\n", s, w, v[s][w]);
-			n++;
+			int s2;
+			for(s2 = 0; s2 < ns; s2++)
+			{
+				if(s2 == s)
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = v[s][w];
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
 		}
 		/* 1/tmax */
 		ia[n] = s+1;
 		ja[n] = ns*nw+1;
 		ar[n] = (-1) * flops[s];
-		printf("%d: flops %lf\n", s, flops[s]);
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
 		n++;
 	}
 	
-	/*we add another linear constraint : sum(all cpus) = 3 and sum(all gpus) = 9 */
+	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
 	glp_add_rows(lp, nw);
 
 	for(w = 0; w < nw; w++)
@@ -97,36 +113,64 @@ static void _glp_resolve(int ns, int nw, double v[ns][nw], double flops[ns])
 		glp_set_row_name(lp, ns+w+1, name);
 		for(s = 0; s < ns; s++)
 		{
-			ia[n] = ns+w+1;
-			ja[n] = colnum(s,w);
-			ar[n] = 1;
-			n++;
+			int w2;
+			for(w2 = 0; w2 < nw; w2++)
+			{
+				if(w2 == w)
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 1.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
 		}
-		if(w == 0) glp_set_row_bnds(lp, ns+w+1, GLP_FX, 3., 3.);
-		if(w == 1) glp_set_row_bnds(lp, ns+w+1, GLP_FX, 9., 9.);
-	}
+		/* 1/tmax */
+		ia[n] = ns+w+1;
+		ja[n] = ns*nw+1;
+		ar[n] = 0.0;
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, 3., 3.);
 
+		/*sum(all cpus) = 9*/
+		if(w == 1) 
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, 9., 9.);
+	}
 
 	STARPU_ASSERT(n == ne);
 
 	glp_load_matrix(lp, ne-1, ia, ja, ar);
 
-	glp_simplex(lp, NULL);
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	glp_simplex(lp, &parm);
+//	glp_simplex(lp, NULL);
+	
 	double vmax1 = glp_get_obj_val(lp);
-	printf("vmax1 = %lf \n", vmax1);
-	double res[ne];
+//	printf("vmax1 = %lf \n", vmax1);
+
 	n = 1;
-	for(w = 0; w < nw; w++)
+	for(s = 0; s < ns; s++)
 	{
-		for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
 		{
-			res[n] = glp_get_col_prim(lp, colnum(s,w));
-			printf("ctx %d/worker type %d: n = %lf \n", s, w, res[n]);
+			res[s][w] = glp_get_col_prim(lp, n);
 			n++;
 		}
 	}
-//	res[n] = glp_get_col_prim(lp, ns*nw+1);
-//	printf("vmax = %lf \n", res[n]);
 
 	glp_delete_prob(lp);
 	return;
@@ -152,7 +196,7 @@ int _velocity_gap_btw_ctxs()
 				other_sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[j]);
 				double other_ctx_v = _get_ctx_velocity(other_sc_w);
 				double gap = ctx_v < other_ctx_v ? ctx_v / other_ctx_v : other_ctx_v / ctx_v;
-				if(gap > 0.5)
+				if(gap > 0.7)
 					return 1;
 			}
 		}
@@ -161,15 +205,120 @@ int _velocity_gap_btw_ctxs()
 	return 0;
 }
 
+void _round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
+{
+	int s, w;
+	double left_res[nw];
+	for(w = 0; w < nw; w++)
+		left_res[w] = 0.0;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			int x = floor(res[s][w]);
+			double x_double = (double)x;
+			double diff = res[s][w] - x_double;
+			
+			if(diff != 0.0)
+			{
+				if(diff > 0.5)
+				{
+					if(left_res[w] != 0.0)
+					{
+						if((diff + left_res[w]) > 0.5)
+						{
+							res_rounded[s][w] = x + 1;
+							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
+						}
+						else
+						{
+							res_rounded[s][w] = x;
+							left_res[w] = (-1.0) * (diff + left_res[w]);
+						}
+					}
+					else
+					{
+						res_rounded[s][w] = x + 1;
+						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
+					}
+
+				}
+				else
+				{
+					if((diff + left_res[w]) > 0.5)
+					{
+						res_rounded[s][w] = x + 1;
+						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
+					}
+					else
+					{
+						res_rounded[s][w] = x;
+						left_res[w] = diff;
+					}
+				}
+			}
+		}
+	}		
+}
+
+void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw])
+{
+	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
+	int s, s2, w;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			enum starpu_archtype arch;
+			if(w == 0) arch = STARPU_CUDA_WORKER;
+			if(w == 1) arch = STARPU_CPU_WORKER;
+
+			unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch);
+			if(nworkers_ctx > res_rounded[s][w])
+			{
+				int nworkers_to_move = nworkers_ctx - res_rounded[s][w];
+				int receiving_s = -1;
+				
+				for(s2 = 0; s2 < ns; s2++)
+				{
+					if(sched_ctxs[s2] != sched_ctxs[s])
+					{
+						int nworkers_ctx2 = get_nworkers_ctx(sched_ctxs[s2], arch);
+						if((res_rounded[s2][w] - nworkers_ctx2) == nworkers_to_move)
+						{
+							receiving_s = sched_ctxs[s2];
+							break;
+						}
+					}
+				}
+				if(receiving_s != -1)
+				{
+					int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
+					sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move);
+					struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
+					int i;
+					for(i = 0; i < nworkers_to_move; i++)
+						new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] :  new_config->new_workers_max_idle;
+
+					free(workers_to_move);
+				}
+			}
+		}
+	}
+}
+
 void lp_handle_poped_task(unsigned sched_ctx, int worker)
 {
 	if(_velocity_gap_btw_ctxs())
 	{
 		int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
 		int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
+//		int nsched_ctxs = 3;
 		
 		double v[nsched_ctxs][2];
 		double flops[nsched_ctxs];
+		double res[nsched_ctxs][2];
+
 		int i = 0;
 		struct sched_ctx_wrapper* sc_w;
 		for(i = 0; i < nsched_ctxs; i++)
@@ -177,13 +326,28 @@ void lp_handle_poped_task(unsigned sched_ctx, int worker)
 			sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
 			v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
 			v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
-			flops[i] = sc_w->total_flops;
+			flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
 		}
                 
 		int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
 		if(ret != EBUSY)
 		{
-			_glp_resolve(nsched_ctxs, 2, v, flops);
+			_glp_resolve(nsched_ctxs, 2, v, flops, res);
+/* 			for( i = 0; i < nsched_ctxs; i++) */
+/* 			{ */
+/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 0, res[i][0]); */
+/* 				printf("ctx %d/worker type %d: n = %lf \n", i, 1, res[i][1]); */
+/* 			} */
+			int res_rounded[nsched_ctxs][2];
+			_round_double_to_int(nsched_ctxs, 2, res, res_rounded);
+/* 			for( i = 0; i < nsched_ctxs; i++) */
+/* 			{ */
+/* 				printf("ctx %d/worker type %d: n = %d \n", i, 0, res_rounded[i][0]); */
+/* 				printf("ctx %d/worker type %d: n = %d \n", i, 1, res_rounded[i][1]); */
+/* 			} */
+			
+			_redistribute_resources_in_ctxs(nsched_ctxs, 2, res_rounded);
+			
 			pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
 	}		
@@ -195,6 +359,7 @@ struct hypervisor_policy lp_policy = {
 	.handle_idle_cycle = NULL,
 	.handle_idle_end = NULL,
 	.handle_post_exec_hook = NULL,
+	.handle_submitted_job = NULL,
 	.custom = 0,
 	.name = "lp"
 };

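The carry-based rounding in _round_double_to_int above preserves the per-worker-type totals where independent rounding would not. A worked example with two contexts and one worker type, supposing the LP returned res = {2.4, 1.4} (3.8 workers in total): context 0 has fraction 0.4 <= 0.5, so it is rounded down to 2 and the 0.4 is carried over in left_res; context 1 then sees 0.4 + 0.4 = 0.8 > 0.5 and is rounded up to 2. The result {2, 2} uses all 4 available workers, whereas rounding each entry separately would give {2, 1} and leave one worker unassigned.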
+ 2 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_utils.c

@@ -1,7 +1,7 @@
 #include <sched_ctx_hypervisor.h>
 #include <pthread.h>
 
-enum starpu_archtype STARPU_ALL;
+//enum starpu_archtype STARPU_ALL;
 
 static int _compute_priority(unsigned sched_ctx)
 {
@@ -300,6 +300,7 @@ double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w)
                 double elapsed_time = curr_time - sc_w->start_time;
                 return elapsed_flops/elapsed_time;
         }
+	return 0.0;
 }
 
 /* compute an average value of the cpu velocity */

+ 98 - 2
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -24,13 +24,14 @@ static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_poped_task(unsigned sched_ctx, int worker, double flops);
 static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
-
+static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
 
 extern struct hypervisor_policy idle_policy;
 extern struct hypervisor_policy app_driven_policy;
 extern struct hypervisor_policy gflops_rate_policy;
 #ifdef HAVE_GLPK_H
 extern struct hypervisor_policy lp_policy;
+extern struct hypervisor_policy lp2_policy;
 #endif
 
 static struct hypervisor_policy *predefined_policies[] = {
@@ -38,6 +39,7 @@ static struct hypervisor_policy *predefined_policies[] = {
 	&app_driven_policy,
 #ifdef HAVE_GLPK_H
 	&lp_policy,
+	&lp2_policy,
 #endif
 	&gflops_rate_policy
 };
@@ -58,6 +60,7 @@ static void _load_hypervisor_policy(struct hypervisor_policy *policy)
 	hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
 	hypervisor.policy.handle_idle_end = policy->handle_idle_end;
 	hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook;
+	hypervisor.policy.handle_submitted_job = policy->handle_submitted_job;
 }
 
 
@@ -162,6 +165,7 @@ struct starpu_performance_counters* sched_ctx_hypervisor_init(struct hypervisor_
 	perf_counters->notify_poped_task = notify_poped_task;
 	perf_counters->notify_post_exec_hook = notify_post_exec_hook;
 	perf_counters->notify_idle_end = notify_idle_end;
+	perf_counters->notify_submitted_job = notify_submitted_job;
 
 	starpu_notify_hypervisor_exists();
 
@@ -307,10 +311,30 @@ static void _get_cpus(int *workers, int nworkers, int *cpus, int *ncpus)
 	}
 }
 
+int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch)
+{
+	int nworkers_ctx = 0;
+	struct worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
+	int worker;
+
+	if(workers->init_cursor)
+		workers->init_cursor(workers);
+
+	while(workers->has_next(workers))
+	{
+		worker = workers->get_next(workers);
+		enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
+		if( curr_arch == arch)
+			nworkers_ctx++;
+	}
+	return nworkers_ctx;
+}
+
 /* actually move the workers: the cpus are moved, gpus are only shared  */
 /* forbids another resize request before this one is take into account */
 void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move)
 {
+	printf("nworkers to move %d resize_sender %d resize_receiver %d\n", nworkers_to_move,  hypervisor.resize[sender_sched_ctx], hypervisor.resize[receiver_sched_ctx]);
 	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx] && hypervisor.resize[receiver_sched_ctx])
 	{
 		int j;
@@ -324,7 +348,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 
 		_get_cpus(workers_to_move, nworkers_to_move, cpus, &ncpus);
 
-		if(ncpus != 0)
+//		if(ncpus != 0)
 			starpu_remove_workers_from_sched_ctx(cpus, ncpus, sender_sched_ctx);
 
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
@@ -350,6 +374,70 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 	return;
 }
 
+void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx)
+{
+	if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx])
+	{
+		int j;
+		printf("resize ctx %d with", sched_ctx);
+		for(j = 0; j < nworkers_to_add; j++)
+			printf(" %d", workers_to_add[j]);
+		printf("\n");
+
+		starpu_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, sched_ctx);
+
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_add * sizeof(int));
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_add;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_add * sizeof(int));
+
+
+		int i;
+		for(i = 0; i < nworkers_to_add; i++)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_add[i]] = 0.0;
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_add[i];	
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+		}
+
+		hypervisor.resize[sched_ctx] = 0;
+	}
+
+	return;
+}
+
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx)
+{
+	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
+	{
+		int j;
+		printf("resize ctx %d with", sched_ctx);
+		for(j = 0; j < nworkers_to_remove; j++)
+			printf(" %d", workers_to_remove[j]);
+		printf("\n");
+
+		starpu_remove_workers_from_sched_ctx(workers_to_remove, nworkers_to_remove, sched_ctx);
+
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_remove;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+
+
+		int i;
+		for(i = 0; i < nworkers_to_remove; i++)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_remove[i]] = 0.0;
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_remove[i];	
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+		}
+
+		hypervisor.resize[sched_ctx] = 0;
+	}
+
+	return;
+}
+
 static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
 {
 	int i;
@@ -514,6 +602,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += elapsed_flops;
 	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
 
 	if(hypervisor.nsched_ctxs > 1)
 	{
@@ -561,6 +650,13 @@ static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
 		}
 	}
 }
+
+static void notify_submitted_job(struct starpu_task *task, unsigned footprint)
+{
+	if(hypervisor.policy.handle_submitted_job)
+		hypervisor.policy.handle_submitted_job(task, footprint);
+}
+
 struct sched_ctx_wrapper* sched_ctx_hypervisor_get_wrapper(unsigned sched_ctx)
 {
 	return &hypervisor.sched_ctx_w[sched_ctx];

+ 10 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1026,6 +1026,16 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 	return exp;
 }
 
+double starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned footprint)
+{
+	struct _starpu_job j =
+		{
+			.footprint = footprint,
+			.footprint_is_computed = 1,
+		};
+	return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl);
+}
+
 void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
 {
 	if (model)

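The new entry point lets the hypervisor query the history-based model from a footprint alone, without a live job. A usage sketch mirroring _starpu_get_tasks_times in lp2_policy.c above (workerid, cl and footprint are placeholders, not names from this commit):

	#include <math.h>

	enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(workerid);
	double length = starpu_history_based_job_expected_perf(cl->model, arch, footprint);
	if (!isnan(length))          /* NaN means no calibrated model for this footprint */
		length /= 1000.;     /* lp2 feeds the LP in milliseconds */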
+ 11 - 0
src/core/task.c

@@ -219,6 +219,17 @@ int _starpu_submit_job(struct _starpu_job *j)
 
 	_starpu_increment_nsubmitted_tasks();
 	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
+	
+#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
+	if(sched_ctx != NULL && j->task->sched_ctx != 0 && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS
+	   && sched_ctx->perf_counters != NULL)
+	{
+		_starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
+		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint);
+	}
+#endif
+
 	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 
 	/* Need to atomically set submitted to 1 and check dependencies, since
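Taken together, these hunks wire the following path from task submission down to the resizing policy (all symbols are from this commit):

	_starpu_submit_job (src/core/task.c)
	  -> _starpu_compute_buffers_footprint        /* footprint computed eagerly at submit time */
	  -> perf_counters->notify_submitted_job      /* counter installed by sched_ctx_hypervisor_init */
	     -> notify_submitted_job (sched_ctx_hypervisor.c)
	        -> hypervisor.policy.handle_submitted_job  /* e.g. lp2_handle_submitted_job pools the task by codelet and footprint */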