hace 12 años · ac4f56a48c
--- a/include/starpu_sched_ctx.h
+++ b/include/starpu_sched_ctx.h
@@ -67,6 +67,7 @@ struct starpu_sched_ctx_performance_counters
 
				 	void (*notify_poped_task)(unsigned sched_ctx_id, int worker, double flops, size_t data_size);
			
 
				 	void (*notify_post_exec_hook)(unsigned sched_ctx_id, int taskid);
			
 
				 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint);
			
 
				+	void (*notify_delete_context)(unsigned sched_ctx);
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
			
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -69,7 +69,6 @@ static int posted_requests = 0, newer_requests, barrier_running = 0;
 
				 /********************************************************/
			
 
				 
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
			
 
				-							      size_t size,
			
 
				 							      int srcdst, int mpi_tag, MPI_Comm comm,
			
 
				 							      unsigned detached, void (*callback)(void *), void *arg,
			
 
				 							      enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
			
@@ -91,7 +90,6 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 
				 	req->request_type = request_type;
			
 
				 
			
 
				 	req->data_handle = data_handle;
			
 
				-	req->count = size;
			
 
				 	req->srcdst = srcdst;
			
 
				 	req->mpi_tag = mpi_tag;
			
 
				 	req->comm = comm;
			
@@ -145,7 +143,13 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				 
			
 
				-static void _starpu_mpi_isend_pack_func(struct _starpu_mpi_req *req)
			
 
				+/* Callback passed by _starpu_mpi_isend_size_func to the detached send of
				+ * req->count; arg is the original request, which is handed to
				+ * _starpu_mpi_isend_data_func when the callback runs. */
				+static void _starpu_mpi_isend_size_callback(void *arg)
				+{
				+	struct _starpu_mpi_req *data_req = arg;
				+
				+	_starpu_mpi_isend_data_func(data_req);
				+}
			
 
				+
			
 
				+static void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
			
 
				 {
			
 
				 	_starpu_mpi_handle_allocate_datatype(req->data_handle, &req->datatype, &req->user_datatype);
			
 
				 	if (req->user_datatype == 0)
			
@@ -156,8 +160,12 @@ static void _starpu_mpi_isend_pack_func(struct _starpu_mpi_req *req)
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				+		starpu_data_handle_t count_handle;
			
 
				+
			
 
				 		starpu_handle_pack_data(req->data_handle, &req->ptr, &req->count);
			
 
				-		_starpu_mpi_isend_data_func(req);
			
 
				+		starpu_variable_data_register(&count_handle, 0, (uintptr_t)&req->count, sizeof(req->count));
			
 
				+		_starpu_mpi_isend_common(count_handle, req->srcdst, req->mpi_tag, req->comm, 1, _starpu_mpi_isend_size_callback, req);
			
 
				+		starpu_data_unregister_submit(count_handle);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -165,20 +173,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 
				 							int dest, int mpi_tag, MPI_Comm comm,
			
 
				 							unsigned detached, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				-	enum starpu_data_interface_id id = starpu_handle_get_interface_id(data_handle);
			
 
				-	size_t size;
			
 
				-
			
 
				-	size = starpu_handle_get_size(data_handle);
			
 
				-
			
 
				-	if (id >= STARPU_MAX_INTERFACE_ID)
			
 
				-	{
			
 
				-		starpu_data_handle_t size_handle;
			
 
				-		starpu_variable_data_register(&size_handle, 0, (uintptr_t)&(size), sizeof(size));
			
 
				-		starpu_mpi_send(size_handle, dest, mpi_tag, comm);
			
 
				-		starpu_data_unregister(size_handle);
			
 
				-	}
			
 
				-
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, size, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_pack_func, STARPU_R);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
			
@@ -253,7 +248,27 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				 
			
 
				-static void _starpu_mpi_irecv_pack_func(struct _starpu_mpi_req *req)
			
 
				+struct _starpu_mpi_irecv_size_callback /* argument bundle handed to the _starpu_mpi_irecv_size_callback() function */
				+{
				+	starpu_data_handle_t handle; /* variable handle registered over req->count; unregistered by the callback */
				+	struct _starpu_mpi_req *req; /* request passed to _starpu_mpi_irecv_data_func once its buffer is allocated */
				+};
			
 
				+
			
 
				+/* Callback of the detached receive posted by _starpu_mpi_irecv_size_func.
				+ * arg is the struct _starpu_mpi_irecv_size_callback allocated there: the
				+ * temporary handle over req->count is unregistered, a buffer of req->count
				+ * bytes is allocated into req->ptr, the request is handed to
				+ * _starpu_mpi_irecv_data_func, and arg is freed. */
				+static void _starpu_mpi_irecv_size_callback(void *arg)
				+{
				+	struct _starpu_mpi_irecv_size_callback *size_cb = arg;
				+	struct _starpu_mpi_req *req = size_cb->req;
				+
				+	starpu_data_unregister(size_cb->handle);
				+
				+	/* req->count is only read after the handle over it has been unregistered */
				+	req->ptr = malloc(req->count);
				+#ifdef STARPU_DEVEL
				+#warning TODO: in some cases, callback->req->count is incorrect, we need to fix that
				+#endif
				+	STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld\n", req->count);
				+
				+	_starpu_mpi_irecv_data_func(req);
				+	free(size_cb);
				+}
			
 
				+
			
 
				+static void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
			
 
				 {
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				 
			
@@ -266,27 +281,16 @@ static void _starpu_mpi_irecv_pack_func(struct _starpu_mpi_req *req)
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		req->ptr = malloc(req->count);
			
 
				-		_starpu_mpi_irecv_data_func(req);
			
 
				+		struct _starpu_mpi_irecv_size_callback *callback = malloc(sizeof(struct _starpu_mpi_irecv_size_callback));
			
 
				+		callback->req = req;
			
 
				+		starpu_variable_data_register(&callback->handle, 0, (uintptr_t)&(callback->req->count), sizeof(callback->req->count));
			
 
				+		_starpu_mpi_irecv_common(callback->handle, req->srcdst, req->mpi_tag, req->comm, 1, _starpu_mpi_irecv_size_callback, callback);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				-	enum starpu_data_interface_id id = starpu_handle_get_interface_id(data_handle);
			
 
				-	size_t size=0;
			
 
				-
			
 
				-	if (id >= STARPU_MAX_INTERFACE_ID)
			
 
				-	{
			
 
				-		starpu_data_handle_t size_handle;
			
 
				-		MPI_Status status;
			
 
				-
			
 
				-		starpu_variable_data_register(&size_handle, 0, (uintptr_t)&(size), sizeof(size));
			
 
				-		starpu_mpi_recv(size_handle, source, mpi_tag, comm, &status);
			
 
				-		starpu_data_unregister(size_handle);
			
 
				-	}
			
 
				-
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, size, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_pack_func, STARPU_W);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
			
@@ -351,13 +355,8 @@ static void _starpu_mpi_probe_func(struct _starpu_mpi_req *req)
 
				 
			
 
				 int starpu_mpi_irecv_probe_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				-	size_t size;
			
 
				-
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				-
			
 
				-	size = starpu_handle_get_size(data_handle);
			
 
				-	_starpu_mpi_isend_irecv_common(data_handle, size, source, mpi_tag, comm, 1, callback, arg, PROBE_REQ, _starpu_mpi_probe_func, STARPU_W);
			
 
				-
			
 
				+	_starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, PROBE_REQ, _starpu_mpi_probe_func, STARPU_W);
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return 0;
			
 
				 }
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/debit_lp_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/debit_lp_policy.c
@@ -48,8 +48,6 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
 
				 				if(velocity[s][w] < 1.0)
			
 
				 					velocity[s][w] = arch == STARPU_CPU_WORKER ? 5.0 : 100.0;
			
 
				 			}
			
 
				-			
			
 
				-//			printf("v[w%d][s%d] = %lf\n",w, s, velocity[s][w]);
			
 
				 		}
			
 
				 	}
			
 
				 	
			
@@ -66,12 +64,8 @@ static unsigned _compute_max_velocity(int ns, int nw, double w_in_s[ns][nw], int
 
				 
			
 
				 	float timing = (float)(diff_s*1000000 + diff_us)/1000;
			
 
				 
			
 
				-//        fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);
			
 
				 	if(res > 0.0)
			
 
				-	{
			
 
				-//		printf("maxv = %lf\n", res);
			
 
				 		return 1;
			
 
				-	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -85,7 +79,6 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 
				 	int w, s;
			
 
				 	glp_prob *lp;
			
 
				 
			
 
				-//	printf("try with tmax %lf\n", tmax);
			
 
				 	lp = glp_create_prob();
			
 
				 	glp_set_prob_name(lp, "StarPU theoretical bound");
			
 
				 	glp_set_obj_dir(lp, GLP_MAX);
			
@@ -202,6 +195,14 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 
				                 glp_init_iocp(&iocp);
			
 
				                 iocp.msg_lev = GLP_MSG_OFF;
			
 
				                 glp_intopt(lp, &iocp);
			
 
				+		int stat = glp_mip_status(lp);
			
 
				+		/* if we don't have a solution return */
			
 
				+		if(stat == GLP_NOFEAS)
			
 
				+		{
			
 
				+			glp_delete_prob(lp);
			
 
				+			lp = NULL;
			
 
				+			return 0.0;
			
 
				+		}
			
 
				         }
			
 
				 
			
 
				 	int stat = glp_get_prim_stat(lp);
			
@@ -223,7 +224,6 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double w_in_
 
				 				w_in_s[s][w] = (double)glp_mip_col_val(lp, s*nw+w+1);
			
 
				 			else
			
 
				 				w_in_s[s][w] = glp_get_col_prim(lp, s*nw+w+1);
			
 
				-//			printf("w_in_s[s%d][w%d] = %lf \n", s, w, w_in_s[s][w]);
			
 
				 		}
			
 
				 
			
 
				 	glp_delete_prob(lp);
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
@@ -311,6 +311,14 @@ static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops
 
				                 glp_init_iocp(&iocp);
			
 
				                 iocp.msg_lev = GLP_MSG_OFF;
			
 
				                 glp_intopt(lp, &iocp);
			
 
				+		int stat = glp_mip_status(lp);
			
 
				+		/* if we don't have a solution return */
			
 
				+		if(stat == GLP_NOFEAS)
			
 
				+		{
			
 
				+			glp_delete_prob(lp);
			
 
				+			lp = NULL;
			
 
				+			return 0.0;
			
 
				+		}
			
 
				         }
			
 
				 
			
 
				 	int stat = glp_get_prim_stat(lp);
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c
@@ -46,7 +46,7 @@ static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, dou
 
				 	/* smallest possible tmax, difficult to obtain as we
			
 
				 	   compute the nr of flops and not the tasks */
			
 
				 	double smallest_tmax = _lp_get_tmax(nw, workers);
			
 
				-	double tmax = smallest_tmax * ns;
			
 
				+	double tmax = smallest_tmax * ns * 2;
			
 
				 
			
 
				 	double res = 1.0;
			
 
				 	unsigned has_sol = 0;
			
@@ -231,11 +231,65 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 
				 		nt++;
			
 
				 
			
 
				 	double w_in_s[ns][nw];
			
 
				-
			
 
				-	unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, NULL, sched_ctxs, workers);
			
 
				+	double tasks[nw][nt];
			
 
				+	unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, tasks, sched_ctxs, workers);
			
 
				 	/* if we did find at least one solution redistribute the resources */
			
 
				 	if(found_sol)
			
 
				-		_redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 1, sched_ctxs, workers);
			
 
				+	{
				+		/* Fold the per-worker assignment w_in_s into per-context counts by worker
				+		 * type: column 0 is STARPU_CUDA_WORKER, column 1 every other type. The
				+		 * rounded count takes a CUDA worker from a share of 0.3 and any other
				+		 * worker above a share of 0.5.
				+		 * The arrays must not be called "nworkers": that name hid the worker
				+		 * count, so the count argument of _lp_distribute_resources_in_ctxs()
				+		 * below received the double array instead.
				+		 * NOTE(review): assumes the enclosing function's worker-count parameter
				+		 * is named nworkers (its declaration is cut off in this view) -- confirm. */
				+		int w, s;
				+		double nworkers_per_type[ns][2];
				+		int nworkers_per_type_rounded[ns][2];
				+		for(s = 0; s < ns; s++)
				+		{
				+			nworkers_per_type[s][0] = 0.0;
				+			nworkers_per_type[s][1] = 0.0;
				+			nworkers_per_type_rounded[s][0] = 0;
				+			nworkers_per_type_rounded[s][1] = 0;
				+		}
				+
				+		for(s = 0; s < ns; s++)
				+		{
				+			for(w = 0; w < nw; w++)
				+			{
				+				/* STARPU_CUDA_WORKER is an enum starpu_archtype value */
				+				enum starpu_archtype arch = starpu_worker_get_type(w);
				+
				+				if(arch == STARPU_CUDA_WORKER)
				+				{
				+					nworkers_per_type[s][0] += w_in_s[s][w];
				+					if(w_in_s[s][w] >= 0.3)
				+						nworkers_per_type_rounded[s][0]++;
				+				}
				+				else
				+				{
				+					nworkers_per_type[s][1] += w_in_s[s][w];
				+					if(w_in_s[s][w] > 0.5)
				+						nworkers_per_type_rounded[s][1]++;
				+				}
				+			}
				+		}
				+
				+		int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : 
				+			sched_ctxs;
				+
				+		/* redistribute when at least one context already owns a worker,
				+		 * otherwise distribute the given worker list from scratch */
				+		unsigned has_workers = 0;
				+		for(s = 0; s < ns; s++)
				+		{
				+			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
				+									     STARPU_ANY_WORKER);
				+			if(nworkers_ctx != 0)
				+			{
				+				has_workers = 1;
				+				break;
				+			}
				+		}
				+		if(has_workers)
				+			_lp_redistribute_resources_in_ctxs(nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type);
				+		else
				+			_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
				+	
				+//		_redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 1, sched_ctxs, workers);
				+	}
			
 
				 }
			
 
				 
			
 
				 static void size_if_required()
			
@@ -480,6 +534,16 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 
				 		return 0.0;
			
 
				 	}
			
 
				 
			
 
				+	int stat = glp_get_prim_stat(lp);
			
 
				+	/* if we don't have a solution return */
			
 
				+	if(stat == GLP_NOFEAS)
			
 
				+	{
			
 
				+		glp_delete_prob(lp);
			
 
				+		lp = NULL;
			
 
				+		return 0.0;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				 	if (integer)
			
 
				         {
			
 
				                 glp_iocp iocp;
			
@@ -494,16 +558,6 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 
				 			lp = NULL;
			
 
				 			return 0.0;
			
 
				 		}
			
 
				-		
			
 
				-        }
			
 
				-
			
 
				-	int stat = glp_get_prim_stat(lp);
			
 
				-	/* if we don't have a solution return */
			
 
				-	if(stat == GLP_NOFEAS)
			
 
				-	{
			
 
				-		glp_delete_prob(lp);
			
 
				-		lp = NULL;
			
 
				-		return 0.0;
			
 
				 	}
			
 
				 
			
 
				 	double res = glp_get_obj_val(lp);
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c
@@ -68,20 +68,38 @@ static void lp_size_ctxs(int *sched_ctxs, int ns, int *workers, int nworkers)
 
				 	if(vmax != 0.0)
			
 
				 	{
			
 
				 		printf("********size\n");
			
 
				-/* 		for( i = 0; i < nsched_ctxs; i++) */
			
 
				-/* 		{ */
			
 
				-/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 0, res[i][0]); */
			
 
				-/* 			printf("ctx %d/worker type %d: n = %lf \n", i, 1, res[i][1]); */
			
 
				-/* 		} */
			
 
				+		int i;
			
 
				+		for( i = 0; i < nsched_ctxs; i++)
			
 
				+		{
			
 
				+			printf("ctx %d/worker type %d: n = %lf \n", i, 0, nworkers_per_type[i][0]);
			
 
				+			printf("ctx %d/worker type %d: n = %lf \n", i, 1, nworkers_per_type[i][1]);
			
 
				+		}
			
 
				 		int nworkers_per_type_rounded[nsched_ctxs][2];
			
 
				 		_lp_round_double_to_int(nsched_ctxs, 2, nworkers_per_type, nworkers_per_type_rounded);
			
 
				-/*       		for( i = 0; i < nsched_ctxs; i++) */
			
 
				-/* 		{ */
			
 
				-/* 			printf("ctx %d/worker type %d: n = %d \n", i, 0, res_rounded[i][0]); */
			
 
				-/* 			printf("ctx %d/worker type %d: n = %d \n", i, 1, res_rounded[i][1]); */
			
 
				-/* 		} */
			
 
				+      		for( i = 0; i < nsched_ctxs; i++)
			
 
				+		{
			
 
				+			printf("ctx %d/worker type %d: n = %d \n", i, 0, nworkers_per_type_rounded[i][0]);
			
 
				+			printf("ctx %d/worker type %d: n = %d \n", i, 1, nworkers_per_type_rounded[i][1]);
			
 
				+		}
			
 
				+		int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : 
			
 
				+			sched_ctxs;
			
 
				 
			
 
				-		_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
			
 
				+		unsigned has_workers = 0;
			
 
				+		int s;
			
 
				+		for(s = 0; s < ns; s++)
			
 
				+		{
			
 
				+			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
			
 
				+									     STARPU_ANY_WORKER);
			
 
				+			if(nworkers_ctx != 0)
			
 
				+			{
			
 
				+				has_workers = 1;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		if(has_workers)
			
 
				+			_lp_redistribute_resources_in_ctxs(nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type);
			
 
				+		else
			
 
				+			_lp_distribute_resources_in_ctxs(sched_ctxs, nsched_ctxs, 2, nworkers_per_type_rounded, nworkers_per_type, workers, nworkers);
			
 
				 	}
			
 
				 	pthread_mutex_unlock(&act_hypervisor_mutex);
			
 
				 }
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/lp_tools.c
@@ -186,8 +186,18 @@ double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double r
 
				 	for(i = 0; i < nsched_ctxs; i++)
			
 
				 	{
			
 
				 		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
			
 
				-		v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
			
 
				-		v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
			
 
				+		/* Velocity of each worker type in this context (column 0: CUDA, column 1:
				+		 * CPU). Use _get_velocity_per_worker_type(); when it returns -1.0 fall
				+		 * back to _get_ref_velocity_per_worker_type(), and when that also returns
				+		 * -1.0 to a hard-coded constant. */
				+		v[i][0] = _get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
				+		if(v[i][0] == -1.0)
				+			v[i][0] = _get_ref_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
				+		if(v[i][0] == -1.0)
				+			v[i][0] = 20.0;
				+
				+		v[i][1] = _get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
				+		/* the CPU reference value belongs in the CPU column: storing it in
				+		 * v[i][0] overwrote the CUDA velocity and left v[i][1] at -1.0 */
				+		if(v[i][1] == -1.0)
				+			v[i][1] = _get_ref_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
				+		if(v[i][1] == -1.0)
				+			v[i][1] = 200.0;
				+		/* NOTE(review): these defaults (CUDA 20.0, CPU 200.0) are the reverse of
				+		 * the constants the lines replace (CUDA 200.0, CPU 20.0) -- confirm */
			
 
				+
			
 
				 		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
			
 
				 //			printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
			
 
				 	}
			
@@ -266,6 +276,161 @@ void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded
 
				 	}
			
 
				 }
			
 
				 
			
 
				+/* Queue the workers that context sched_ctx (row sched_ctx_idx of res and
				+ * res_rounded) holds beyond its target, for each of the nw worker types
				+ * (index 0: STARPU_CUDA_WORKER, index 1: STARPU_CPU_WORKER, any other index:
				+ * STARPU_ANY_WORKER).
				+ *
				+ * Index 1 is compared with the integer target res_rounded and every worker
				+ * above it is appended to tmp_workers_move[w]. The other indices are compared
				+ * with the fractional target res: the whole part of the excess is appended to
				+ * tmp_workers_move[w]; one more worker goes to tmp_workers_move[w] when the
				+ * fractional part exceeds 0.8, or to tmp_workers_add[w] when it exceeds 0.3.
				+ * tmp_nw_move[w] and tmp_nw_add[w] are incremented for each appended worker.
				+ * The candidate workers come from _get_first_workers(), which may lower the
				+ * requested count; the returned array is freed here. */
				+void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, 
				+				  int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				+				  int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				+				  int res_rounded[ns][nw], double res[ns][nw])
				+{
				+	int w;
				+	for(w = 0; w < nw; w++)
				+	{
				+		enum starpu_archtype arch;
				+		switch(w)
				+		{
				+		case 0:
				+			arch = STARPU_CUDA_WORKER;
				+			break;
				+		case 1:
				+			arch = STARPU_CPU_WORKER;
				+			break;
				+		default:
				+			arch = STARPU_ANY_WORKER;
				+			break;
				+		}
				+
				+		int current = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctx, arch);
				+		int i;
				+
				+		if(w == 1)
				+		{
				+			/* integer target: give away everything above it */
				+			if(current > res_rounded[sched_ctx_idx][w])
				+			{
				+				int nexcess = current - res_rounded[sched_ctx_idx][w];
				+				int *excess_workers = _get_first_workers(sched_ctx, &nexcess, arch);
				+				for(i = 0; i < nexcess; i++)
				+					tmp_workers_move[w][tmp_nw_move[w]++] = excess_workers[i];
				+				free(excess_workers);
				+			}
				+			continue;
				+		}
				+
				+		/* fractional target */
				+		double current_d = current * 1.0;
				+		if(!(current_d > res[sched_ctx_idx][w]))
				+			continue;
				+
				+		double excess = current_d - res[sched_ctx_idx][w];
				+		int whole = floor(excess);
				+		double frac = excess - (double)whole;
				+		int partial = (frac != 0.0);
				+
				+		/* when part of a worker is in excess, fetch one extra candidate */
				+		int nfetched = partial ? whole + 1 : whole;
				+		int *excess_workers = _get_first_workers(sched_ctx, &nfetched, arch);
				+		if(nfetched > 0)
				+		{
				+			int nfull = partial ? nfetched - 1 : nfetched;
				+			for(i = 0; i < nfull; i++)
				+				tmp_workers_move[w][tmp_nw_move[w]++] = excess_workers[i];
				+
				+			if(partial)
				+			{
				+				/* the last candidate is moved, offered as an addition, or kept */
				+				if(frac > 0.8)
				+					tmp_workers_move[w][tmp_nw_move[w]++] = excess_workers[nfetched-1];
				+				else if(frac > 0.3)
				+					tmp_workers_add[w][tmp_nw_add[w]++] = excess_workers[nfetched-1];
				+			}
				+		}
				+		free(excess_workers);
				+	}
				+}
			
 
				+
			
 
				+/* Pick, among the workers queued in tmp_workers_move / tmp_workers_add, those
				+ * that context sched_ctx (row sched_ctx_idx of res and res_rounded) takes.
				+ *
				+ * For each worker type w (index 0: STARPU_CUDA_WORKER, index 1:
				+ * STARPU_CPU_WORKER, any other index: STARPU_ANY_WORKER):
				+ *  - if the context has fewer workers than res_rounded, up to the missing
				+ *    number is taken from tmp_workers_move[w] and appended to workers_move;
				+ *  - if the fractional part of (res - current count) exceeds 0.3, all the
				+ *    workers of tmp_workers_add[w] are appended to workers_add.
				+ * Taken slots are set to -1 and tmp_nw_move[w] / tmp_nw_add[w] are lowered by
				+ * the number taken for that type. *nw_move and *nw_add are increased by the
				+ * totals; the visible caller sets nw_add to 0 before the call, and nw_move is
				+ * presumably reset the same way (not visible in this view). */
				+void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, 
				+				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				+				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				+				int *nw_move, int workers_move[STARPU_NMAXWORKERS], 
				+				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				+				int res_rounded[ns][nw], double res[ns][nw])
				+{
				+	int w;
				+	/* write positions in workers_move / workers_add, shared by all types */
				+	int j = 0, k = 0;
				+	for(w = 0; w < nw; w++)
				+	{
				+		enum starpu_archtype arch = STARPU_ANY_WORKER;
				+		if(w == 0) arch = STARPU_CUDA_WORKER;
				+		if(w == 1) arch = STARPU_CPU_WORKER;
				+
				+		int nw_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctx, arch);
				+		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;
				+
				+		if( nw_needed > 0 && tmp_nw_move[w] > 0)
				+		{
				+			/* workers of this type taken now; *nw_move is the total over all
				+			 * types and must not be what is subtracted from tmp_nw_move[w] */
				+			int nw_taken = nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
				+			*nw_move += nw_taken;
				+			int i = 0;
				+			for(i = 0; i < STARPU_NMAXWORKERS; i++)
				+			{
				+				if(tmp_workers_move[w][i] != -1)
				+				{
				+					workers_move[j++] = tmp_workers_move[w][i];
				+					tmp_workers_move[w][i] = -1;
				+					if(j == *nw_move)
				+						break;
				+				}
				+			}
				+			tmp_nw_move[w] -= nw_taken;
				+		}
				+
				+		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
				+		int x = floor(needed);
				+		double x_double = (double)x;
				+		double diff = needed - x_double;
				+		if(diff > 0.3 && tmp_nw_add[w] > 0)
				+		{
				+			/* accumulate instead of overwriting, so that *nw_add stays in step
				+			 * with the write position k when several types contribute */
				+			int nw_taken = tmp_nw_add[w];
				+			*nw_add += nw_taken;
				+			int i = 0;
				+			for(i = 0; i < STARPU_NMAXWORKERS; i++)
				+			{
				+				if(tmp_workers_add[w][i] != -1)
				+				{
				+					workers_add[k++] = tmp_workers_add[w][i];
				+					tmp_workers_add[w][i] = -1;
				+					if(k == *nw_add)
				+						break;
				+				}
				+			}
				+			tmp_nw_add[w] -= nw_taken;
				+		}
				+	}
				+}
			
 
				+
			
 
				+/* Gather into workers_move every worker still queued in tmp_workers_move
				+ * (slots different from -1), for all nw worker types. *nw_move is increased
				+ * by tmp_nw_move[w] for each type with a positive count, and the copied slots
				+ * are reset to -1. tmp_nw_move itself is left unchanged. */
				+void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				+				int *nw_move, int workers_move[STARPU_NMAXWORKERS])
				+{
				+	int w;
				+	/* next free slot of workers_move: kept across worker types so that a later
				+	 * type is appended after the earlier ones; restarting at 0 for each type
				+	 * overwrote them while *nw_move kept growing */
				+	int j = 0;
				+	for(w = 0; w < nw; w++)
				+	{
				+		if(tmp_nw_move[w] > 0)
				+		{
				+			*nw_move += tmp_nw_move[w];
				+			int i;
				+			for(i = 0; i < STARPU_NMAXWORKERS; i++)
				+			{
				+				if(tmp_workers_move[w][i] != -1)
				+				{
				+					workers_move[j++] = tmp_workers_move[w][i];
				+					tmp_workers_move[w][i] = -1;
				+					if(j == *nw_move)
				+						break;
				+				}
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				 void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
			
 
				 {
			
 
				 	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
			
@@ -292,69 +457,9 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
				 		}
			
 
				 
			
 
				 		/* find workers that ctx s has to give away */
			
 
				-		for(w = 0; w < nw; w++)
			
 
				-		{
			
 
				-			enum starpu_archtype arch = STARPU_ANY_WORKER;
			
 
				-			if(w == 0) arch = STARPU_CUDA_WORKER;
			
 
				-			if(w == 1) arch = STARPU_CPU_WORKER;
			
 
				-			
			
 
				-
			
 
				-			if(w == 1)
			
 
				-			{
			
 
				-				int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s], arch);
			
 
				-				if(nworkers_ctx > res_rounded[s][w])
			
 
				-				{
			
 
				-					int nworkers_to_move = nworkers_ctx - res_rounded[s][w];
			
 
				-					int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
			
 
				-					int i;
			
 
				-					for(i = 0; i < nworkers_to_move; i++)
			
 
				-						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
			
 
				-					free(workers_to_move);
			
 
				-				}
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				double nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s], arch) * 1.0;
			
 
				-				if(nworkers_ctx > res[s][w])
			
 
				-				{
			
 
				-					double nworkers_to_move = nworkers_ctx - res[s][w];
			
 
				-					int x = floor(nworkers_to_move);
			
 
				-					double x_double = (double)x;
			
 
				-					double diff = nworkers_to_move - x_double;
			
 
				-					if(diff == 0.0)
			
 
				-					{
			
 
				-						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
			
 
				-						if(x > 0)
			
 
				-						{
			
 
				-							int i;
			
 
				-							for(i = 0; i < x; i++)
			
 
				-								tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
			
 
				-
			
 
				-						}
			
 
				-						free(workers_to_move);
			
 
				-					}
			
 
				-					else
			
 
				-					{
			
 
				-						x+=1;
			
 
				-						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
			
 
				-						if(x > 0)
			
 
				-						{
			
 
				-							int i;
			
 
				-							for(i = 0; i < x-1; i++)
			
 
				-								tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
			
 
				-
			
 
				-							if(diff > 0.8)
			
 
				-								tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
			
 
				-							else
			
 
				-								if(diff > 0.3)
			
 
				-									tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
			
 
				-
			
 
				-						}
			
 
				-						free(workers_to_move);
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				+		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s, 
			
 
				+					      tmp_nw_move, tmp_workers_move, 
			
 
				+					      tmp_nw_add, tmp_workers_add, res_rounded, res);
			
 
				 
			
 
				 		for(s2 = 0; s2 < ns; s2++)
			
 
				 		{
			
@@ -367,58 +472,14 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
				 				
			
 
				 				int workers_add[STARPU_NMAXWORKERS];
			
 
				 				int nw_add = 0;
			
 
				+				
			
 
				 
			
 
				-				int w;
			
 
				-				int j = 0, k = 0;
			
 
				-				for(w = 0; w < nw; w++)
			
 
				-				{
			
 
				-					enum starpu_archtype arch = STARPU_ANY_WORKER;
			
 
				-					if(w == 0) arch = STARPU_CUDA_WORKER;
			
 
				-					if(w == 1) arch = STARPU_CPU_WORKER;
			
 
				-
			
 
				-					int nw_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctxs[s2], arch);
			
 
				-					int nw_needed = res_rounded[s2][w] - nw_ctx2;
			
 
				-
			
 
				-					if( nw_needed > 0 && tmp_nw_move[w] > 0)
			
 
				-					{
			
 
				-						nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			
 
				-						int i = 0;
			
 
				-						for(i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				-						{
			
 
				-							if(tmp_workers_move[w][i] != -1)
			
 
				-							{
			
 
				-								workers_move[j++] = tmp_workers_move[w][i];
			
 
				-								tmp_workers_move[w][i] = -1;
			
 
				-								if(j == nw_move)
			
 
				-									break;
			
 
				-							}
			
 
				-						}
			
 
				-						tmp_nw_move[w] -=  nw_move;
			
 
				-					}
			
 
				-
			
 
				-					
			
 
				-					double needed = res[s2][w] - (nw_ctx2 * 1.0);
			
 
				-					int x = floor(needed);
			
 
				-					double x_double = (double)x;
			
 
				-					double diff = needed - x_double;
			
 
				-					if(diff > 0.3 && tmp_nw_add[w] > 0)
			
 
				-					{
			
 
				-						nw_add = tmp_nw_add[w];
			
 
				-						int i = 0;
			
 
				-						for(i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				-						{
			
 
				-							if(tmp_workers_add[w][i] != -1)
			
 
				-							{
			
 
				-								workers_add[k++] = tmp_workers_add[w][i];
			
 
				-								tmp_workers_add[w][i] = -1;
			
 
				-								if(k == nw_add)
			
 
				-									break;
			
 
				-							}
			
 
				-						}
			
 
				-						tmp_nw_add[w] -=  nw_add;
			
 
				-					}
			
 
				-				}
			
 
				-
			
 
				+				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2, 
			
 
				+							   tmp_nw_move, tmp_workers_move, 
			
 
				+							   tmp_nw_add, tmp_workers_add,
			
 
				+							   &nw_move, workers_move, 
			
 
				+							   &nw_add, workers_add,
			
 
				+							   res_rounded, res);
			
 
				 				
			
 
				 				if(nw_move > 0)
			
 
				 				{
			
@@ -439,27 +500,8 @@ void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw],
 
				 		int workers_move[STARPU_NMAXWORKERS];
			
 
				 		int nw_move = 0;
			
 
				 				
			
 
				-		int w;
			
 
				-		for(w = 0; w < nw; w++)
			
 
				-		{
			
 
				-			if(tmp_nw_move[w] > 0)
			
 
				-			{
			
 
				-				nw_move += tmp_nw_move[w];
			
 
				-				int i = 0, j = 0;
			
 
				-				for(i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				-				{
			
 
				-					if(tmp_workers_move[w][i] != -1)
			
 
				-					{
			
 
				-						workers_move[j++] = tmp_workers_move[w][i];
			
 
				-						tmp_workers_move[w][i] = -1;
			
 
				-						if(j == nw_move)
			
 
				-							break;
			
 
				-					}
			
 
				-				}
			
 
				-
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				+		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
			
 
				+					   &nw_move, workers_move);
			
 
				 		if(nw_move > 0)
			
 
				 			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
			
 
				 	}
			
@@ -471,8 +513,14 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 
				 	int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs;
			
 
				 
			
 
				 	int s, w;
			
 
				+	int start[nw];
			
 
				+	for(w = 0; w < nw; w++)
			
 
				+		start[w] = 0;
			
 
				 	for(s = 0; s < ns; s++)
			
 
				 	{
			
 
				+		int workers_add[STARPU_NMAXWORKERS];
			
 
				+                int nw_add = 0;
			
 
				+		
			
 
				 		for(w = 0; w < nw; w++)
			
 
				 		{
			
 
				 			enum starpu_archtype arch;
			
@@ -482,19 +530,13 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 
				 			if(w == 1)
			
 
				 			{
			
 
				 				int nworkers_to_add = res_rounded[s][w];
			
 
				-				int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &nworkers_to_add, arch);
			
 
				-
			
 
				-				if(nworkers_to_add > 0)
			
 
				-				{
			
 
				-					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, current_sched_ctxs[s]);
			
 
				-					sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
			
 
				-					struct sched_ctx_hypervisor_policy_config *new_config = sched_ctx_hypervisor_get_config(current_sched_ctxs[s]);
			
 
				-					int i;
			
 
				-					for(i = 0; i < nworkers_to_add; i++)
			
 
				-						new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
			
 
				-				}
			
 
				+				int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &nworkers_to_add, arch);
			
 
				+				int i;
			
 
				+				for(i = 0; i < nworkers_to_add; i++)
			
 
				+					workers_add[nw_add++] = workers_to_add[i];
			
 
				 				free(workers_to_add);
			
 
				 			}
			
 
				+			
			
 
				 			else
			
 
				 			{
			
 
				 				double nworkers_to_add = res[s][w];
			
@@ -503,31 +545,40 @@ void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_r
 
				 				double diff = nworkers_to_add - x_double;
			
 
				 				if(diff == 0.0)
			
 
				 				{
			
 
				-					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
			
 
				+					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
			
 
				 					if(x > 0)
			
 
				 					{
			
 
				-						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
			
 
				-						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
			
 
				+						int i;
			
 
				+						for(i = 0; i < x; i++)
			
 
				+							workers_add[nw_add++] = workers_to_add[i];
			
 
				 					}
			
 
				 					free(workers_to_add);
			
 
				 				}
			
 
				 				else
			
 
				 				{
			
 
				 					x+=1;
			
 
				-					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
			
 
				+					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
			
 
				 					if(x > 0)
			
 
				 					{
			
 
				+						int i;
			
 
				 						if(diff >= 0.3)
			
 
				-							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
			
 
				+							for(i = 0; i < x; i++)
			
 
				+								workers_add[nw_add++] = workers_to_add[i];
			
 
				 						else
			
 
				-							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x-1, current_sched_ctxs[s]);
			
 
				-						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
			
 
				+							for(i = 0; i < x-1; i++)
			
 
				+								workers_add[nw_add++] = workers_to_add[i];
			
 
				+
			
 
				 					}
			
 
				 					free(workers_to_add);
			
 
				 				}
			
 
				 			}
			
 
				-
			
 
				 		}
			
 
				-		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
			
 
				+		if(nw_add > 0)
			
 
				+		{
			
 
				+			sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
			
 
				+			sched_ctx_hypervisor_start_resize(sched_ctxs[s]);
			
 
				+		}
			
 
				+
			
 
				+//		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
			
 
				 	}
			
 
				 }
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
@@ -75,7 +75,7 @@ unsigned _find_poor_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move)
 
				 	return sched_ctx;
			
 
				 }
			
 
				 
			
 
				-int* _get_first_workers_in_list(int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch)
			
 
				+int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch)
			
 
				 {
			
 
				 	int *curr_workers = (int*)malloc((*nworkers)*sizeof(int));
			
 
				 
			
@@ -87,7 +87,11 @@ int* _get_first_workers_in_list(int *workers, int nall_workers,  unsigned *nwork
 
				 		enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
			
 
				 		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
			
 
				 		{
			
 
				-			curr_workers[nfound_workers++] = worker;
			
 
				+			if(w >= *start)
			
 
				+			{
			
 
				+				curr_workers[nfound_workers++] = worker;
			
 
				+				*start = w+1;
			
 
				+			}
			
 
				 		}
			
 
				 		if(nfound_workers == *nworkers)
			
 
				 			break;
			
@@ -515,6 +519,34 @@ double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w,
 
				         return -1.0;
			
 
				 }
			
 
				 
			
 
				+/* compute an average value of the cpu/cuda old velocity */
			
 
				+double _get_ref_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch)
			
 
				+{
			
 
				+	double ref_velocity = 0.0;
			
 
				+	unsigned nw = 0;
			
 
				+
			
 
				+	struct starpu_sched_ctx_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
			
 
				+	int worker;
			
 
				+
			
 
				+	struct starpu_sched_ctx_iterator it;
			
 
				+	if(workers->init_iterator)
			
 
				+		workers->init_iterator(workers, &it);
			
 
				+
			
 
				+	while(workers->has_next(workers, &it))
			
 
				+	{
			
 
				+		worker = workers->get_next(workers, &it);
			
 
				+		if(sc_w->ref_velocity[worker] > 1.0)
			
 
				+		{
			
 
				+			ref_velocity += sc_w->ref_velocity[worker];
			
 
				+			nw++;
			
 
				+		}
			
 
				+	}
			
 
				+	
			
 
				+	if(nw > 0)
			
 
				+		return ref_velocity / nw;
			
 
				+	return -1.0;
			
 
				+}
			
 
				+
			
 
				 /* check if there is a big velocity gap between the contexts */
			
 
				 int _velocity_gap_btw_ctxs()
			
 
				 {
			
--- a/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h
@@ -38,7 +38,7 @@ unsigned _find_poor_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move);
 
				 
			
 
				 int* _get_first_workers(unsigned sched_ctx, int *nworkers, enum starpu_archtype arch);
			
 
				 
			
 
				-int* _get_first_workers_in_list(int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch);
			
 
				+int* _get_first_workers_in_list(int *start, int *workers, int nall_workers,  unsigned *nworkers, enum starpu_archtype arch);
			
 
				 
			
 
				 unsigned _get_potential_nworkers(struct sched_ctx_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_archtype arch);
			
 
				 
			
@@ -58,6 +58,8 @@ double _get_velocity_per_worker(struct sched_ctx_hypervisor_wrapper *sc_w, unsig
 
				 
			
 
				 double _get_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
			
 
				 
			
 
				+double _get_ref_velocity_per_worker_type(struct sched_ctx_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
			
 
				+
			
 
				 int _velocity_gap_btw_ctxs(void);
			
 
				 
			
 
				 void _get_total_nw(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers]);
			
--- a/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
+++ b/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
@@ -27,6 +27,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double flops, size
 
				 static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
			
 
				 static void notify_idle_end(unsigned sched_ctx, int  worker);
			
 
				 static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
			
 
				+static void notify_delete_context(unsigned sched_ctx);
			
 
				 
			
 
				 extern struct sched_ctx_hypervisor_policy idle_policy;
			
 
				 extern struct sched_ctx_hypervisor_policy app_driven_policy;
			
@@ -137,6 +138,7 @@ struct starpu_sched_ctx_performance_counters* sched_ctx_hypervisor_init(struct s
 
				 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			
 
				 	{
			
 
				 		hypervisor.resize[i] = 0;
			
 
				+		hypervisor.allow_remove[i] = 1;
			
 
				 		hypervisor.configurations[i] = NULL;
			
 
				 		hypervisor.sr = NULL;
			
 
				 		hypervisor.check_min_tasks[i] = 1;
			
@@ -179,6 +181,7 @@ struct starpu_sched_ctx_performance_counters* sched_ctx_hypervisor_init(struct s
 
				 	perf_counters->notify_post_exec_hook = notify_post_exec_hook;
			
 
				 	perf_counters->notify_idle_end = notify_idle_end;
			
 
				 	perf_counters->notify_submitted_job = notify_submitted_job;
			
 
				+	perf_counters->notify_delete_context = notify_delete_context;
			
 
				 
			
 
				 	starpu_sched_ctx_notify_hypervisor_exists();
			
 
				 
			
@@ -204,9 +207,33 @@ void sched_ctx_hypervisor_start_resize(unsigned sched_ctx)
 
				 	hypervisor.resize[sched_ctx] = 1;
			
 
				 }
			
 
				 
			
 
				+static void _print_current_time()
			
 
				+{
			
 
				+/* 	double curr_time = starpu_timing_now(); */
			
 
				+/* 	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /\* in seconds *\/ */
			
 
				+/* 	fprintf(stdout, "Time: %lf\n", elapsed_time); */
			
 
				+/* 	int i; */
			
 
				+/* 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) */
			
 
				+/* 	{ */
			
 
				+/* 		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) */
			
 
				+/* 		{ */
			
 
				+/* 			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; */
			
 
				+
			
 
				+/* 			double cpu_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER); */
			
 
				+/* 			double cuda_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER); */
			
 
				+/* 			int ncpus = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER); */
			
 
				+/* 			int ncuda = sched_ctx_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER); */
			
 
				+/* 			cpu_speed = cpu_speed == -1.0 ? 0.0 : cpu_speed; */
			
 
				+/* 			cuda_speed = cuda_speed == -1.0 ? 0.0 : cuda_speed; */
			
 
				+/* 			fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda); */
			
 
				+/* 		} */
			
 
				+/* 	} */
			
 
				+	return;
			
 
				+}
			
 
				+
			
 
				 void sched_ctx_hypervisor_shutdown(void)
			
 
				 {
			
 
				-	printf("shutdown\n");
			
 
				+//	printf("shutdown\n");
			
 
				 	int i;
			
 
				 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			
 
				 	{
			
@@ -222,6 +249,7 @@ void sched_ctx_hypervisor_shutdown(void)
 
				 	perf_counters->notify_poped_task = NULL;
			
 
				 	perf_counters->notify_post_exec_hook = NULL;
			
 
				 	perf_counters->notify_idle_end = NULL;
			
 
				+	perf_counters->notify_delete_context = NULL;
			
 
				 
			
 
				 	free(perf_counters);
			
 
				 	perf_counters = NULL;
			
@@ -356,26 +384,6 @@ double sched_ctx_hypervisor_get_velocity_per_worker_type(struct sched_ctx_hyperv
 
				         return -1.0;
			
 
				 }
			
 
				 
			
 
				-static void _print_current_time()
			
 
				-{
			
 
				-	double curr_time = starpu_timing_now();
			
 
				-	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
			
 
				-	printf("Time: %lf\n", elapsed_time);
			
 
				-	int i;
			
 
				-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			
 
				-	{
			
 
				-		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
			
 
				-		{
			
 
				-			struct sched_ctx_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
			
 
				-
			
 
				-			double cpu_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
			
 
				-			double cuda_speed = sched_ctx_hypervisor_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
			
 
				-			printf("%d: cpu_v = %lf cuda_v = %lf\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed);
			
 
				-		}
			
 
				-	}
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				 static int get_ntasks( int *tasks)
			
 
				 {
			
 
				 	int ntasks = 0;
			
@@ -484,6 +492,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 
				 			printf(" %d", workers_to_move[j]);
			
 
				 		printf("\n");
			
 
				 
			
 
				+		hypervisor.allow_remove[receiver_sched_ctx] = 0;
			
 
				 		starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx);
			
 
				 
			
 
				 		if(now)
			
@@ -495,7 +504,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 
				 			printf("\n");
			
 
				 
			
 
				 			starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx);
			
 
				-			
			
 
				+			hypervisor.allow_remove[receiver_sched_ctx] = 1;
			
 
				 			_reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
			
 
				 		}
			
 
				 		else
			
@@ -559,7 +568,7 @@ unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx)
 
				 
			
 
				 void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now)
			
 
				 {
			
 
				-	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
			
 
				+	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx])
			
 
				 	{
			
 
				 		_print_current_time();
			
 
				 		int nworkers=0;
			
@@ -700,6 +709,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
				 				_reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
			
 
				 
			
 
				 				hypervisor.resize[sender_sched_ctx] = 1;
			
 
				+				hypervisor.allow_remove[receiver_sched_ctx] = 1;
			
 
				 				//	hypervisor.resize[receiver_sched_ctx] = 1;
			
 
				 				/* if the user allowed resizing leave the decisions to the application */
			
 
				 				if(imposed_resize)  imposed_resize = 0;
			
@@ -800,6 +810,8 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 
				 			hypervisor.policy.handle_poped_task(sched_ctx, worker);
			
 
				 	}
			
 
				 	_ack_resize_completed(sched_ctx, worker);
			
 
				+	if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 100 == 0)
			
 
				+		_print_current_time();
			
 
				 }
			
 
				 
			
 
				 /* notifies the hypervisor that a tagged task has just been executed */
			
@@ -866,6 +878,12 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint)
 
				 		hypervisor.policy.handle_submitted_job(task, footprint);
			
 
				 }
			
 
				 
			
 
				+static void notify_delete_context(unsigned sched_ctx)
			
 
				+{
			
 
				+	_print_current_time();
			
 
				+	sched_ctx_hypervisor_unregister_ctx(sched_ctx);
			
 
				+}
			
 
				+
			
 
				 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
			
 
				 {
			
 
				 	pthread_mutex_lock(&act_hypervisor_mutex);
			
--- a/sched_ctx_hypervisor/src/sched_ctx_hypervisor_intern.h
+++ b/sched_ctx_hypervisor/src/sched_ctx_hypervisor_intern.h
@@ -58,6 +58,7 @@ struct sched_ctx_hypervisor
 
				 	int sched_ctxs[STARPU_NMAX_SCHED_CTXS];
			
 
				 	unsigned nsched_ctxs;
			
 
				 	unsigned resize[STARPU_NMAX_SCHED_CTXS];
			
 
				+	unsigned allow_remove[STARPU_NMAX_SCHED_CTXS];
			
 
				 	int min_tasks;
			
 
				 	struct sched_ctx_hypervisor_policy policy;
			
 
				 
			
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -470,6 +470,12 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 
				 void starpu_sched_ctx_delete(unsigned sched_ctx_id)
			
 
				 {
			
 
				 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
			
 
				+#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
			
 
				+	if(sched_ctx != NULL && sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
			
 
				+	   && sched_ctx->perf_counters != NULL)
			
 
				+		sched_ctx->perf_counters->notify_delete_context(sched_ctx_id);
			
 
				+#endif //STARPU_USE_SCHED_CTX_HYPERVISOR
			
 
				+
			
 
				 	unsigned inheritor_sched_ctx_id = sched_ctx->inheritor;
			
 
				 	struct _starpu_sched_ctx *inheritor_sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx->inheritor);