浏览代码

Bug fix in ioctl (NOTE: the variadic parameter list is now terminated by -1, not 0) +
bug fix for empty contexts + bug fix for the app_driven policy (workers are no longer
lazily transferred from one context to another in this case; they are removed immediately)

Andra Hugo 13 年之前
父节点
当前提交
c1c43407db

+ 3 - 1
include/starpu_scheduler.h

@@ -154,7 +154,7 @@ struct starpu_performance_counters {
 	void (*notify_idle_end)(unsigned sched_ctx, int worker);
 	void (*notify_pushed_task)(unsigned sched_ctx, int worker);
 	void (*notify_poped_task)(unsigned sched_ctx, int worker, double flops);
-	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid);
+	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid, int workerid);
 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint);
 };
 
@@ -206,6 +206,8 @@ unsigned starpu_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id
 
 unsigned starpu_worker_belongs_to_sched_ctx(int workerid, unsigned sched_ctx_id);
 
+void starpu_stop_task_submission(unsigned sched_ctx);
+
 /* Check if the worker specified by workerid can execute the codelet. */
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 

+ 2 - 2
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -114,7 +114,7 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx);
 
 void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag);
 
-void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receier_sched_ctx, int *workers_to_move, unsigned nworkers_to_move);
+void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receier_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now);
 
 void sched_ctx_hypervisor_stop_resize(unsigned sched_ctx);
 
@@ -142,7 +142,7 @@ char* sched_ctx_hypervisor_get_policy();
 
 void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
 
-void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx);
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now);
 
 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
 

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -21,7 +21,7 @@ void app_driven_handle_post_exec_hook(unsigned sched_ctx, struct starpu_htbl32_n
 	void* sched_ctx_pt =  _starpu_htbl_search_32(resize_requests, (uint32_t)task_tag);
 	if(sched_ctx_pt && sched_ctx_pt != resize_requests)
 	{
-		_resize_to_unknown_receiver(sched_ctx);
+		_resize_to_unknown_receiver(sched_ctx, 1);
 		_starpu_htbl_insert_32(&resize_requests, (uint32_t)task_tag, NULL);
 	}
 

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -147,7 +147,7 @@ static unsigned _gflops_rate_resize(unsigned sender_sched_ctx, unsigned receiver
                 int *workers_to_move =  _get_workers_to_move(sender_sched_ctx, receiver_sched_ctx, &nworkers_to_move);
 		if(nworkers_to_move > 0)
                 {
-                        sched_ctx_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move);
+                        sched_ctx_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move, 0);
 
                         struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiver_sched_ctx);
                         int i;

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/idle_policy.c

@@ -21,7 +21,7 @@ void idle_handle_idle_cycle(unsigned sched_ctx, int worker)
 	struct sched_ctx_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
 	struct policy_config *config = sc_w->config;
 	if(config != NULL &&  sc_w->current_idle_time[worker] > config->max_idle[worker])
-		_resize_to_unknown_receiver(sched_ctx);
+		_resize_to_unknown_receiver(sched_ctx, 0);
 }
 
 struct hypervisor_policy idle_policy = {

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -408,7 +408,7 @@ void _redistribute_resources_in_ctxs2(int ns, int nw, int nt, double tasks[nw][n
 		unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], STARPU_ALL);
 	
 		if(nworkers_ctx > nremove)
-			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
 	
 		if(nworkers_ctx != STARPU_NMAXWORKERS)
 		{

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp3_policy.c

@@ -392,7 +392,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 				}
 			}
 			if(all_have_dest)
-				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
 		}
 	}
 

+ 2 - 2
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -107,7 +107,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][
 					if(receiving_s != -1)
 					{
 						int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
-						sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move);
+						sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move, 0);
 						struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
 						int i;
 						for(i = 0; i < nworkers_to_move; i++)
@@ -147,7 +147,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][
 							int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
 							if(x > 0)
 							{
-								sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, x);
+								sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, x, 0);
 
 								struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
 								int i;

+ 10 - 9
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -233,7 +233,7 @@ unsigned _get_nworkers_to_move(unsigned req_sched_ctx)
 	return nworkers_to_move;
 }
 
-unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize)
+unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now)
 {
 	int ret = 1;
 	if(force_resize)
@@ -264,7 +264,7 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
 			{						
 				int *workers_to_move = _get_first_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ALL);
-				sched_ctx_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move);
+				sched_ctx_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move, now);
 				
 				struct policy_config *new_config = sched_ctx_hypervisor_get_config(poor_sched_ctx);
 				int i;
@@ -282,9 +282,9 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 }
 
 
-unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx)
+unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
 {
-	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0);
+	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
 }
 
 static double _get_elapsed_flops(struct sched_ctx_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
@@ -319,9 +319,8 @@ double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w)
 	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
 	double prc = elapsed_flops/sc_w->total_flops;
 	unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sc_w->sched_ctx);
-
-	double redim_sample = elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE*nworkers : HYPERVISOR_REDIM_SAMPLE*nworkers;
-        if(prc >= redim_sample)
+	double redim_sample = elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE*nworkers : HYPERVISOR_REDIM_SAMPLE*nworkers;  
+	if(prc >= redim_sample)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = curr_time - sc_w->start_time;
@@ -371,9 +370,11 @@ int _velocity_gap_btw_ctxs()
 					if(other_ctx_v != 0.0)
 					{
 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
-						if(gap > 2)
+						if(gap > 1.5)
 							return 1;
-					}
+					} 
+					else
+						return 1;						
 				}
 			}
 		}

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -30,7 +30,7 @@ unsigned _get_nworkers_to_move(unsigned req_sched_ctx);
 
 unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize);
 
-unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx);
+unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now);
 
 double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w);
 

+ 2 - 3
sched_ctx_hypervisor/src/sched_ctx_config.c

@@ -114,7 +114,7 @@ static struct policy_config* _ioctl(unsigned sched_ctx, va_list varg_list, unsig
 	int *workerids;
 	int nworkers;
 
-	while ((arg_type = va_arg(varg_list, int)) != 0) 
+	while ((arg_type = va_arg(varg_list, int)) != -1) 
 	{
 		switch(arg_type)
 		{
@@ -207,7 +207,7 @@ void sched_ctx_hypervisor_ioctl(unsigned sched_ctx, ...)
 	int stop = 0;
 	int task_tag = -1;
 
-	while ((arg_type = va_arg(varg_list, int)) != 0) 
+	while ((arg_type = va_arg(varg_list, int)) != -1) 
 	{
 		switch(arg_type)
 		{
@@ -230,7 +230,6 @@ void sched_ctx_hypervisor_ioctl(unsigned sched_ctx, ...)
 
 	/* if config not null => save hypervisor configuration and consider it later */
 	struct policy_config *config = _ioctl(sched_ctx, varg_list, (task_tag > 0));
-
 	if(config != NULL)
 	{
 		pthread_mutex_lock(&hypervisor.conf_mut[sched_ctx]);

+ 150 - 125
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -22,7 +22,7 @@ struct starpu_performance_counters* perf_counters = NULL;
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_poped_task(unsigned sched_ctx, int worker, double flops);
-static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
+static void notify_post_exec_hook(unsigned sched_ctx, int taskid, int workerid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
 static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
 
@@ -296,6 +296,9 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 	free(hypervisor.resize_requests[sched_ctx]);
 	pthread_mutex_destroy(&hypervisor.conf_mut[sched_ctx]);
 	pthread_mutex_destroy(&hypervisor.resize_mut[sched_ctx]);
+	if(hypervisor.nsched_ctxs == 1)
+		sched_ctx_hypervisor_stop_resize(hypervisor.sched_ctxs[0]);
+
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -346,7 +349,7 @@ int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch)
 
 /* actually move the workers: the cpus are moved, gpus are only shared  */
 /* forbids another resize request before this one is take into account */
-void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move)
+void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now)
 {
 	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])// && hypervisor.resize[receiver_sched_ctx])
 	{
@@ -365,25 +368,40 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 //			starpu_remove_workers_from_sched_ctx(cpus, ncpus, sender_sched_ctx);
 
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
-		pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
-
 
-		int i;
-		for(i = 0; i < nworkers_to_move; i++)
+		if(now)
 		{
-			hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
-			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];	
-			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;	
+				int j;
+				printf("remove from ctx %d:", sender_sched_ctx);
+				for(j = 0; j < nworkers_to_move; j++)
+					printf(" %d", workers_to_move[j]);
+				printf("\n");
+				
+				starpu_remove_workers_from_sched_ctx(workers_to_move, nworkers_to_move, sender_sched_ctx);
+		}
+		else
+		{
+			pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
+			
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
+			
+			
+			int i;
+			for(i = 0; i < nworkers_to_move; i++)
+			{
+				hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
+				hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];	
+				hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;	
+			}
+			
+			hypervisor.resize[sender_sched_ctx] = 0;
+			hypervisor.resize[receiver_sched_ctx] = 0;
+			
+			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 		}
-
-		hypervisor.resize[sender_sched_ctx] = 0;
-		hypervisor.resize[receiver_sched_ctx] = 0;
-
-		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	}
 	return;
 }
@@ -412,37 +430,50 @@ unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx)
 	return hypervisor.resize[sched_ctx];
 }
 
-void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx)
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now)
 {
 	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
 	{
 		int nworkers=0;
 		int workers[nworkers_to_remove];
 	
-		int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);	
-		if(ret != EBUSY)
+		if(now)
 		{
-			
-			int i;
-			for(i = 0; i < nworkers_to_remove; i++)
-				if(starpu_worker_belongs_to_sched_ctx(workers_to_remove[i], sched_ctx))
-					workers[nworkers++] = workers_to_remove[i];
-			
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
-			
-			
-			for(i = 0; i < nworkers; i++)
+				int j;
+				printf("remove from ctx %d:", sched_ctx);
+				for(j = 0; j < nworkers_to_remove; j++)
+					printf(" %d", workers_to_remove[j]);
+				printf("\n");
+				
+				starpu_remove_workers_from_sched_ctx(workers_to_remove, nworkers_to_remove, sched_ctx);
+		}
+		else
+		{
+			int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);	
+			if(ret != EBUSY)
 			{
-				hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
-				hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];	
-				hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
-			}
+				
+				int i;
+				for(i = 0; i < nworkers_to_remove; i++)
+					if(starpu_worker_belongs_to_sched_ctx(workers_to_remove[i], sched_ctx))
+						workers[nworkers++] = workers_to_remove[i];
+				
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+				
+				
+				for(i = 0; i < nworkers; i++)
+				{
+					hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
+					hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];	
+					hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+				}
 
-			hypervisor.resize[sched_ctx] = 0;
-			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+				hypervisor.resize[sched_ctx] = 0;
+				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+			}
 		}
  	}
 	return;
@@ -481,48 +512,48 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 	struct resize_ack *resize_ack = NULL;
 	unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 
-	if(hypervisor.nsched_ctxs > 0)
+	int i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
-		int i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
 		{
-			if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
+			struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
+			pthread_mutex_lock(&sc_w->mutex);
+			unsigned only_remove = 0;
+			if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && 
+			   sc_w->resize_ack.nmoved_workers > 0 && starpu_worker_belongs_to_sched_ctx(worker, hypervisor.sched_ctxs[i]))
 			{
-				struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
-				unsigned only_remove = 0;
-				if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && 
-				   sc_w->resize_ack.nmoved_workers > 0 && starpu_worker_belongs_to_sched_ctx(worker, hypervisor.sched_ctxs[i]))
-				{
-					int j;
-					for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
-						if(sc_w->resize_ack.moved_workers[j] == worker)
-						{
-							only_remove = 1;
-							break;
-						}
-				}
-				if(only_remove || 
-				   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
-				{
-					resize_ack = &sc_w->resize_ack;
-					sender_sched_ctx = hypervisor.sched_ctxs[i];
-					break;
-				}
+				int j;
+				for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
+					if(sc_w->resize_ack.moved_workers[j] == worker)
+					{
+						only_remove = 1;
+						break;
+					}
+			}
+			if(only_remove || 
+			   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
+			{
+				resize_ack = &sc_w->resize_ack;
+				sender_sched_ctx = hypervisor.sched_ctxs[i];
+				pthread_mutex_unlock(&sc_w->mutex);
+				break;
 			}
+			pthread_mutex_unlock(&sc_w->mutex);
 		}
 	}
 
 	/* if there is no ctx waiting for its ack return 1*/
 	if(resize_ack == NULL)
 		return 1;
-	int *moved_workers = resize_ack->moved_workers;
-	int nmoved_workers = resize_ack->nmoved_workers;
-	int *acked_workers = resize_ack->acked_workers;
-	int i;
 	
 	int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	if(ret != EBUSY)
 	{
+		int *moved_workers = resize_ack->moved_workers;
+		int nmoved_workers = resize_ack->nmoved_workers;
+		int *acked_workers = resize_ack->acked_workers;
+
 		if(worker != -1)
 		{
 			for(i = 0; i < nmoved_workers; i++)
@@ -579,6 +610,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 				resize_ack->nmoved_workers = 0;
 				free(resize_ack->moved_workers);
 				free(resize_ack->acked_workers);
+
 			}
 			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 			return resize_completed;
@@ -598,34 +630,24 @@ void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_idle_end(unsigned sched_ctx, int worker)
 {
-	if(hypervisor.nsched_ctxs > 1)
-	{
-
-		if(hypervisor.resize[sched_ctx])
-			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
-		
-		if(hypervisor.policy.handle_idle_end)
-			hypervisor.policy.handle_idle_end(sched_ctx, worker);
+	if(hypervisor.resize[sched_ctx])
+		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
+	
+	if(hypervisor.policy.handle_idle_end)
+		hypervisor.policy.handle_idle_end(sched_ctx, worker);
 		
-//			_ack_resize_completed(sched_ctx, worker);
-	}
 }
 
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 {
-	if(hypervisor.nsched_ctxs > 1)
+	if(hypervisor.resize[sched_ctx])
 	{
-		if(hypervisor.resize[sched_ctx])
-		{
-			struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
-			sc_w->current_idle_time[worker] += idle_time;
-			if(hypervisor.policy.handle_idle_cycle)
+		struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
+		sc_w->current_idle_time[worker] += idle_time;
+		if(hypervisor.policy.handle_idle_cycle)
 				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
-		}		
-/* 		else  */
-/* 			_ack_resize_completed(sched_ctx, worker); */
-	}
+	}		
 	return;
 }
 
@@ -657,55 +679,54 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= elapsed_flops; //sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
 
-	if(hypervisor.nsched_ctxs > 1)
+	if(hypervisor.resize[sched_ctx])
 	{
-		if(hypervisor.resize[sched_ctx])
-		{
-			if(hypervisor.policy.handle_poped_task)
-				hypervisor.policy.handle_poped_task(sched_ctx, worker);
-		}
-		_ack_resize_completed(sched_ctx, worker);
-			
+		if(hypervisor.policy.handle_poped_task)
+			hypervisor.policy.handle_poped_task(sched_ctx, worker);
 	}
+	_ack_resize_completed(sched_ctx, worker);
 }
 
 /* notifies the hypervisor that a tagged task has just been executed */
-static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
+static void notify_post_exec_hook(unsigned sched_ctx, int task_tag, int worker)
 {
 	STARPU_ASSERT(task_tag > 0);
 
-	if(hypervisor.nsched_ctxs > 1)
+	unsigned conf_sched_ctx;
+	int i;
+	pthread_mutex_lock(&act_hypervisor_mutex);
+	unsigned ns = hypervisor.nsched_ctxs;
+	pthread_mutex_unlock(&act_hypervisor_mutex);
+	
+	for(i = 0; i < ns; i++)
 	{
-		unsigned conf_sched_ctx;
-		int i;
-		for(i = 0; i < hypervisor.nsched_ctxs; i++)
-		{
-			conf_sched_ctx = hypervisor.sched_ctxs[i];
-			pthread_mutex_lock(&hypervisor.conf_mut[sched_ctx]);
-			void *config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
-			if(config && config != hypervisor.configurations[conf_sched_ctx])
-			{
-				sched_ctx_hypervisor_set_config(conf_sched_ctx, config);
-				free(config);
-				_starpu_htbl_insert_32(&hypervisor.configurations[sched_ctx], (uint32_t)task_tag, NULL);
-			}
-			pthread_mutex_unlock(&hypervisor.conf_mut[sched_ctx]);
-		}	
+		conf_sched_ctx = hypervisor.sched_ctxs[i];
+		pthread_mutex_lock(&hypervisor.conf_mut[conf_sched_ctx]);
+		void *config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
 
-		/* for the app driven we have to wait for the resize to be available
-		   because the event is required to be executed at this specific moment */
-		while(!_ack_resize_completed(sched_ctx, -1));
-
-		if(hypervisor.resize[sched_ctx])
+		if(config)// && config != hypervisor.configurations[conf_sched_ctx])
 		{
-			pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
-			struct starpu_htbl32_node* resize_requests = hypervisor.resize_requests[sched_ctx];
-
-			if(hypervisor.policy.handle_post_exec_hook)
-				hypervisor.policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
-			pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
+			sched_ctx_hypervisor_set_config(conf_sched_ctx, config);
+			free(config);
+			_starpu_htbl_insert_32(&hypervisor.configurations[sched_ctx], (uint32_t)task_tag, NULL);
 		}
+		pthread_mutex_unlock(&hypervisor.conf_mut[conf_sched_ctx]);
+	}	
+	
+	/* for the app driven we have to wait for the resize to be available
+	   because the event is required to be executed at this specific moment */
+//	while(!_ack_resize_completed(sched_ctx, worker));
+	
+	if(hypervisor.resize[sched_ctx])
+	{
+		pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
+		struct starpu_htbl32_node* resize_requests = hypervisor.resize_requests[sched_ctx];
+		
+		if(hypervisor.policy.handle_post_exec_hook)
+			hypervisor.policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
+		pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
 	}
+	return;
 }
 
 static void notify_submitted_job(struct starpu_task *task, uint32_t footprint)
@@ -720,8 +741,10 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint)
 
 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
+	pthread_mutex_lock(&act_hypervisor_mutex);
 	int curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : nsched_ctxs;
 	int *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
+	pthread_mutex_unlock(&act_hypervisor_mutex);
 	int s;
 	for(s = 0; s < curr_nsched_ctxs; s++)
 		hypervisor.resize[curr_sched_ctxs[s]] = 1;
@@ -742,7 +765,9 @@ int* sched_ctx_hypervisor_get_sched_ctxs()
 
 int sched_ctx_hypervisor_get_nsched_ctxs()
 {
-	return hypervisor.nsched_ctxs;
+	int ns;
+	ns = hypervisor.nsched_ctxs;
+	return ns;
 }
 
 void sched_ctx_hypervisor_save_size_req(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)

+ 1 - 1
src/core/jobs.c

@@ -210,7 +210,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 	/* control task should not execute post_exec_hook */
 	if(j->task_size == 1 && task->cl != NULL && !task->control_task)
 	{
-	  _starpu_sched_post_exec_hook(task);
+		_starpu_sched_post_exec_hook(task, workerid);
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 	  starpu_call_poped_task_cb(workerid, task->sched_ctx, task->flops);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR

+ 10 - 1
src/core/sched_ctx.c

@@ -98,12 +98,18 @@ static void _starpu_update_workers(int *workerids, int nworkers, int sched_ctx_i
 
 			_starpu_exclude_task_from_dag(worker[i]->tasks[sched_ctx_id]);
 
-//			starpu_push_local_task(workerids[i], worker[i]->tasks[sched_ctx_id], 1);
 			_starpu_task_submit_internally(worker[i]->tasks[sched_ctx_id]);
 		}		
 	}
 }
 
+void starpu_stop_task_submission(unsigned sched_ctx)
+{
+	_starpu_exclude_task_from_dag(&stop_submission_task);
+	
+	_starpu_task_submit_internally(&stop_submission_task);
+
+}
 
 static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers, 
 				       int *added_workers, int *n_added_workers)
@@ -361,6 +367,8 @@ void starpu_add_workers_to_sched_ctx(int *workers_to_add, int nworkers_to_add, u
 		unlocked = 1;
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);
 
+		if(old_task == &stop_submission_task)
+			break;
 		struct _starpu_job *old_j = _starpu_get_job_associated_to_task(old_task);
 		_starpu_push_task(old_j);
 	}
@@ -672,6 +680,7 @@ unsigned starpu_worker_belongs_to_sched_ctx(int workerid, unsigned sched_ctx_id)
 }
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+
 void starpu_call_poped_task_cb(int workerid, unsigned sched_ctx_id, double flops)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);

+ 2 - 0
src/core/sched_ctx.h

@@ -77,6 +77,8 @@ struct _starpu_sched_ctx {
 
 struct _starpu_machine_config;
 
+struct starpu_task stop_submission_task;
+
 /* init sched_ctx_id of all contextes*/
 void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);
 

+ 2 - 2
src/core/sched_policy.c

@@ -621,14 +621,14 @@ void _starpu_sched_pre_exec_hook(struct starpu_task *task)
 		sched_ctx->sched_policy->pre_exec_hook(task);
 }
 
-void _starpu_sched_post_exec_hook(struct starpu_task *task)
+void _starpu_sched_post_exec_hook(struct starpu_task *task, int workerid)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 	if(task->hypervisor_tag > 0 && sched_ctx != NULL && 
 	   sched_ctx->id != 0 && sched_ctx->perf_counters != NULL)
-		sched_ctx->perf_counters->notify_post_exec_hook(sched_ctx->id, task->hypervisor_tag);
+		sched_ctx->perf_counters->notify_post_exec_hook(sched_ctx->id, task->hypervisor_tag, workerid);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR
 
 	if (sched_ctx->sched_policy->post_exec_hook)

+ 1 - 1
src/core/sched_policy.h

@@ -36,7 +36,7 @@ int _starpu_push_task(struct _starpu_job *task);
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker);
 /* pop every task that can be executed on the worker */
 struct starpu_task *_starpu_pop_every_task(struct _starpu_sched_ctx *sched_ctx);
-void _starpu_sched_post_exec_hook(struct starpu_task *task);
+void _starpu_sched_post_exec_hook(struct starpu_task *task, int workerid);
 
 void _starpu_wait_on_sched_event(void);
 

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -243,7 +243,7 @@ void *_starpu_cpu_worker(void *arg)
 		 * Then we free the workers' task alias */
 		if (is_parallel_task)
 		{
-			_starpu_sched_post_exec_hook(task);
+			_starpu_sched_post_exec_hook(task, workerid);
 			free(task);
 		}