浏览代码

Bug fix in ioctl (NOTE: the variadic parameter list is now terminated by -1, not 0) +
bug fix for empty contexts + bug fix for the app_driven policy (workers are no longer
lazily transferred from one context to another in this case; they are removed immediately)

Andra Hugo 13 年之前
父节点
当前提交
c1c43407db

+ 3 - 1
include/starpu_scheduler.h

@@ -154,7 +154,7 @@ struct starpu_performance_counters {
 	void (*notify_idle_end)(unsigned sched_ctx, int worker);
 	void (*notify_pushed_task)(unsigned sched_ctx, int worker);
 	void (*notify_poped_task)(unsigned sched_ctx, int worker, double flops);
-	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid);
+	void (*notify_post_exec_hook)(unsigned sched_ctx, int taskid, int workerid);
 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint);
 };
 
@@ -206,6 +206,8 @@ unsigned starpu_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id
 
 unsigned starpu_worker_belongs_to_sched_ctx(int workerid, unsigned sched_ctx_id);
 
+void starpu_stop_task_submission(unsigned sched_ctx);
+
 /* Check if the worker specified by workerid can execute the codelet. */
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 

+ 2 - 2
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -114,7 +114,7 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx);
 
 void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag);
 
-void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receier_sched_ctx, int *workers_to_move, unsigned nworkers_to_move);
+void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receier_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now);
 
 void sched_ctx_hypervisor_stop_resize(unsigned sched_ctx);
 
@@ -142,7 +142,7 @@ char* sched_ctx_hypervisor_get_policy();
 
 void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
 
-void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx);
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now);
 
 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
 

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/app_driven_policy.c

@@ -21,7 +21,7 @@ void app_driven_handle_post_exec_hook(unsigned sched_ctx, struct starpu_htbl32_n
 	void* sched_ctx_pt =  _starpu_htbl_search_32(resize_requests, (uint32_t)task_tag);
 	if(sched_ctx_pt && sched_ctx_pt != resize_requests)
 	{
-		_resize_to_unknown_receiver(sched_ctx);
+		_resize_to_unknown_receiver(sched_ctx, 1);
 		_starpu_htbl_insert_32(&resize_requests, (uint32_t)task_tag, NULL);
 	}
 

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/gflops_rate_policy.c

@@ -147,7 +147,7 @@ static unsigned _gflops_rate_resize(unsigned sender_sched_ctx, unsigned receiver
                 int *workers_to_move =  _get_workers_to_move(sender_sched_ctx, receiver_sched_ctx, &nworkers_to_move);
 		if(nworkers_to_move > 0)
                 {
-                        sched_ctx_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move);
+                        sched_ctx_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move, 0);
 
                         struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiver_sched_ctx);
                         int i;

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/idle_policy.c

@@ -21,7 +21,7 @@ void idle_handle_idle_cycle(unsigned sched_ctx, int worker)
 	struct sched_ctx_wrapper* sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);
 	struct policy_config *config = sc_w->config;
 	if(config != NULL &&  sc_w->current_idle_time[worker] > config->max_idle[worker])
-		_resize_to_unknown_receiver(sched_ctx);
+		_resize_to_unknown_receiver(sched_ctx, 0);
 }
 
 struct hypervisor_policy idle_policy = {

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp2_policy.c

@@ -408,7 +408,7 @@ void _redistribute_resources_in_ctxs2(int ns, int nw, int nt, double tasks[nw][n
 		unsigned nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], STARPU_ALL);
 	
 		if(nworkers_ctx > nremove)
-			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
 	
 		if(nworkers_ctx != STARPU_NMAXWORKERS)
 		{

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/lp3_policy.c

@@ -392,7 +392,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 				}
 			}
 			if(all_have_dest)
-				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
+				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
 		}
 	}
 

+ 2 - 2
sched_ctx_hypervisor/src/hypervisor_policies/lp_policy.c

@@ -107,7 +107,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][
 					if(receiving_s != -1)
 					{
 						int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
-						sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move);
+						sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, nworkers_to_move, 0);
 						struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
 						int i;
 						for(i = 0; i < nworkers_to_move; i++)
@@ -147,7 +147,7 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][
 							int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
 							if(x > 0)
 							{
-								sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, x);
+								sched_ctx_hypervisor_move_workers(sched_ctxs[s], receiving_s, workers_to_move, x, 0);
 
 								struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiving_s);
 								int i;

+ 10 - 9
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -233,7 +233,7 @@ unsigned _get_nworkers_to_move(unsigned req_sched_ctx)
 	return nworkers_to_move;
 }
 
-unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize)
+unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now)
 {
 	int ret = 1;
 	if(force_resize)
@@ -264,7 +264,7 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
 			{						
 				int *workers_to_move = _get_first_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ALL);
-				sched_ctx_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move);
+				sched_ctx_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move, now);
 				
 				struct policy_config *new_config = sched_ctx_hypervisor_get_config(poor_sched_ctx);
 				int i;
@@ -282,9 +282,9 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 }
 
 
-unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx)
+unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
 {
-	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0);
+	return _resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
 }
 
 static double _get_elapsed_flops(struct sched_ctx_wrapper* sc_w, int *npus, enum starpu_archtype req_arch)
@@ -319,9 +319,8 @@ double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w)
 	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
 	double prc = elapsed_flops/sc_w->total_flops;
 	unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sc_w->sched_ctx);
-
-	double redim_sample = elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE*nworkers : HYPERVISOR_REDIM_SAMPLE*nworkers;
-        if(prc >= redim_sample)
+	double redim_sample = elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE*nworkers : HYPERVISOR_REDIM_SAMPLE*nworkers;  
+	if(prc >= redim_sample)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = curr_time - sc_w->start_time;
@@ -371,9 +370,11 @@ int _velocity_gap_btw_ctxs()
 					if(other_ctx_v != 0.0)
 					{
 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
-						if(gap > 2)
+						if(gap > 1.5)
 							return 1;
-					}
+					} 
+					else
+						return 1;						
 				}
 			}
 		}

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -30,7 +30,7 @@ unsigned _get_nworkers_to_move(unsigned req_sched_ctx);
 
 unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize);
 
-unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx);
+unsigned _resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now);
 
 double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w);
 

+ 2 - 3
sched_ctx_hypervisor/src/sched_ctx_config.c

@@ -114,7 +114,7 @@ static struct policy_config* _ioctl(unsigned sched_ctx, va_list varg_list, unsig
 	int *workerids;
 	int nworkers;
 
-	while ((arg_type = va_arg(varg_list, int)) != 0) 
+	while ((arg_type = va_arg(varg_list, int)) != -1) 
 	{
 		switch(arg_type)
 		{
@@ -207,7 +207,7 @@ void sched_ctx_hypervisor_ioctl(unsigned sched_ctx, ...)
 	int stop = 0;
 	int task_tag = -1;
 
-	while ((arg_type = va_arg(varg_list, int)) != 0) 
+	while ((arg_type = va_arg(varg_list, int)) != -1) 
 	{
 		switch(arg_type)
 		{
@@ -230,7 +230,6 @@ void sched_ctx_hypervisor_ioctl(unsigned sched_ctx, ...)
 
 	/* if config not null => save hypervisor configuration and consider it later */
 	struct policy_config *config = _ioctl(sched_ctx, varg_list, (task_tag > 0));
-
 	if(config != NULL)
 	{
 		pthread_mutex_lock(&hypervisor.conf_mut[sched_ctx]);

+ 150 - 125
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -22,7 +22,7 @@ struct starpu_performance_counters* perf_counters = NULL;
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_poped_task(unsigned sched_ctx, int worker, double flops);
-static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
+static void notify_post_exec_hook(unsigned sched_ctx, int taskid, int workerid);
 static void notify_idle_end(unsigned sched_ctx, int  worker);
 static void notify_submitted_job(struct starpu_task *task, unsigned footprint);
 
@@ -296,6 +296,9 @@ void sched_ctx_hypervisor_unregister_ctx(unsigned sched_ctx)
 	free(hypervisor.resize_requests[sched_ctx]);
 	pthread_mutex_destroy(&hypervisor.conf_mut[sched_ctx]);
 	pthread_mutex_destroy(&hypervisor.resize_mut[sched_ctx]);
+	if(hypervisor.nsched_ctxs == 1)
+		sched_ctx_hypervisor_stop_resize(hypervisor.sched_ctxs[0]);
+
 	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -346,7 +349,7 @@ int get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch)
 
 /* actually move the workers: the cpus are moved, gpus are only shared  */
 /* forbids another resize request before this one is take into account */
-void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move)
+void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now)
 {
 	if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])// && hypervisor.resize[receiver_sched_ctx])
 	{
@@ -365,25 +368,40 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 //			starpu_remove_workers_from_sched_ctx(cpus, ncpus, sender_sched_ctx);
 
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
-		pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
-		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
-
 
-		int i;
-		for(i = 0; i < nworkers_to_move; i++)
+		if(now)
 		{
-			hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
-			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];	
-			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;	
+				int j;
+				printf("remove from ctx %d:", sender_sched_ctx);
+				for(j = 0; j < nworkers_to_move; j++)
+					printf(" %d", workers_to_move[j]);
+				printf("\n");
+				
+				starpu_remove_workers_from_sched_ctx(workers_to_move, nworkers_to_move, sender_sched_ctx);
+		}
+		else
+		{
+			pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
+			
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
+			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
+			
+			
+			int i;
+			for(i = 0; i < nworkers_to_move; i++)
+			{
+				hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
+				hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];	
+				hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;	
+			}
+			
+			hypervisor.resize[sender_sched_ctx] = 0;
+			hypervisor.resize[receiver_sched_ctx] = 0;
+			
+			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 		}
-
-		hypervisor.resize[sender_sched_ctx] = 0;
-		hypervisor.resize[receiver_sched_ctx] = 0;
-
-		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	}
 	return;
 }
@@ -412,37 +430,50 @@ unsigned sched_ctx_hypervisor_can_resize(unsigned sched_ctx)
 	return hypervisor.resize[sched_ctx];
 }
 
-void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx)
+void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now)
 {
 	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
 	{
 		int nworkers=0;
 		int workers[nworkers_to_remove];
 	
-		int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);	
-		if(ret != EBUSY)
+		if(now)
 		{
-			
-			int i;
-			for(i = 0; i < nworkers_to_remove; i++)
-				if(starpu_worker_belongs_to_sched_ctx(workers_to_remove[i], sched_ctx))
-					workers[nworkers++] = workers_to_remove[i];
-			
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
-			
-			
-			for(i = 0; i < nworkers; i++)
+				int j;
+				printf("remove from ctx %d:", sched_ctx);
+				for(j = 0; j < nworkers_to_remove; j++)
+					printf(" %d", workers_to_remove[j]);
+				printf("\n");
+				
+				starpu_remove_workers_from_sched_ctx(workers_to_remove, nworkers_to_remove, sched_ctx);
+		}
+		else
+		{
+			int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);	
+			if(ret != EBUSY)
 			{
-				hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
-				hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];	
-				hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
-			}
+				
+				int i;
+				for(i = 0; i < nworkers_to_remove; i++)
+					if(starpu_worker_belongs_to_sched_ctx(workers_to_remove[i], sched_ctx))
+						workers[nworkers++] = workers_to_remove[i];
+				
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
+				hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+				
+				
+				for(i = 0; i < nworkers; i++)
+				{
+					hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
+					hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];	
+					hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+				}
 
-			hypervisor.resize[sched_ctx] = 0;
-			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+				hypervisor.resize[sched_ctx] = 0;
+				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+			}
 		}
  	}
 	return;
@@ -481,48 +512,48 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 	struct resize_ack *resize_ack = NULL;
 	unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 
-	if(hypervisor.nsched_ctxs > 0)
+	int i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
-		int i;
-		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
 		{
-			if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
+			struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
+			pthread_mutex_lock(&sc_w->mutex);
+			unsigned only_remove = 0;
+			if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && 
+			   sc_w->resize_ack.nmoved_workers > 0 && starpu_worker_belongs_to_sched_ctx(worker, hypervisor.sched_ctxs[i]))
 			{
-				struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
-				unsigned only_remove = 0;
-				if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && 
-				   sc_w->resize_ack.nmoved_workers > 0 && starpu_worker_belongs_to_sched_ctx(worker, hypervisor.sched_ctxs[i]))
-				{
-					int j;
-					for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
-						if(sc_w->resize_ack.moved_workers[j] == worker)
-						{
-							only_remove = 1;
-							break;
-						}
-				}
-				if(only_remove || 
-				   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
-				{
-					resize_ack = &sc_w->resize_ack;
-					sender_sched_ctx = hypervisor.sched_ctxs[i];
-					break;
-				}
+				int j;
+				for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
+					if(sc_w->resize_ack.moved_workers[j] == worker)
+					{
+						only_remove = 1;
+						break;
+					}
+			}
+			if(only_remove || 
+			   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
+			{
+				resize_ack = &sc_w->resize_ack;
+				sender_sched_ctx = hypervisor.sched_ctxs[i];
+				pthread_mutex_unlock(&sc_w->mutex);
+				break;
 			}
+			pthread_mutex_unlock(&sc_w->mutex);
 		}
 	}
 
 	/* if there is no ctx waiting for its ack return 1*/
 	if(resize_ack == NULL)
 		return 1;
-	int *moved_workers = resize_ack->moved_workers;
-	int nmoved_workers = resize_ack->nmoved_workers;
-	int *acked_workers = resize_ack->acked_workers;
-	int i;
 	
 	int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	if(ret != EBUSY)
 	{
+		int *moved_workers = resize_ack->moved_workers;
+		int nmoved_workers = resize_ack->nmoved_workers;
+		int *acked_workers = resize_ack->acked_workers;
+
 		if(worker != -1)
 		{
 			for(i = 0; i < nmoved_workers; i++)
@@ -579,6 +610,7 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 				resize_ack->nmoved_workers = 0;
 				free(resize_ack->moved_workers);
 				free(resize_ack->acked_workers);
+
 			}
 			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 			return resize_completed;
@@ -598,34 +630,24 @@ void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
 static void notify_idle_end(unsigned sched_ctx, int worker)
 {
-	if(hypervisor.nsched_ctxs > 1)
-	{
-
-		if(hypervisor.resize[sched_ctx])
-			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
-		
-		if(hypervisor.policy.handle_idle_end)
-			hypervisor.policy.handle_idle_end(sched_ctx, worker);
+	if(hypervisor.resize[sched_ctx])
+		hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
+	
+	if(hypervisor.policy.handle_idle_end)
+		hypervisor.policy.handle_idle_end(sched_ctx, worker);
 		
-//			_ack_resize_completed(sched_ctx, worker);
-	}
 }
 
 /* notifies the hypervisor that the worker spent another cycle in idle time */
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 {
-	if(hypervisor.nsched_ctxs > 1)
+	if(hypervisor.resize[sched_ctx])
 	{
-		if(hypervisor.resize[sched_ctx])
-		{
-			struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
-			sc_w->current_idle_time[worker] += idle_time;
-			if(hypervisor.policy.handle_idle_cycle)
+		struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
+		sc_w->current_idle_time[worker] += idle_time;
+		if(hypervisor.policy.handle_idle_cycle)
 				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
-		}		
-/* 		else  */
-/* 			_ack_resize_completed(sched_ctx, worker); */
-	}
+	}		
 	return;
 }
 
@@ -657,55 +679,54 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += elapsed_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= elapsed_flops; //sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]);
 
-	if(hypervisor.nsched_ctxs > 1)
+	if(hypervisor.resize[sched_ctx])
 	{
-		if(hypervisor.resize[sched_ctx])
-		{
-			if(hypervisor.policy.handle_poped_task)
-				hypervisor.policy.handle_poped_task(sched_ctx, worker);
-		}
-		_ack_resize_completed(sched_ctx, worker);
-			
+		if(hypervisor.policy.handle_poped_task)
+			hypervisor.policy.handle_poped_task(sched_ctx, worker);
 	}
+	_ack_resize_completed(sched_ctx, worker);
 }
 
 /* notifies the hypervisor that a tagged task has just been executed */
-static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
+static void notify_post_exec_hook(unsigned sched_ctx, int task_tag, int worker)
 {
 	STARPU_ASSERT(task_tag > 0);
 
-	if(hypervisor.nsched_ctxs > 1)
+	unsigned conf_sched_ctx;
+	int i;
+	pthread_mutex_lock(&act_hypervisor_mutex);
+	unsigned ns = hypervisor.nsched_ctxs;
+	pthread_mutex_unlock(&act_hypervisor_mutex);
+	
+	for(i = 0; i < ns; i++)
 	{
-		unsigned conf_sched_ctx;
-		int i;
-		for(i = 0; i < hypervisor.nsched_ctxs; i++)
-		{
-			conf_sched_ctx = hypervisor.sched_ctxs[i];
-			pthread_mutex_lock(&hypervisor.conf_mut[sched_ctx]);
-			void *config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
-			if(config && config != hypervisor.configurations[conf_sched_ctx])
-			{
-				sched_ctx_hypervisor_set_config(conf_sched_ctx, config);
-				free(config);
-				_starpu_htbl_insert_32(&hypervisor.configurations[sched_ctx], (uint32_t)task_tag, NULL);
-			}
-			pthread_mutex_unlock(&hypervisor.conf_mut[sched_ctx]);
-		}	
+		conf_sched_ctx = hypervisor.sched_ctxs[i];
+		pthread_mutex_lock(&hypervisor.conf_mut[conf_sched_ctx]);
+		void *config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
 
-		/* for the app driven we have to wait for the resize to be available
-		   because the event is required to be executed at this specific moment */
-		while(!_ack_resize_completed(sched_ctx, -1));
-
-		if(hypervisor.resize[sched_ctx])
+		if(config)// && config != hypervisor.configurations[conf_sched_ctx])
 		{
-			pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
-			struct starpu_htbl32_node* resize_requests = hypervisor.resize_requests[sched_ctx];
-
-			if(hypervisor.policy.handle_post_exec_hook)
-				hypervisor.policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
-			pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
+			sched_ctx_hypervisor_set_config(conf_sched_ctx, config);
+			free(config);
+			_starpu_htbl_insert_32(&hypervisor.configurations[sched_ctx], (uint32_t)task_tag, NULL);
 		}
+		pthread_mutex_unlock(&hypervisor.conf_mut[conf_sched_ctx]);
+	}	
+	
+	/* for the app driven we have to wait for the resize to be available
+	   because the event is required to be executed at this specific moment */
+//	while(!_ack_resize_completed(sched_ctx, worker));
+	
+	if(hypervisor.resize[sched_ctx])
+	{
+		pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
+		struct starpu_htbl32_node* resize_requests = hypervisor.resize_requests[sched_ctx];
+		
+		if(hypervisor.policy.handle_post_exec_hook)
+			hypervisor.policy.handle_post_exec_hook(sched_ctx, resize_requests, task_tag);
+		pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
 	}
+	return;
 }
 
 static void notify_submitted_job(struct starpu_task *task, uint32_t footprint)
@@ -720,8 +741,10 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint)
 
 void sched_ctx_hypervisor_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
+	pthread_mutex_lock(&act_hypervisor_mutex);
 	int curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : nsched_ctxs;
 	int *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
+	pthread_mutex_unlock(&act_hypervisor_mutex);
 	int s;
 	for(s = 0; s < curr_nsched_ctxs; s++)
 		hypervisor.resize[curr_sched_ctxs[s]] = 1;
@@ -742,7 +765,9 @@ int* sched_ctx_hypervisor_get_sched_ctxs()
 
 int sched_ctx_hypervisor_get_nsched_ctxs()
 {
-	return hypervisor.nsched_ctxs;
+	int ns;
+	ns = hypervisor.nsched_ctxs;
+	return ns;
 }
 
 void sched_ctx_hypervisor_save_size_req(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)

+ 1 - 1
src/core/jobs.c

@@ -210,7 +210,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 	/* control task should not execute post_exec_hook */
 	if(j->task_size == 1 && task->cl != NULL && !task->control_task)
 	{
-	  _starpu_sched_post_exec_hook(task);
+		_starpu_sched_post_exec_hook(task, workerid);
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 	  starpu_call_poped_task_cb(workerid, task->sched_ctx, task->flops);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR

+ 10 - 1
src/core/sched_ctx.c

@@ -98,12 +98,18 @@ static void _starpu_update_workers(int *workerids, int nworkers, int sched_ctx_i
 
 			_starpu_exclude_task_from_dag(worker[i]->tasks[sched_ctx_id]);
 
-//			starpu_push_local_task(workerids[i], worker[i]->tasks[sched_ctx_id], 1);
 			_starpu_task_submit_internally(worker[i]->tasks[sched_ctx_id]);
 		}		
 	}
 }
 
+void starpu_stop_task_submission(unsigned sched_ctx)
+{
+	_starpu_exclude_task_from_dag(&stop_submission_task);
+	
+	_starpu_task_submit_internally(&stop_submission_task);
+
+}
 
 static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers, 
 				       int *added_workers, int *n_added_workers)
@@ -361,6 +367,8 @@ void starpu_add_workers_to_sched_ctx(int *workers_to_add, int nworkers_to_add, u
 		unlocked = 1;
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);
 
+		if(old_task == &stop_submission_task)
+			break;
 		struct _starpu_job *old_j = _starpu_get_job_associated_to_task(old_task);
 		_starpu_push_task(old_j);
 	}
@@ -672,6 +680,7 @@ unsigned starpu_worker_belongs_to_sched_ctx(int workerid, unsigned sched_ctx_id)
 }
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+
 void starpu_call_poped_task_cb(int workerid, unsigned sched_ctx_id, double flops)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);

+ 2 - 0
src/core/sched_ctx.h

@@ -77,6 +77,8 @@ struct _starpu_sched_ctx {
 
 struct _starpu_machine_config;
 
+struct starpu_task stop_submission_task;
+
 /* init sched_ctx_id of all contextes*/
 void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);
 

+ 2 - 2
src/core/sched_policy.c

@@ -621,14 +621,14 @@ void _starpu_sched_pre_exec_hook(struct starpu_task *task)
 		sched_ctx->sched_policy->pre_exec_hook(task);
 }
 
-void _starpu_sched_post_exec_hook(struct starpu_task *task)
+void _starpu_sched_post_exec_hook(struct starpu_task *task, int workerid)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 	if(task->hypervisor_tag > 0 && sched_ctx != NULL && 
 	   sched_ctx->id != 0 && sched_ctx->perf_counters != NULL)
-		sched_ctx->perf_counters->notify_post_exec_hook(sched_ctx->id, task->hypervisor_tag);
+		sched_ctx->perf_counters->notify_post_exec_hook(sched_ctx->id, task->hypervisor_tag, workerid);
 #endif //STARPU_USE_SCHED_CTX_HYPERVISOR
 
 	if (sched_ctx->sched_policy->post_exec_hook)

+ 1 - 1
src/core/sched_policy.h

@@ -36,7 +36,7 @@ int _starpu_push_task(struct _starpu_job *task);
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker);
 /* pop every task that can be executed on the worker */
 struct starpu_task *_starpu_pop_every_task(struct _starpu_sched_ctx *sched_ctx);
-void _starpu_sched_post_exec_hook(struct starpu_task *task);
+void _starpu_sched_post_exec_hook(struct starpu_task *task, int workerid);
 
 void _starpu_wait_on_sched_event(void);
 

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -243,7 +243,7 @@ void *_starpu_cpu_worker(void *arg)
 		 * Then we free the workers' task alias */
 		if (is_parallel_task)
 		{
-			_starpu_sched_post_exec_hook(task);
+			_starpu_sched_post_exec_hook(task, workerid);
 			free(task);
 		}