Parcourir la source

hypervisor optimisation (avoid unnecessary notifications/locks/trylocks) + add hypervisor events to the traces

Andra Hugo il y a 11 ans
Parent
commit
12dec4c5cc

+ 4 - 0
include/starpu_sched_ctx.h

@@ -102,6 +102,10 @@ void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 
 void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
 
+int starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id);
+
+double starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
 #endif //STARPU_USE_SC_HYPERVISOR

+ 1 - 3
include/starpu_sched_ctx_hypervisor.h

@@ -29,10 +29,8 @@ struct starpu_sched_ctx_performance_counters
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
 	void (*notify_poped_task)(unsigned sched_ctx_id, int worker);
 	void (*notify_pushed_task)(unsigned sched_ctx_id, int worker);
-	void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag,
-				      int nready_tasks, double nready_flops);
+	void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops);
 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size);
-	void (*notify_ready_task)(unsigned sched_ctx_id, struct starpu_task *task);
 	void (*notify_empty_ctx)(unsigned sched_ctx_id, struct starpu_task *task);
 	void (*notify_delete_context)(unsigned sched_ctx);
 };

+ 4 - 6
sc_hypervisor/include/sc_hypervisor_monitoring.h

@@ -100,14 +100,15 @@ struct sc_hypervisor_wrapper
 	/* number of flops that still have to be executed in this ctx */
 	double remaining_flops;
 	
-	/* number of flops coresponding to the ready tasks in this ctx */
-	double ready_flops;
-
 	/* the start time of the resizing sample of this context*/
 	double start_time;
 
 	/* the first time a task was pushed to this context*/
 	double real_start_time;
+	
+	/* the start time for sample in which the hyp is not allowed to react
+	   bc too expensive */
+	double hyp_react_start_time;
 
 	/* the workers don't leave the current ctx until the receiver ctx 
 	   doesn't ack the receive of these workers */
@@ -120,9 +121,6 @@ struct sc_hypervisor_wrapper
 	   flops of all the execution or not */
 	unsigned total_flops_available;
 
-	/* the number of ready tasks submitted to a ctx */
-	int nready_tasks;
-
 	/* boolean indicating that a context is being sized */
 	unsigned to_be_sized;
 

+ 15 - 13
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -1,3 +1,4 @@
+
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011 - 2013  INRIA
@@ -24,8 +25,8 @@ int resize_no = 0;
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	/* for vite */
-/* 	printf("resize_no = %d\n", resize_no); */
-/* 	starpu_trace_user_event(resize_no++); */
+	printf("resize_no = %d\n", resize_no);
+	starpu_trace_user_event(resize_no++);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers;
@@ -63,19 +64,20 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 static void feft_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	{
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_SPEED)
+
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
 				_try_resizing(NULL, -1, NULL, -1);
 			}
-		}
 	
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		}
 	}
 
 }
@@ -151,16 +153,16 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 
 static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING && criteria == SC_IDLE)
 	{
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 				_try_resizing(NULL, -1, NULL, -1);
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 }
 

+ 11 - 5
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -40,6 +40,9 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		int w;
 		for(w = 0; w < nw; w++)
 			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+
+		double ready_flops = starpu_get_nready_flops_of_sched_ctx(sc_w->sched_ctx);
+		int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sc_w->sched_ctx);
 		
 		if(sc_w->to_be_sized)
 		{
@@ -49,17 +52,19 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		else
 		{
 			if(sc_w->remaining_flops < 0.0)
-				flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
+				flops[i] = ready_flops/1000000000.0; /* in gflops*/
 			else
 			{
-				if((sc_w->ready_flops/1000000000.0) <= 0.000002)
+				if((ready_flops/1000000000.0) <= 0.000002)
 					flops[i] = 0.0;
 				else
 					flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
 			}
 		}
-/* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
-/* 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
+		if(flops[i] < 0.0)
+			flops[i] = 0.0;
+		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",
+		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks);
 
 	}
 
@@ -108,6 +113,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 	}
 
 	double vmax = 0.0;
+
 	if(ret != 0.0)
 	{
 		/* redo the lp after cleaning out the contexts that got all the max workers required */
@@ -591,7 +597,7 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 				{
 					nworkers_to_add=1;
 					int old_start = start[w];
-					if(start[w] == nworkers)
+					if(start[w] != 0)
 						start[w]--;
 					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
 					start[w] = old_start;

+ 36 - 41
sc_hypervisor/src/sc_hypervisor.c

@@ -25,10 +25,9 @@ struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, 
-				  int hypervisor_tag, int nready_tasks, double ready_flops);
+				  int hypervisor_tag, double flops);
 static void notify_poped_task(unsigned sched_ctx, int  worker);
 static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size);
-static void notify_ready_task(unsigned sched_ctx, struct starpu_task *task);
 static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task);
 static void notify_delete_context(unsigned sched_ctx);
 
@@ -181,6 +180,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 		hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
 		hypervisor.sched_ctx_w[i].start_time = 0.0;
 		hypervisor.sched_ctx_w[i].real_start_time = 0.0;
+		hypervisor.sched_ctx_w[i].hyp_react_start_time = 0.0;
 		hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
 		hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
 		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
@@ -190,9 +190,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 
 		hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0;
 		hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
-		hypervisor.sched_ctx_w[i].ready_flops = 0.0;
 		hypervisor.sched_ctx_w[i].total_flops_available = 0;
-		hypervisor.sched_ctx_w[i].nready_tasks = 0;
 		hypervisor.sched_ctx_w[i].to_be_sized = 0;
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
@@ -223,7 +221,6 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 	perf_counters->notify_poped_task = notify_poped_task;
 	perf_counters->notify_post_exec_task = notify_post_exec_task;
 	perf_counters->notify_submitted_job = notify_submitted_job;
-	perf_counters->notify_ready_task = notify_ready_task;
 	perf_counters->notify_empty_ctx = notify_empty_ctx;
 	perf_counters->notify_delete_context = notify_delete_context;
 
@@ -316,6 +313,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
 	hypervisor.resize[sched_ctx] = 1;
+	hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -861,35 +859,30 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] == 0.0)
 			{
 				exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker];
-//				printf("%d/%d: exec_time %lf\n", worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].exec_time[worker]);
 			}
 			else
 			{
 				double current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
 				exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time;
-//				printf("%d/%d: exec_time %lf current_exec_time %lf\n", worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].exec_time[worker], current_exec_time);
 			}		
 			norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 0.0 : exec_time / elapsed_time_worker[worker];
 		}			
 
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
-//		double norm_idle_time = max_workers_idle_time[i] / elapsed_time;
-//		double norm_exec_time = exec_time / elapsed_time;
+		int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sched_ctx);
 		if(norm_idle_time >= 0.9)
 		{
-//			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
 			config->max_nworkers = lrint(norm_exec_time);
-/* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
-/* 				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
 		}
 		else
 		{
-			if(norm_idle_time < 0.1)//(max_workers_idle_time[i] < 0.000001)
-				config->max_nworkers = lrint(norm_exec_time)  + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
+			if(norm_idle_time < 0.1)
+				config->max_nworkers = lrint(norm_exec_time)  + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 			else
 				config->max_nworkers = lrint(norm_exec_time);
 		}
+//		config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 		
 		if(config->max_nworkers < 0)
 			config->max_nworkers = 0;
@@ -897,7 +890,7 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			config->max_nworkers = max_cpus;
 		
 		printf("%d: ready tasks  %d idle for long %lf norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n", 
-		       sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
+		       sched_ctx, nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
 
 
 		total_max_nworkers += config->max_nworkers;
@@ -913,9 +906,10 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		unsigned max_nready_sched_ctx = sched_ctxs[0];
 		for(i = 0; i < nsched_ctxs; i++)
 		{
-			if(max_nready < hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks)
+			int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sched_ctxs[i]);
+			if(max_nready < nready_tasks)
 			{
-				max_nready = hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks;
+				max_nready = nready_tasks;
 				max_nready_sched_ctx = sched_ctxs[i];
 			}
 		}
@@ -964,7 +958,13 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 		
 		if(hypervisor.policy.handle_idle_cycle)
 		{
-			hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+			double curr_time = starpu_timing_now();
+			double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
+			if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample)
+			{
+				sc_w->hyp_react_start_time = starpu_timing_now();
+				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+			}
 		}
 	}
 	return;
@@ -986,7 +986,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 	if(sc_w->idle_start_time[worker] > 0.0)
 	{
 		double end_time  = starpu_timing_now();
-		sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */ 
+		sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */
 		sc_w->idle_start_time[worker] = 0.0;
 	}
 			
@@ -997,7 +997,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 
  
 /* notifies the hypervisor that a tagged task has just been executed */
-static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, int ready_tasks, double ready_flops)
+static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops)
 {
 	unsigned sched_ctx = task->sched_ctx;
 	int worker = starpu_worker_get_id();
@@ -1011,25 +1011,30 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 	}
 
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
-	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += task->flops;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
-	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += task->flops;
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops;
 
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops;
-	hypervisor.sched_ctx_w[sched_ctx].nready_tasks = ready_tasks;
-	hypervisor.sched_ctx_w[sched_ctx].ready_flops = ready_flops;
-	if(hypervisor.sched_ctx_w[sched_ctx].ready_flops < 0.0)
-		hypervisor.sched_ctx_w[sched_ctx].ready_flops = 0.0;
-	_ack_resize_completed(sched_ctx, worker);
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops;
+	if(_sc_hypervisor_use_lazy_resize())
+		_ack_resize_completed(sched_ctx, worker);
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 
 	
 	if(hypervisor.resize[sched_ctx])
 	{	
 		if(hypervisor.policy.handle_poped_task)
-			hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+		{
+			double curr_time = starpu_timing_now();
+			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
+			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
+			{
+				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+			}
+		}
 	}
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
 /* 	_ack_resize_completed(sched_ctx, worker); */
@@ -1042,9 +1047,7 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 	
 	unsigned conf_sched_ctx;
 	unsigned i;
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned ns = hypervisor.nsched_ctxs;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 
 	for(i = 0; i < ns; i++)
 	{
@@ -1098,14 +1101,6 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, s
 		hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
 }
 
-static void notify_ready_task(unsigned sched_ctx_id, struct starpu_task *task)
-{
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	hypervisor.sched_ctx_w[sched_ctx_id].nready_tasks++;
-	hypervisor.sched_ctx_w[sched_ctx_id].ready_flops += task->flops;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-}
-
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
 {
 	sc_hypervisor_resize_ctxs(NULL, -1 , NULL, -1);
@@ -1126,10 +1121,10 @@ static void notify_delete_context(unsigned sched_ctx)
 
 void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+//	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+//	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	unsigned s;
 	for(s = 0; s < curr_nsched_ctxs; s++)
 		hypervisor.resize[curr_sched_ctxs[s]] = 1;

+ 22 - 2
src/common/barrier_counter.c

@@ -62,9 +62,9 @@ int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier
 	int ret = 0;
 	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
 
+	barrier->reached_flops -= flops;
 	if (--barrier->reached_start == 0)
 	{
-		barrier->reached_flops -= flops;
 		ret = 1;
 		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
 	}
@@ -79,9 +79,9 @@ int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_
 	int ret = 0;
 	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
 
+	barrier->reached_flops += flops;
 	if(++barrier->reached_start == barrier->count)
 	{
-		barrier->reached_flops += flops;
 		ret = 1;
 		STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
 	}
@@ -113,3 +113,23 @@ int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 }
+
+int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c)
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret;
+//	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_start;
+//	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c)
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	double ret;
+//	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_flops;
+//	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}

+ 3 - 0
src/common/barrier_counter.h

@@ -42,4 +42,7 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c);
 
+int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c);
+
+double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c);
 #endif

+ 12 - 0
src/common/fxt.h

@@ -146,6 +146,9 @@
 #define	_STARPU_FUT_START_WRITEBACK	0x5158
 #define	_STARPU_FUT_END_WRITEBACK	0x5159
 
+#define	_STARPU_FUT_HYPERVISOR_BEGIN    0x5160
+#define	_STARPU_FUT_HYPERVISOR_END	0x5161
+
 #ifdef STARPU_USE_FXT
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
@@ -453,6 +456,13 @@ do {										\
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 
+#define _STARPU_TRACE_HYPERVISOR_BEGIN()  \
+	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid());
+
+#define _STARPU_TRACE_HYPERVISOR_END() \
+	do {} while (0)
+//	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_END, _starpu_gettid());
+
 #ifdef STARPU_FXT_LOCK_TRACES 
 
 #define _STARPU_TRACE_LOCKING_MUTEX()	do { \
@@ -678,6 +688,8 @@ do {										\
 #define _STARPU_TRACE_MEMORY_FULL(size)				do {} while(0)
 #define _STARPU_TRACE_START_UNPARTITION(handle, memnode)	do {} while(0)
 #define _STARPU_TRACE_END_UNPARTITION(handle, memnode)		do {} while(0)
+#define _STARPU_TRACE_HYPERVISOR_BEGIN()        do {} while(0)
+#define _STARPU_TRACE_HYPERVISOR_END()                  do {} while(0)
 
 #endif // STARPU_USE_FXT
 

+ 0 - 1
src/core/jobs.c

@@ -298,7 +298,6 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
 	_starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx);
 	_starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx, flops);
-
 	struct _starpu_worker *worker;
 	worker = _starpu_get_local_worker_key();
 	if (worker)

+ 69 - 15
src/core/sched_ctx.c

@@ -27,6 +27,10 @@ static starpu_pthread_mutex_t finished_submit_mutex = STARPU_PTHREAD_MUTEX_INITI
 struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER;
 starpu_pthread_key_t sched_ctx_key;
 unsigned with_hypervisor = 0;
+double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
+double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
+double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
+size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 
@@ -327,12 +331,6 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 		}
 	}
 
-	int w;
-	for(w = 0; w < STARPU_NMAXWORKERS; w++)
-	{
-		sched_ctx->pop_counter[w] = 0;
-	}
-
 	return sched_ctx;
 }
 
@@ -580,7 +578,11 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 #ifdef STARPU_USE_SC_HYPERVISOR
 	if(sched_ctx != NULL && sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
 	   && sched_ctx->perf_counters != NULL)
+	{
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_delete_context(sched_ctx_id);
+		_STARPU_TRACE_HYPERVISOR_END();
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 
 	unsigned inheritor_sched_ctx_id = sched_ctx->inheritor;
@@ -914,16 +916,16 @@ void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double r
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
 }
 
-int _starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id)
+int starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	return sched_ctx->ready_tasks_barrier.barrier.reached_start;
+	return _starpu_barrier_counter_get_reached_start(&sched_ctx->ready_tasks_barrier);
 }
 
-double _starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id)
+double starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	return sched_ctx->ready_tasks_barrier.barrier.reached_flops;
+	return _starpu_barrier_counter_get_reached_flops(&sched_ctx->ready_tasks_barrier);
 }
 
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id)
@@ -958,6 +960,17 @@ unsigned _starpu_sched_ctx_get_current_context()
 void starpu_sched_ctx_notify_hypervisor_exists()
 {
 	with_hypervisor = 1;
+	int i, j;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		hyp_start_sample[i] = starpu_timing_now();
+		hyp_start_allow_sample[i] = 0.0;
+		for(j = 0; j < STARPU_NMAXWORKERS; j++)
+		{
+			flops[i][j] = 0.0;
+			data_size[i][j] = 0;
+		}
+	}
 }
 
 unsigned starpu_sched_ctx_check_if_hypervisor_exists()
@@ -965,6 +978,32 @@ unsigned starpu_sched_ctx_check_if_hypervisor_exists()
 	return with_hypervisor;
 }
 
+unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
+{
+	return 1;
+	double now = starpu_timing_now();
+	if(hyp_start_allow_sample[sched_ctx_id] > 0.0)
+	{
+		double allow_sample = (now - hyp_start_allow_sample[sched_ctx_id]) / 1000000.0;
+		if(allow_sample < 0.001)
+			return 1;
+		else
+		{
+			hyp_start_allow_sample[sched_ctx_id] = 0.0;
+			hyp_start_sample[sched_ctx_id] = starpu_timing_now();
+			return 0;
+		}
+	}
+	double forbid_sample = (now - hyp_start_sample[sched_ctx_id]) / 1000000.0;
+	if(forbid_sample > 0.01)
+	{
+//		hyp_start_sample[sched_ctx_id] = starpu_timing_now();
+		hyp_start_allow_sample[sched_ctx_id] = starpu_timing_now();
+		return 1;
+	}
+	return 0;
+}
+
 unsigned _starpu_get_nsched_ctxs()
 {
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
@@ -1198,14 +1237,25 @@ void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id)
 
 #ifdef STARPU_USE_SC_HYPERVISOR
 
-void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint)
+void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size2, uint32_t footprint)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	if(sched_ctx != NULL && task->sched_ctx != _starpu_get_initial_sched_ctx()->id && 
 	   task->sched_ctx != STARPU_NMAX_SCHED_CTXS  && sched_ctx->perf_counters != NULL)
-		sched_ctx->perf_counters->notify_post_exec_task(task, data_size, footprint, task->hypervisor_tag, 
-								_starpu_get_nready_tasks_of_sched_ctx(sched_ctx->id), 
-								_starpu_get_nready_flops_of_sched_ctx(sched_ctx->id));
+	{
+		flops[task->sched_ctx][workerid] += task->flops;
+		data_size[task->sched_ctx][workerid] += data_size2;
+
+		if(_starpu_sched_ctx_allow_hypervisor(sched_ctx->id) || task->hypervisor_tag > 0)
+		{
+			_STARPU_TRACE_HYPERVISOR_BEGIN();
+			sched_ctx->perf_counters->notify_post_exec_task(task, data_size[task->sched_ctx][workerid], footprint,
+									task->hypervisor_tag, flops[task->sched_ctx][workerid]);
+			_STARPU_TRACE_HYPERVISOR_END();
+			flops[task->sched_ctx][workerid] = 0.0;
+			data_size[task->sched_ctx][workerid] = 0;
+		}
+	}
 }
 
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)
@@ -1213,8 +1263,12 @@ void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
 	if(sched_ctx != NULL && sched_ctx_id != _starpu_get_initial_sched_ctx()->id && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
-	   && sched_ctx->perf_counters != NULL)
+	   && sched_ctx->perf_counters != NULL && _starpu_sched_ctx_allow_hypervisor(sched_ctx_id))
+	{
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_pushed_task(sched_ctx_id, workerid);
+		_STARPU_TRACE_HYPERVISOR_END();
+	}
 }
 #endif //STARPU_USE_SC_HYPERVISOR
 

+ 2 - 6
src/core/sched_ctx.h

@@ -89,10 +89,6 @@ struct _starpu_sched_ctx
 	/* max GPUs to execute*/
 	int max_ngpus;
 
-	/* needed for overlapping contexts to help the workers
-	   determine which is the next context to pop tasks from */
-	unsigned pop_counter[STARPU_NMAXWORKERS];
-
 	/* in case we delete the context leave resources to the inheritor*/
 	unsigned inheritor;
 
@@ -152,8 +148,6 @@ int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
 void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
-int _starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id);
-double _starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 
 /* Return the corresponding index of the workerid in the ctx table */
@@ -198,6 +192,8 @@ int _starpu_nworkers_able_to_execute_task(struct starpu_task *task, struct _star
 
 void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx);
 
+unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 /* Notifies the hypervisor that a tasks was poped from the workers' list */
 void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);

+ 39 - 19
src/core/sched_policy.c

@@ -333,7 +333,11 @@ int _starpu_push_task(struct _starpu_job *j)
 #ifdef STARPU_USE_SC_HYPERVISOR
 			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
 			   && sched_ctx->perf_counters->notify_empty_ctx)
+			{
+				_STARPU_TRACE_HYPERVISOR_BEGIN();
 				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
+				_STARPU_TRACE_HYPERVISOR_END();
+			}
 #endif
 			return 0;
 		}
@@ -377,7 +381,11 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 #ifdef STARPU_USE_SC_HYPERVISOR
 			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
 			   && sched_ctx->perf_counters->notify_empty_ctx)
+			{
+				_STARPU_TRACE_HYPERVISOR_BEGIN();
 				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
+				_STARPU_TRACE_HYPERVISOR_END();
+			}
 #endif
 
 			return -EAGAIN;
@@ -563,29 +571,34 @@ struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker
 	struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
 	unsigned smallest_counter =  worker->nsched_ctxs;
 	struct _starpu_sched_ctx_list *l = NULL;
-	for (l = worker->sched_ctx_list; l; l = l->next)
+	if(!worker->reverse_phase)
 	{
-		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-/* 		if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1) */
-/* 			return sched_ctx; */
-		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
-		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
+		/* find a context in which the worker hasn't poped yet */
+		for (l = worker->sched_ctx_list; l; l = l->next)
 		{
-			good_sched_ctx = sched_ctx;
-			smallest_counter = sched_ctx->pop_counter[worker->workerid];
+			if(!worker->poped_in_ctx[l->sched_ctx])
+			{
+				worker->poped_in_ctx[l->sched_ctx] = !worker->poped_in_ctx[l->sched_ctx];
+				return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+			}
 		}
+		worker->reverse_phase = !worker->reverse_phase;
 	}
-	
-	if(good_sched_ctx == NULL)
+	if(worker->reverse_phase)
 	{
+		/* if the context has already poped in every one start from the begining */
 		for (l = worker->sched_ctx_list; l; l = l->next)
 		{
-			sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-			sched_ctx->pop_counter[worker->workerid] = 0;
+			if(worker->poped_in_ctx[l->sched_ctx])
+			{
+				worker->poped_in_ctx[l->sched_ctx] = !worker->poped_in_ctx[l->sched_ctx];
+				return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+			}
 		}
-		return _starpu_get_sched_ctx_struct(worker->sched_ctx_list->sched_ctx);
-	}
-	return good_sched_ctx;
+		worker->reverse_phase = !worker->reverse_phase;
+	}	
+	worker->poped_in_ctx[worker->sched_ctx_list->sched_ctx] = !worker->poped_in_ctx[worker->sched_ctx_list->sched_ctx];
+	return _starpu_get_sched_ctx_struct(worker->sched_ctx_list->sched_ctx);
 }
 
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
@@ -660,17 +673,20 @@ pick:
 				}
 #ifdef STARPU_USE_SC_HYPERVISOR
 				struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
-				if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle)
+				if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
+				{
+//					_STARPU_TRACE_HYPERVISOR_BEGIN();
 					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
+//					_STARPU_TRACE_HYPERVISOR_END();
+				}
 #endif //STARPU_USE_SC_HYPERVISOR
 				
 #ifndef STARPU_NON_BLOCKING_DRIVERS
-				if((sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
+				if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1)
 					break;
 				been_here[sched_ctx->id] = 1;
 #endif
 			}
-			sched_ctx->pop_counter[worker->workerid]++;
 		}
 	  }
 
@@ -684,8 +700,12 @@ pick:
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
 
-	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task)
+	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
+	{
+//		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		perf_counters->notify_poped_task(task->sched_ctx, worker->workerid);
+//		_STARPU_TRACE_HYPERVISOR_END();
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 
 

+ 5 - 3
src/core/task.c

@@ -26,6 +26,7 @@
 #include <core/task_bundle.h>
 #include <common/config.h>
 #include <common/utils.h>
+#include <common/fxt.h>
 #include <profiling/profiling.h>
 #include <profiling/bound.h>
 #include <math.h>
@@ -259,7 +260,9 @@ int _starpu_submit_job(struct _starpu_job *j)
 				data_size += _starpu_data_get_size(handle);
 		}
 
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
+		_STARPU_TRACE_HYPERVISOR_END();
 	}
 #endif//STARPU_USE_SC_HYPERVISOR
 
@@ -659,7 +662,6 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
-
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
@@ -852,7 +854,7 @@ int starpu_task_nready(void)
 	int nready = 0;
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 	if(config->topology.nsched_ctxs == 1)
-		nready = _starpu_get_nready_tasks_of_sched_ctx(0);
+		nready = starpu_get_nready_tasks_of_sched_ctx(0);
 	else
 	{
 		int s;
@@ -860,7 +862,7 @@ int starpu_task_nready(void)
 		{
 			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
 			{
-				nready += _starpu_get_nready_tasks_of_sched_ctx(config->sched_ctxs[s].id);
+				nready += starpu_get_nready_tasks_of_sched_ctx(config->sched_ctxs[s].id);
 			}
 		}
 	}

+ 4 - 0
src/core/workers.c

@@ -440,7 +440,11 @@ static void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu
 	workerarg->parallel_sect = 0;
 
 	for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
+	{
 		workerarg->shares_tasks_lists[ctx] = 0;
+		workerarg->poped_in_ctx[ctx] = 0;
+	}
+	workerarg->reverse_phase = 0;
 
 	/* cpu_set/hwloc_cpu_set initialized in topology.c */
 }

+ 8 - 0
src/core/workers.h

@@ -106,6 +106,14 @@ LIST_TYPE(_starpu_worker,
 	/* in this case when removing him from a context it disapears instantly */
 	unsigned shares_tasks_lists[STARPU_NMAX_SCHED_CTXS];
 
+        /* boolean to choose the next ctx a worker will pop into */
+	unsigned poped_in_ctx[STARPU_NMAX_SCHED_CTXS];	  
+
+       /* boolean indicating when we have checked all ctxs and must flip the phase of the boolean poped_in_ctx */
+	unsigned reverse_phase;
+
+
+
 #ifdef __GLIBC__
 	cpu_set_t cpu_set;
 #endif /* __GLIBC__ */

+ 30 - 0
src/debug/traces/starpu_fxt.c

@@ -685,6 +685,28 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], "B");
 }
 
+static void handle_hyp_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+	int worker;
+	worker = find_worker_id(ev->param[0]);
+	if (worker < 0)
+		return;
+
+	if (out_paje_file)
+		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "H");
+}
+
+static void handle_hyp_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+/* 	int worker; */
+/* 	worker = find_worker_id(ev->param[0]); */
+/* 	if (worker < 0) */
+/* 		return; */
+
+/* 	if (out_paje_file) */
+/* 		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "B"); */
+}
+
 static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus)
 {
 	int worker;
@@ -1673,6 +1695,14 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 			case _STARPU_FUT_MEMORY_FULL:
 				break;
 
+			case _STARPU_FUT_HYPERVISOR_BEGIN:
+				handle_hyp_begin(&ev, options);
+				break;
+
+			case _STARPU_FUT_HYPERVISOR_END:
+				handle_hyp_end(&ev, options);
+				break;
+
 			default:
 #ifdef STARPU_VERBOSE
 				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",

+ 3 - 1
src/debug/traces/starpu_paje.c

@@ -194,6 +194,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 		poti_DefineEntityValue("Sl", ctx, "Sleeping", ".9 .1 .0");
 		poti_DefineEntityValue("P", ctx, "Progressing", ".4 .1 .6");
 		poti_DefineEntityValue("U", ctx, "Unpartitioning", ".0 .0 1.0");
+		poti_DefineEntityValue("H", ctx, "Hypervisor", ".5 .18 .0");
 	}
 
 	/* Types for the Scheduler */
@@ -233,7 +234,8 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       B       S       Overhead         \".5 .18 .0\"		\n\
 6       Sl       S      Sleeping         \".9 .1 .0\"		\n\
 6       P       S       Progressing         \".4 .1 .6\"		\n\
-6       U       S       Unpartitioning      \".0 .0 1.0\"		\n");
+6       U       S       Unpartitioning      \".0 .0 1.0\"		\n\
+6       H       S       Hypervisor      \".5 .18 .0\"		\n");
 	fprintf(file, "\
 6       P       CtS       Processing         \"0 0 0\"		\n\
 6       Sl       CtS      Sleeping         \".9 .1 .0\"		\n\