Parcourir la source

hypervisor optimisation (avoid unnecessary notifications/locks/trylocks) + add hypervisor events to the traces

Andra Hugo il y a 11 ans
Parent
commit
12dec4c5cc

+ 4 - 0
include/starpu_sched_ctx.h

@@ -102,6 +102,10 @@ void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 
 void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
 
+int starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id);
+
+double starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
 #endif //STARPU_USE_SC_HYPERVISOR

+ 1 - 3
include/starpu_sched_ctx_hypervisor.h

@@ -29,10 +29,8 @@ struct starpu_sched_ctx_performance_counters
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
 	void (*notify_poped_task)(unsigned sched_ctx_id, int worker);
 	void (*notify_pushed_task)(unsigned sched_ctx_id, int worker);
-	void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag,
-				      int nready_tasks, double nready_flops);
+	void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops);
 	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size);
-	void (*notify_ready_task)(unsigned sched_ctx_id, struct starpu_task *task);
 	void (*notify_empty_ctx)(unsigned sched_ctx_id, struct starpu_task *task);
 	void (*notify_delete_context)(unsigned sched_ctx);
 };

+ 4 - 6
sc_hypervisor/include/sc_hypervisor_monitoring.h

@@ -100,14 +100,15 @@ struct sc_hypervisor_wrapper
 	/* number of flops that still have to be executed in this ctx */
 	double remaining_flops;
 	
-	/* number of flops coresponding to the ready tasks in this ctx */
-	double ready_flops;
-
 	/* the start time of the resizing sample of this context*/
 	double start_time;
 
 	/* the first time a task was pushed to this context*/
 	double real_start_time;
+	
+	/* the start time for sample in which the hyp is not allowed to react
+	   bc too expensive */
+	double hyp_react_start_time;
 
 	/* the workers don't leave the current ctx until the receiver ctx 
 	   doesn't ack the receive of these workers */
@@ -120,9 +121,6 @@ struct sc_hypervisor_wrapper
 	   flops of all the execution or not */
 	unsigned total_flops_available;
 
-	/* the number of ready tasks submitted to a ctx */
-	int nready_tasks;
-
 	/* boolean indicating that a context is being sized */
 	unsigned to_be_sized;
 

+ 15 - 13
sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c

@@ -1,3 +1,4 @@
+
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011 - 2013  INRIA
@@ -24,8 +25,8 @@ int resize_no = 0;
 static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	/* for vite */
-/* 	printf("resize_no = %d\n", resize_no); */
-/* 	starpu_trace_user_event(resize_no++); */
+	printf("resize_no = %d\n", resize_no);
+	starpu_trace_user_event(resize_no++);
 	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
 	unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers;
@@ -63,19 +64,20 @@ static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, i
 static void feft_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, 
 				      __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
 {
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING && criteria == SC_SPEED)
 	{
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_SPEED)
+
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 			if(sc_hypervisor_check_speed_gap_btw_ctxs())
 			{
 				_try_resizing(NULL, -1, NULL, -1);
 			}
-		}
 	
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+		}
 	}
 
 }
@@ -151,16 +153,16 @@ static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *worker
 
 static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
 {
-	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
-	if(ret != EBUSY)
+	unsigned criteria = sc_hypervisor_get_resize_criteria();
+	if(criteria != SC_NOTHING && criteria == SC_IDLE)
 	{
-		unsigned criteria = sc_hypervisor_get_resize_criteria();
-		if(criteria != SC_NOTHING && criteria == SC_IDLE)
+		int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
+		if(ret != EBUSY)
 		{
 			if(sc_hypervisor_check_idle(sched_ctx, worker))
 				_try_resizing(NULL, -1, NULL, -1);
+			starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 		}
-		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	}
 }
 

+ 11 - 5
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -40,6 +40,9 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		int w;
 		for(w = 0; w < nw; w++)
 			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); 
+
+		double ready_flops = starpu_get_nready_flops_of_sched_ctx(sc_w->sched_ctx);
+		int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sc_w->sched_ctx);
 		
 		if(sc_w->to_be_sized)
 		{
@@ -49,17 +52,19 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 		else
 		{
 			if(sc_w->remaining_flops < 0.0)
-				flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
+				flops[i] = ready_flops/1000000000.0; /* in gflops*/
 			else
 			{
-				if((sc_w->ready_flops/1000000000.0) <= 0.000002)
+				if((ready_flops/1000000000.0) <= 0.000002)
 					flops[i] = 0.0;
 				else
 					flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
 			}
 		}
-/* 		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
-/* 		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
+		if(flops[i] < 0.0)
+			flops[i] = 0.0;
+		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",
+		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks);
 
 	}
 
@@ -108,6 +113,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 	}
 
 	double vmax = 0.0;
+
 	if(ret != 0.0)
 	{
 		/* redo the lp after cleaning out the contexts that got all the max workers required */
@@ -591,7 +597,7 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 				{
 					nworkers_to_add=1;
 					int old_start = start[w];
-					if(start[w] == nworkers)
+					if(start[w] != 0)
 						start[w]--;
 					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
 					start[w] = old_start;

+ 36 - 41
sc_hypervisor/src/sc_hypervisor.c

@@ -25,10 +25,9 @@ struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
 static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
 static void notify_pushed_task(unsigned sched_ctx, int worker);
 static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, 
-				  int hypervisor_tag, int nready_tasks, double ready_flops);
+				  int hypervisor_tag, double flops);
 static void notify_poped_task(unsigned sched_ctx, int  worker);
 static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size);
-static void notify_ready_task(unsigned sched_ctx, struct starpu_task *task);
 static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task);
 static void notify_delete_context(unsigned sched_ctx);
 
@@ -181,6 +180,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 		hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
 		hypervisor.sched_ctx_w[i].start_time = 0.0;
 		hypervisor.sched_ctx_w[i].real_start_time = 0.0;
+		hypervisor.sched_ctx_w[i].hyp_react_start_time = 0.0;
 		hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
 		hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
 		hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
@@ -190,9 +190,7 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 
 		hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0;
 		hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
-		hypervisor.sched_ctx_w[i].ready_flops = 0.0;
 		hypervisor.sched_ctx_w[i].total_flops_available = 0;
-		hypervisor.sched_ctx_w[i].nready_tasks = 0;
 		hypervisor.sched_ctx_w[i].to_be_sized = 0;
 		int j;
 		for(j = 0; j < STARPU_NMAXWORKERS; j++)
@@ -223,7 +221,6 @@ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
 	perf_counters->notify_poped_task = notify_poped_task;
 	perf_counters->notify_post_exec_task = notify_post_exec_task;
 	perf_counters->notify_submitted_job = notify_submitted_job;
-	perf_counters->notify_ready_task = notify_ready_task;
 	perf_counters->notify_empty_ctx = notify_empty_ctx;
 	perf_counters->notify_delete_context = notify_delete_context;
 
@@ -316,6 +313,7 @@ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
 	hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
 	hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
 	hypervisor.resize[sched_ctx] = 1;
+	hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
@@ -861,35 +859,30 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] == 0.0)
 			{
 				exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker];
-//				printf("%d/%d: exec_time %lf\n", worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].exec_time[worker]);
 			}
 			else
 			{
 				double current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ 
 				exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time;
-//				printf("%d/%d: exec_time %lf current_exec_time %lf\n", worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].exec_time[worker], current_exec_time);
 			}		
 			norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 0.0 : exec_time / elapsed_time_worker[worker];
 		}			
 
 		double curr_time = starpu_timing_now();
 		double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
-//		double norm_idle_time = max_workers_idle_time[i] / elapsed_time;
-//		double norm_exec_time = exec_time / elapsed_time;
+		int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sched_ctx);
 		if(norm_idle_time >= 0.9)
 		{
-//			config->max_nworkers = 	workers->nworkers - lrint(norm_idle_time);
 			config->max_nworkers = lrint(norm_exec_time);
-/* 			if(config->max_nworkers > hypervisor.sched_ctx_w[sched_ctx].nready_tasks) */
-/* 				config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
 		}
 		else
 		{
-			if(norm_idle_time < 0.1)//(max_workers_idle_time[i] < 0.000001)
-				config->max_nworkers = lrint(norm_exec_time)  + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
+			if(norm_idle_time < 0.1)
+				config->max_nworkers = lrint(norm_exec_time)  + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 			else
 				config->max_nworkers = lrint(norm_exec_time);
 		}
+//		config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
 		
 		if(config->max_nworkers < 0)
 			config->max_nworkers = 0;
@@ -897,7 +890,7 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 			config->max_nworkers = max_cpus;
 		
 		printf("%d: ready tasks  %d idle for long %lf norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n", 
-		       sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
+		       sched_ctx, nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
 
 
 		total_max_nworkers += config->max_nworkers;
@@ -913,9 +906,10 @@ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
 		unsigned max_nready_sched_ctx = sched_ctxs[0];
 		for(i = 0; i < nsched_ctxs; i++)
 		{
-			if(max_nready < hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks)
+			int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sched_ctxs[i]);
+			if(max_nready < nready_tasks)
 			{
-				max_nready = hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks;
+				max_nready = nready_tasks;
 				max_nready_sched_ctx = sched_ctxs[i];
 			}
 		}
@@ -964,7 +958,13 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 		
 		if(hypervisor.policy.handle_idle_cycle)
 		{
-			hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+			double curr_time = starpu_timing_now();
+			double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
+			if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample)
+			{
+				sc_w->hyp_react_start_time = starpu_timing_now();
+				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
+			}
 		}
 	}
 	return;
@@ -986,7 +986,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 	if(sc_w->idle_start_time[worker] > 0.0)
 	{
 		double end_time  = starpu_timing_now();
-		sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */ 
+		sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */
 		sc_w->idle_start_time[worker] = 0.0;
 	}
 			
@@ -997,7 +997,7 @@ static void notify_poped_task(unsigned sched_ctx, int worker)
 
  
 /* notifies the hypervisor that a tagged task has just been executed */
-static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, int ready_tasks, double ready_flops)
+static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops)
 {
 	unsigned sched_ctx = task->sched_ctx;
 	int worker = starpu_worker_get_id();
@@ -1011,25 +1011,30 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 	}
 
 	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
-	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += task->flops;
+	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
 	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
-	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += task->flops;
+	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops;
 
 	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops;
-	hypervisor.sched_ctx_w[sched_ctx].nready_tasks = ready_tasks;
-	hypervisor.sched_ctx_w[sched_ctx].ready_flops = ready_flops;
-	if(hypervisor.sched_ctx_w[sched_ctx].ready_flops < 0.0)
-		hypervisor.sched_ctx_w[sched_ctx].ready_flops = 0.0;
-	_ack_resize_completed(sched_ctx, worker);
+	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops;
+	if(_sc_hypervisor_use_lazy_resize())
+		_ack_resize_completed(sched_ctx, worker);
 	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 
 	
 	if(hypervisor.resize[sched_ctx])
 	{	
 		if(hypervisor.policy.handle_poped_task)
-			hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+		{
+			double curr_time = starpu_timing_now();
+			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
+			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
+			{
+				hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
+				hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
+			}
+		}
 	}
 /* 	starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
 /* 	_ack_resize_completed(sched_ctx, worker); */
@@ -1042,9 +1047,7 @@ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, ui
 	
 	unsigned conf_sched_ctx;
 	unsigned i;
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned ns = hypervisor.nsched_ctxs;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 
 	for(i = 0; i < ns; i++)
 	{
@@ -1098,14 +1101,6 @@ static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, s
 		hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
 }
 
-static void notify_ready_task(unsigned sched_ctx_id, struct starpu_task *task)
-{
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
-	hypervisor.sched_ctx_w[sched_ctx_id].nready_tasks++;
-	hypervisor.sched_ctx_w[sched_ctx_id].ready_flops += task->flops;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
-}
-
 static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
 {
 	sc_hypervisor_resize_ctxs(NULL, -1 , NULL, -1);
@@ -1126,10 +1121,10 @@ static void notify_delete_context(unsigned sched_ctx)
 
 void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
-	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
+//	starpu_pthread_mutex_lock(&act_hypervisor_mutex);
 	unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs;
 	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
-	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
+//	starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
 	unsigned s;
 	for(s = 0; s < curr_nsched_ctxs; s++)
 		hypervisor.resize[curr_sched_ctxs[s]] = 1;

+ 22 - 2
src/common/barrier_counter.c

@@ -62,9 +62,9 @@ int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier
 	int ret = 0;
 	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
 
+	barrier->reached_flops -= flops;
 	if (--barrier->reached_start == 0)
 	{
-		barrier->reached_flops -= flops;
 		ret = 1;
 		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
 	}
@@ -79,9 +79,9 @@ int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_
 	int ret = 0;
 	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
 
+	barrier->reached_flops += flops;
 	if(++barrier->reached_start == barrier->count)
 	{
-		barrier->reached_flops += flops;
 		ret = 1;
 		STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
 	}
@@ -113,3 +113,23 @@ int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 }
+
+int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c)
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	int ret;
+//	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_start;
+//	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}
+
+double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c)
+{
+	struct _starpu_barrier *barrier = &barrier_c->barrier;
+	double ret;
+//	STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	ret = barrier->reached_flops;
+//	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	return ret;
+}

+ 3 - 0
src/common/barrier_counter.h

@@ -42,4 +42,7 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c);
 
+int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c);
+
+double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c);
 #endif

+ 12 - 0
src/common/fxt.h

@@ -146,6 +146,9 @@
 #define	_STARPU_FUT_START_WRITEBACK	0x5158
 #define	_STARPU_FUT_END_WRITEBACK	0x5159
 
+#define	_STARPU_FUT_HYPERVISOR_BEGIN    0x5160
+#define	_STARPU_FUT_HYPERVISOR_END	0x5161
+
 #ifdef STARPU_USE_FXT
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
@@ -453,6 +456,13 @@ do {										\
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
 	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 
+#define _STARPU_TRACE_HYPERVISOR_BEGIN()  \
+	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid());
+
+#define _STARPU_TRACE_HYPERVISOR_END() \
+	do {} while (0)
+//	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_END, _starpu_gettid());
+
 #ifdef STARPU_FXT_LOCK_TRACES 
 
 #define _STARPU_TRACE_LOCKING_MUTEX()	do { \
@@ -678,6 +688,8 @@ do {										\
 #define _STARPU_TRACE_MEMORY_FULL(size)				do {} while(0)
 #define _STARPU_TRACE_START_UNPARTITION(handle, memnode)	do {} while(0)
 #define _STARPU_TRACE_END_UNPARTITION(handle, memnode)		do {} while(0)
+#define _STARPU_TRACE_HYPERVISOR_BEGIN()        do {} while(0)
+#define _STARPU_TRACE_HYPERVISOR_END()                  do {} while(0)
 
 #endif // STARPU_USE_FXT
 

+ 0 - 1
src/core/jobs.c

@@ -298,7 +298,6 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
 	_starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx);
 	_starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx, flops);
-
 	struct _starpu_worker *worker;
 	worker = _starpu_get_local_worker_key();
 	if (worker)

+ 69 - 15
src/core/sched_ctx.c

@@ -27,6 +27,10 @@ static starpu_pthread_mutex_t finished_submit_mutex = STARPU_PTHREAD_MUTEX_INITI
 struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER;
 starpu_pthread_key_t sched_ctx_key;
 unsigned with_hypervisor = 0;
+double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
+double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
+double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
+size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 
@@ -327,12 +331,6 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 		}
 	}
 
-	int w;
-	for(w = 0; w < STARPU_NMAXWORKERS; w++)
-	{
-		sched_ctx->pop_counter[w] = 0;
-	}
-
 	return sched_ctx;
 }
 
@@ -580,7 +578,11 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 #ifdef STARPU_USE_SC_HYPERVISOR
 	if(sched_ctx != NULL && sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
 	   && sched_ctx->perf_counters != NULL)
+	{
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_delete_context(sched_ctx_id);
+		_STARPU_TRACE_HYPERVISOR_END();
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 
 	unsigned inheritor_sched_ctx_id = sched_ctx->inheritor;
@@ -914,16 +916,16 @@ void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double r
 	_starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops);
 }
 
-int _starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id)
+int starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	return sched_ctx->ready_tasks_barrier.barrier.reached_start;
+	return _starpu_barrier_counter_get_reached_start(&sched_ctx->ready_tasks_barrier);
 }
 
-double _starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id)
+double starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	return sched_ctx->ready_tasks_barrier.barrier.reached_flops;
+	return _starpu_barrier_counter_get_reached_flops(&sched_ctx->ready_tasks_barrier);
 }
 
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id)
@@ -958,6 +960,17 @@ unsigned _starpu_sched_ctx_get_current_context()
 void starpu_sched_ctx_notify_hypervisor_exists()
 {
 	with_hypervisor = 1;
+	int i, j;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		hyp_start_sample[i] = starpu_timing_now();
+		hyp_start_allow_sample[i] = 0.0;
+		for(j = 0; j < STARPU_NMAXWORKERS; j++)
+		{
+			flops[i][j] = 0.0;
+			data_size[i][j] = 0;
+		}
+	}
 }
 
 unsigned starpu_sched_ctx_check_if_hypervisor_exists()
@@ -965,6 +978,32 @@ unsigned starpu_sched_ctx_check_if_hypervisor_exists()
 	return with_hypervisor;
 }
 
+unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id)
+{
+	return 1;
+	double now = starpu_timing_now();
+	if(hyp_start_allow_sample[sched_ctx_id] > 0.0)
+	{
+		double allow_sample = (now - hyp_start_allow_sample[sched_ctx_id]) / 1000000.0;
+		if(allow_sample < 0.001)
+			return 1;
+		else
+		{
+			hyp_start_allow_sample[sched_ctx_id] = 0.0;
+			hyp_start_sample[sched_ctx_id] = starpu_timing_now();
+			return 0;
+		}
+	}
+	double forbid_sample = (now - hyp_start_sample[sched_ctx_id]) / 1000000.0;
+	if(forbid_sample > 0.01)
+	{
+//		hyp_start_sample[sched_ctx_id] = starpu_timing_now();
+		hyp_start_allow_sample[sched_ctx_id] = starpu_timing_now();
+		return 1;
+	}
+	return 0;
+}
+
 unsigned _starpu_get_nsched_ctxs()
 {
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
@@ -1198,14 +1237,25 @@ void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id)
 
 #ifdef STARPU_USE_SC_HYPERVISOR
 
-void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint)
+void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size2, uint32_t footprint)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	if(sched_ctx != NULL && task->sched_ctx != _starpu_get_initial_sched_ctx()->id && 
 	   task->sched_ctx != STARPU_NMAX_SCHED_CTXS  && sched_ctx->perf_counters != NULL)
-		sched_ctx->perf_counters->notify_post_exec_task(task, data_size, footprint, task->hypervisor_tag, 
-								_starpu_get_nready_tasks_of_sched_ctx(sched_ctx->id), 
-								_starpu_get_nready_flops_of_sched_ctx(sched_ctx->id));
+	{
+		flops[task->sched_ctx][workerid] += task->flops;
+		data_size[task->sched_ctx][workerid] += data_size2;
+
+		if(_starpu_sched_ctx_allow_hypervisor(sched_ctx->id) || task->hypervisor_tag > 0)
+		{
+			_STARPU_TRACE_HYPERVISOR_BEGIN();
+			sched_ctx->perf_counters->notify_post_exec_task(task, data_size[task->sched_ctx][workerid], footprint,
+									task->hypervisor_tag, flops[task->sched_ctx][workerid]);
+			_STARPU_TRACE_HYPERVISOR_END();
+			flops[task->sched_ctx][workerid] = 0.0;
+			data_size[task->sched_ctx][workerid] = 0;
+		}
+	}
 }
 
 void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)
@@ -1213,8 +1263,12 @@ void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
 	if(sched_ctx != NULL && sched_ctx_id != _starpu_get_initial_sched_ctx()->id && sched_ctx_id != STARPU_NMAX_SCHED_CTXS
-	   && sched_ctx->perf_counters != NULL)
+	   && sched_ctx->perf_counters != NULL && _starpu_sched_ctx_allow_hypervisor(sched_ctx_id))
+	{
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_pushed_task(sched_ctx_id, workerid);
+		_STARPU_TRACE_HYPERVISOR_END();
+	}
 }
 #endif //STARPU_USE_SC_HYPERVISOR
 

+ 2 - 6
src/core/sched_ctx.h

@@ -89,10 +89,6 @@ struct _starpu_sched_ctx
 	/* max GPUs to execute*/
 	int max_ngpus;
 
-	/* needed for overlapping contexts to help the workers
-	   determine which is the next context to pop tasks from */
-	unsigned pop_counter[STARPU_NMAXWORKERS];
-
 	/* in case we delete the context leave resources to the inheritor*/
 	unsigned inheritor;
 
@@ -152,8 +148,6 @@ int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
 
 void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
 void _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
-int _starpu_get_nready_tasks_of_sched_ctx(unsigned sched_ctx_id);
-double _starpu_get_nready_flops_of_sched_ctx(unsigned sched_ctx_id);
 int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);
 
 /* Return the corresponding index of the workerid in the ctx table */
@@ -198,6 +192,8 @@ int _starpu_nworkers_able_to_execute_task(struct starpu_task *task, struct _star
 
 void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx);
 
+unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 /* Notifies the hypervisor that a tasks was poped from the workers' list */
 void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);

+ 39 - 19
src/core/sched_policy.c

@@ -333,7 +333,11 @@ int _starpu_push_task(struct _starpu_job *j)
 #ifdef STARPU_USE_SC_HYPERVISOR
 			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
 			   && sched_ctx->perf_counters->notify_empty_ctx)
+			{
+				_STARPU_TRACE_HYPERVISOR_BEGIN();
 				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
+				_STARPU_TRACE_HYPERVISOR_END();
+			}
 #endif
 			return 0;
 		}
@@ -377,7 +381,11 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 #ifdef STARPU_USE_SC_HYPERVISOR
 			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
 			   && sched_ctx->perf_counters->notify_empty_ctx)
+			{
+				_STARPU_TRACE_HYPERVISOR_BEGIN();
 				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
+				_STARPU_TRACE_HYPERVISOR_END();
+			}
 #endif
 
 			return -EAGAIN;
@@ -563,29 +571,34 @@ struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker
 	struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
 	unsigned smallest_counter =  worker->nsched_ctxs;
 	struct _starpu_sched_ctx_list *l = NULL;
-	for (l = worker->sched_ctx_list; l; l = l->next)
+	if(!worker->reverse_phase)
 	{
-		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-/* 		if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1) */
-/* 			return sched_ctx; */
-		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
-		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
+		/* find a context in which the worker hasn't poped yet */
+		for (l = worker->sched_ctx_list; l; l = l->next)
 		{
-			good_sched_ctx = sched_ctx;
-			smallest_counter = sched_ctx->pop_counter[worker->workerid];
+			if(!worker->poped_in_ctx[l->sched_ctx])
+			{
+				worker->poped_in_ctx[l->sched_ctx] = !worker->poped_in_ctx[l->sched_ctx];
+				return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+			}
 		}
+		worker->reverse_phase = !worker->reverse_phase;
 	}
-	
-	if(good_sched_ctx == NULL)
+	if(worker->reverse_phase)
 	{
+		/* if the context has already poped in every one start from the begining */
 		for (l = worker->sched_ctx_list; l; l = l->next)
 		{
-			sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-			sched_ctx->pop_counter[worker->workerid] = 0;
+			if(worker->poped_in_ctx[l->sched_ctx])
+			{
+				worker->poped_in_ctx[l->sched_ctx] = !worker->poped_in_ctx[l->sched_ctx];
+				return	_starpu_get_sched_ctx_struct(l->sched_ctx);
+			}
 		}
-		return _starpu_get_sched_ctx_struct(worker->sched_ctx_list->sched_ctx);
-	}
-	return good_sched_ctx;
+		worker->reverse_phase = !worker->reverse_phase;
+	}	
+	worker->poped_in_ctx[worker->sched_ctx_list->sched_ctx] = !worker->poped_in_ctx[worker->sched_ctx_list->sched_ctx];
+	return _starpu_get_sched_ctx_struct(worker->sched_ctx_list->sched_ctx);
 }
 
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
@@ -660,17 +673,20 @@ pick:
 				}
 #ifdef STARPU_USE_SC_HYPERVISOR
 				struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
-				if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle)
+				if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
+				{
+//					_STARPU_TRACE_HYPERVISOR_BEGIN();
 					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
+//					_STARPU_TRACE_HYPERVISOR_END();
+				}
 #endif //STARPU_USE_SC_HYPERVISOR
 				
 #ifndef STARPU_NON_BLOCKING_DRIVERS
-				if((sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
+				if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1)
 					break;
 				been_here[sched_ctx->id] = 1;
 #endif
 			}
-			sched_ctx->pop_counter[worker->workerid]++;
 		}
 	  }
 
@@ -684,8 +700,12 @@ pick:
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
 
-	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task)
+	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
+	{
+//		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		perf_counters->notify_poped_task(task->sched_ctx, worker->workerid);
+//		_STARPU_TRACE_HYPERVISOR_END();
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 
 

+ 5 - 3
src/core/task.c

@@ -26,6 +26,7 @@
 #include <core/task_bundle.h>
 #include <common/config.h>
 #include <common/utils.h>
+#include <common/fxt.h>
 #include <profiling/profiling.h>
 #include <profiling/bound.h>
 #include <math.h>
@@ -259,7 +260,9 @@ int _starpu_submit_job(struct _starpu_job *j)
 				data_size += _starpu_data_get_size(handle);
 		}
 
+		_STARPU_TRACE_HYPERVISOR_BEGIN();
 		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
+		_STARPU_TRACE_HYPERVISOR_END();
 	}
 #endif//STARPU_USE_SC_HYPERVISOR
 
@@ -659,7 +662,6 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	j->submitted = 1;
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
-
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	{
 		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
@@ -852,7 +854,7 @@ int starpu_task_nready(void)
 	int nready = 0;
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 	if(config->topology.nsched_ctxs == 1)
-		nready = _starpu_get_nready_tasks_of_sched_ctx(0);
+		nready = starpu_get_nready_tasks_of_sched_ctx(0);
 	else
 	{
 		int s;
@@ -860,7 +862,7 @@ int starpu_task_nready(void)
 		{
 			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
 			{
-				nready += _starpu_get_nready_tasks_of_sched_ctx(config->sched_ctxs[s].id);
+				nready += starpu_get_nready_tasks_of_sched_ctx(config->sched_ctxs[s].id);
 			}
 		}
 	}

+ 4 - 0
src/core/workers.c

@@ -440,7 +440,11 @@ static void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu
 	workerarg->parallel_sect = 0;
 
 	for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
+	{
 		workerarg->shares_tasks_lists[ctx] = 0;
+		workerarg->poped_in_ctx[ctx] = 0;
+	}
+	workerarg->reverse_phase = 0;
 
 	/* cpu_set/hwloc_cpu_set initialized in topology.c */
 }

+ 8 - 0
src/core/workers.h

@@ -106,6 +106,14 @@ LIST_TYPE(_starpu_worker,
 	/* in this case when removing him from a context it disapears instantly */
 	unsigned shares_tasks_lists[STARPU_NMAX_SCHED_CTXS];
 
+        /* boolean to choose the next ctx a worker will pop into */
+	unsigned poped_in_ctx[STARPU_NMAX_SCHED_CTXS];	  
+
+       /* boolean indicating when we have checked all ctxs and must flip the phase of the boolean poped_in_ctx */
+	unsigned reverse_phase;
+
+
+
 #ifdef __GLIBC__
 	cpu_set_t cpu_set;
 #endif /* __GLIBC__ */

+ 30 - 0
src/debug/traces/starpu_fxt.c

@@ -685,6 +685,28 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], "B");
 }
 
+static void handle_hyp_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+	int worker;
+	worker = find_worker_id(ev->param[0]);
+	if (worker < 0)
+		return;
+
+	if (out_paje_file)
+		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "H");
+}
+
+static void handle_hyp_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+/* 	int worker; */
+/* 	worker = find_worker_id(ev->param[0]); */
+/* 	if (worker < 0) */
+/* 		return; */
+
+/* 	if (out_paje_file) */
+/* 		worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "B"); */
+}
+
 static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus)
 {
 	int worker;
@@ -1673,6 +1695,14 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 			case _STARPU_FUT_MEMORY_FULL:
 				break;
 
+			case _STARPU_FUT_HYPERVISOR_BEGIN:
+				handle_hyp_begin(&ev, options);
+				break;
+
+			case _STARPU_FUT_HYPERVISOR_END:
+				handle_hyp_end(&ev, options);
+				break;
+
 			default:
 #ifdef STARPU_VERBOSE
 				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",

+ 3 - 1
src/debug/traces/starpu_paje.c

@@ -194,6 +194,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 		poti_DefineEntityValue("Sl", ctx, "Sleeping", ".9 .1 .0");
 		poti_DefineEntityValue("P", ctx, "Progressing", ".4 .1 .6");
 		poti_DefineEntityValue("U", ctx, "Unpartitioning", ".0 .0 1.0");
+		poti_DefineEntityValue("H", ctx, "Hypervisor", ".5 .18 .0");
 	}
 
 	/* Types for the Scheduler */
@@ -233,7 +234,8 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       B       S       Overhead         \".5 .18 .0\"		\n\
 6       Sl       S      Sleeping         \".9 .1 .0\"		\n\
 6       P       S       Progressing         \".4 .1 .6\"		\n\
-6       U       S       Unpartitioning      \".0 .0 1.0\"		\n");
+6       U       S       Unpartitioning      \".0 .0 1.0\"		\n\
+6       H       S       Hypervisor      \".5 .18 .0\"		\n");
 	fprintf(file, "\
 6       P       CtS       Processing         \"0 0 0\"		\n\
 6       Sl       CtS      Sleeping         \".9 .1 .0\"		\n\