소스 검색

fixes for moving workers

Andra Hugo 13 년 전
부모
커밋
d036291831

+ 1 - 0
sched_ctx_hypervisor/include/sched_ctx_hypervisor.h

@@ -78,6 +78,7 @@ struct sched_ctx_wrapper {
 	unsigned sched_ctx;
 	struct policy_config *config;
 	double current_idle_time[STARPU_NMAXWORKERS];
+	int worker_to_be_removed[STARPU_NMAXWORKERS];
 	int pushed_tasks[STARPU_NMAXWORKERS];
 	int poped_tasks[STARPU_NMAXWORKERS];
 	double total_flops;

+ 18 - 24
sched_ctx_hypervisor/src/hypervisor_policies/lp3_policy.c

@@ -108,13 +108,9 @@ static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *w
                         if (isnan(length))
                                 times[w][t] = NAN;
                        else
-                                times[w][t] = length / 1000.;
-			
-//			printf("t%d_%x_%s on worker %d ctx %d: %lf ntasks = %d\n", t, tp->footprint, tp->cl->model->symbol, w, tp->sched_ctx_id, times[w][t], tp->n);
+                                times[w][t] = length / 1000.;	
                 }
-//		printf("\n");
         }
-//	printf("\n");
 }
 
 /*                                                                                                                                                                                                                  
@@ -292,11 +288,7 @@ static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double
 
 	for(s = 0; s < ns; s++)
 		for(w = 0; w < nw; w++)
-		{
 			w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
-/* 			if(w_in_s[s][w]) */
-/* 				printf("%d in %d %lf \n",w, s, w_in_s[s][w]); */
-		}
 
 	glp_delete_prob(lp);
 	return res;
@@ -317,6 +309,8 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 		{
 			workers_to_add[w] = -1;
 			workers_to_remove[w] = -1;
+			for(s2 = 0; s2 < ns; s2++)
+				destination_ctx[w][s2] = -1;
 		}
 
 		int nadd = 0, nremove = 0;
@@ -330,32 +324,26 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 			{
 				if(w_in_s[s][w] >= 0.5)
 				{
-//					printf("add %d to ctx %d\n", w, s);
 					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
 				}
 				else
 				{
-//					printf("remove %d from ctx %d\n", w, s);
 					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
 					for(s2 = 0; s2 < ns; s2++)
 						if(s2 != s && w_in_s[s2][w] >= 0.5)
 							destination_ctx[w][s2] = 1;
 						else
-							destination_ctx[w][s2] = 0;
-					
-					
+							destination_ctx[w][s2] = 0;	
 				}
 			}
 			else
 			{
 				if(w_in_s[s][w] >= 0.3)
 				{
-	//				printf("add %d to ctx %d\n", w, s);
 					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
 				}
 				else
 				{
-//					printf("remove %d from ctx %d\n", w, s);
 					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
 					for(s2 = 0; s2 < ns; s2++)
 						if(s2 != s && w_in_s[s2][w] >= 0.3)
@@ -366,6 +354,12 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 			}
 	
 		}
+
+		sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
+		struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
+		int i;
+		for(i = 0; i < nadd; i++)
+			new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
 		
 		if(!first_time)
 		{
@@ -378,14 +372,20 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 
 			for(w2 = 0; w2 < nremove; w2++)
 				for(s2 = 0; s2 < ns; s2++)
-					if(destination_ctx[w2][s2] && sched_ctx_hypervisor_can_resize(sched_ctxs[s2]))
+				{
+					/* if the worker has to be removed we should find a destination
+					   otherwise we are not interested */
+					if(destination_ctx[w2][s2] == -1)
+						found_one_dest[w2] = -1;
+					if(destination_ctx[w2][s2] == 1)// && sched_ctx_hypervisor_can_resize(sched_ctxs[s2]))
 					{
 						found_one_dest[w2] = 1;
 						break;
 					}
+				}
 			for(w2 = 0; w2 < nremove; w2++)
 			{
-				if(!found_one_dest[w2])
+				if(found_one_dest[w2] == 0)
 				{
 					all_have_dest = 0;
 					break;
@@ -394,12 +394,6 @@ static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_
 			if(all_have_dest)
 				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s]);
 		}
-
-		sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
-		struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
-		int i;
-		for(i = 0; i < nadd; i++)
-			new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] :  new_config->new_workers_max_idle;
 	}
 
 }

+ 7 - 5
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -1,6 +1,7 @@
-#include <sched_ctx_hypervisor.h>
-#include <pthread.h>
+/* #include <sched_ctx_hypervisor.h> */
+/* #include <pthread.h> */
 
+#include "policy_tools.h"
 //enum starpu_archtype STARPU_ALL;
 
 static int _compute_priority(unsigned sched_ctx)
@@ -317,8 +318,10 @@ double _get_ctx_velocity(struct sched_ctx_wrapper* sc_w)
         double elapsed_flops = sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
 	double total_elapsed_flops = sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
 	double prc = elapsed_flops/sc_w->total_flops;
-	double prc_valid_velocity = elapsed_flops == total_elapsed_flops ? 0.05 : 0.2;
-        if( prc >= prc_valid_velocity)
+	unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sc_w->sched_ctx);
+
+	double redim_sample = elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE*nworkers : HYPERVISOR_REDIM_SAMPLE*nworkers;
+        if(prc >= redim_sample)
         {
                 double curr_time = starpu_timing_now();
                 double elapsed_time = curr_time - sc_w->start_time;
@@ -368,7 +371,6 @@ int _velocity_gap_btw_ctxs()
 					if(other_ctx_v != 0.0)
 					{
 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
-//						printf("gap = %lf\n", gap);
 						if(gap > 2)
 							return 1;
 					}

+ 3 - 0
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.h

@@ -1,6 +1,9 @@
 #include <sched_ctx_hypervisor.h>
 #include <pthread.h>
 
+#define HYPERVISOR_REDIM_SAMPLE 0.01
+#define HYPERVISOR_START_REDIM_SAMPLE 0.005
+
 struct bound_task_pool
 {
 	/* Which codelet has been executed */

+ 71 - 100
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -157,7 +157,7 @@ struct starpu_performance_counters* sched_ctx_hypervisor_init(struct hypervisor_
 			hypervisor.sched_ctx_w[i].poped_tasks[j] = 0;
 			hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
 			hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
-
+			hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0;
 		}
 	}
 
@@ -356,9 +356,7 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 //		if(ncpus != 0)
 //			starpu_remove_workers_from_sched_ctx(cpus, ncpus, sender_sched_ctx);
 
-		starpu_remove_workers_from_sched_ctx(workers_to_move, nworkers_to_move, sender_sched_ctx);
 		starpu_add_workers_to_sched_ctx(workers_to_move, nworkers_to_move, receiver_sched_ctx);
-
 		pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
 		hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
@@ -374,12 +372,11 @@ void sched_ctx_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned recei
 			hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;	
 		}
 
-		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-
 		hypervisor.resize[sender_sched_ctx] = 0;
 		hypervisor.resize[receiver_sched_ctx] = 0;
-	}
 
+		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
+	}
 	return;
 }
 
@@ -392,28 +389,8 @@ void sched_ctx_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned
 		for(j = 0; j < nworkers_to_add; j++)
 			printf(" %d", workers_to_add[j]);
 		printf("\n");
-
 		starpu_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, sched_ctx);
-
-		pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
-		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx;
-		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_add * sizeof(int));
-		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_add;
-		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_add * sizeof(int));
-
-
-		int i;
-		for(i = 0; i < nworkers_to_add; i++)
-		{
-			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_add[i]] = 0.0;
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_add[i];	
-			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
-		}
-		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
-
-		hypervisor.resize[sched_ctx] = 0;
 	}
-
 	return;
 }
 
@@ -426,30 +403,31 @@ void sched_ctx_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove,
 {
 	if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx])
 	{
-		int j;
-		printf("remove from ctx %d:", sched_ctx);
-		for(j = 0; j < nworkers_to_remove; j++)
-			printf(" %d", workers_to_remove[j]);
-		printf("\n");
-
-		starpu_remove_workers_from_sched_ctx(workers_to_remove, nworkers_to_remove, sched_ctx);
-/* 		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = sched_ctx; */
-/* 		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); */
-/* 		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers_to_remove; */
-/* 		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); */
+		int nworkers=0;
+		int workers[nworkers_to_remove];
+	
+		pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);	
+		int i;
+		for(i = 0; i < nworkers_to_remove; i++)
+			if(starpu_worker_belongs_to_sched_ctx(workers_to_remove[i], sched_ctx))
+				workers[nworkers++] = workers_to_remove[i];
 
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = nworkers;
+		hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
 
-/* 		int i; */
-/* 		for(i = 0; i < nworkers_to_remove; i++) */
-/* 		{ */
-/* 			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers_to_remove[i]] = 0.0; */
-/* 			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers_to_remove[i];	 */
-/* 			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	 */
-/* 		} */
 
-//		hypervisor.resize[sched_ctx] = 0;
-	}
+		for(i = 0; i < nworkers; i++)
+		{
+			hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];	
+			hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;	
+		}
 
+		hypervisor.resize[sched_ctx] = 0;
+		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
+ 	}
 	return;
 }
 
@@ -480,6 +458,9 @@ double sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sched_c
 
 static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 {
+	if(!starpu_worker_belongs_to_sched_ctx(worker, sched_ctx))
+		return 0;
+
 	struct resize_ack *resize_ack = NULL;
 	unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 
@@ -491,8 +472,21 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 			if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
 			{
 				struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
-				if(sc_w->resize_ack.receiver_sched_ctx != -1 && 
-				   sc_w->resize_ack.receiver_sched_ctx == sched_ctx) 
+				unsigned only_remove = 0;
+				if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && 
+				   sc_w->resize_ack.nmoved_workers > 0 && starpu_worker_belongs_to_sched_ctx(worker, hypervisor.sched_ctxs[i]))
+				{
+					int j;
+					for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
+						if(sc_w->resize_ack.moved_workers[j] == worker)
+						{
+							only_remove = 1;
+							break;
+						}
+				}
+
+				if(only_remove || 
+				   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
 				{
 					resize_ack = &sc_w->resize_ack;
 					sender_sched_ctx = hypervisor.sched_ctxs[i];
@@ -505,20 +499,20 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 	/* if there is no ctx waiting for its ack return 1*/
 	if(resize_ack == NULL)
 		return 1;
-	else
+	int *moved_workers = resize_ack->moved_workers;
+	int nmoved_workers = resize_ack->nmoved_workers;
+	int *acked_workers = resize_ack->acked_workers;
+	int i;
+	
+	if(worker != -1)
 	{
-		int *moved_workers = resize_ack->moved_workers;
-		int nmoved_workers = resize_ack->nmoved_workers;
-		int *acked_workers = resize_ack->acked_workers;
-		int i;
-		
-		if(worker != -1)
+		pthread_mutex_lock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
+		for(i = 0; i < nmoved_workers; i++)
 		{
-			for(i = 0; i < nmoved_workers; i++)
+			int moved_worker = moved_workers[i];
+			if(moved_worker == worker && acked_workers[i] == 0)
 			{
-				int moved_worker = moved_workers[i];
-				if(moved_worker == worker && acked_workers[i] == 0)
-					acked_workers[i] = 1;
+				acked_workers[i] = 1;
 			}
 		}
 		
@@ -529,14 +523,21 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 		}
 		
 		unsigned resize_completed = (nacked_workers == nmoved_workers);
-		unsigned receiver_sched_ctx = resize_ack->receiver_sched_ctx;
-		unsigned unknown_sender = receiver_sched_ctx == sched_ctx;
-		if(!unknown_sender)
+		int receiver_sched_ctx = sched_ctx;
+		if(resize_completed)
 		{
 			/* if the permission to resize is not allowed by the user don't do it
 			   whatever the application says */
-			if(resize_completed && !((hypervisor.resize[sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize) && worker == moved_workers[0])
+			if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
 			{				
+				int j;
+				printf("remove from ctx %d:", sender_sched_ctx);
+				for(j = 0; j < nmoved_workers; j++)
+					printf(" %d", moved_workers[j]);
+				printf("\n");
+
+				starpu_remove_workers_from_sched_ctx(moved_workers, nmoved_workers, sender_sched_ctx);
+
 				/* info concerning only the gflops_rate strategy */
 				struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
 				struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
@@ -555,44 +556,15 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 				/* if the user allowed resizing leave the decisions to the application */
 				if(imposed_resize)  imposed_resize = 0;
 				
-				pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 				resize_ack->receiver_sched_ctx = -1;
 				resize_ack->nmoved_workers = 0;
 				free(resize_ack->moved_workers);
 				free(resize_ack->acked_workers);
-				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
 			}
-			
-			return resize_completed;
-		}
-		else
-		{
-			/* if the permission to resize is not allowed by the user don't do it
-			   whatever the application says */
-			if(resize_completed && !(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && worker == moved_workers[0])
-			{				
-				/* info concerning only the gflops_rate strateg */
-				struct sched_ctx_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
-				
-				double start_time =  starpu_timing_now();
-				sc_w->start_time = start_time;
-				sc_w->remaining_flops = sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
-				_set_elapsed_flops_per_sched_ctx(sched_ctx, 0.0);
-								
-				hypervisor.resize[sched_ctx] = 1;
-				/* if the user allowed resizing leave the decisions to the application */
-				if(imposed_resize)  imposed_resize = 0;
-				
-				pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
-				resize_ack->receiver_sched_ctx = -1;
-				resize_ack->nmoved_workers = 0;
-				free(resize_ack->moved_workers);
-				free(resize_ack->acked_workers);
-				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
-			}
-			
+			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 			return resize_completed;
 		}
+		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	}
 	return 0;
 }
@@ -614,8 +586,7 @@ static void notify_idle_end(unsigned sched_ctx, int worker)
 		if(hypervisor.policy.handle_idle_end)
 			hypervisor.policy.handle_idle_end(sched_ctx, worker);
 		
-		if(!hypervisor.resize[sched_ctx])
-			_ack_resize_completed(sched_ctx, worker);
+//			_ack_resize_completed(sched_ctx, worker);
 	}
 }
 
@@ -631,8 +602,8 @@ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
 			if(hypervisor.policy.handle_idle_cycle)
 				hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
 		}		
-		else 
-			_ack_resize_completed(sched_ctx, worker);
+/* 		else  */
+/* 			_ack_resize_completed(sched_ctx, worker); */
 	}
 	return;
 }
@@ -671,8 +642,8 @@ static void notify_poped_task(unsigned sched_ctx, int worker, double elapsed_flo
 			if(hypervisor.policy.handle_poped_task)
 				hypervisor.policy.handle_poped_task(sched_ctx, worker);
 		}
-		else 
-			_ack_resize_completed(sched_ctx, worker);
+		_ack_resize_completed(sched_ctx, worker);
+			
 	}
 }
 

+ 1 - 1
src/core/sched_ctx.c

@@ -664,7 +664,7 @@ unsigned starpu_worker_belongs_to_sched_ctx(int workerid, unsigned sched_ctx_id)
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	unsigned i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-		if(worker->sched_ctx[i] == sched_ctx_id)
+		if(worker->sched_ctx[i] && worker->sched_ctx[i]->id == sched_ctx_id)
 			return 1;
 	return 0;
 }

+ 22 - 20
src/core/sched_policy.c

@@ -514,6 +514,28 @@ pick:
 		}
 	  }
 
+#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
+	struct _starpu_sched_ctx *sched_ctx = NULL;
+	struct starpu_performance_counters *perf_counters = NULL;
+	int j;
+	for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
+	{
+		sched_ctx = worker->sched_ctx[j];
+		if(sched_ctx != NULL && sched_ctx->id != 0)
+		{
+			perf_counters = sched_ctx->perf_counters;
+			if(perf_counters != NULL && perf_counters->notify_idle_cycle && perf_counters->notify_idle_end)
+			{
+				if(!task)
+					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
+				else
+					perf_counters->notify_idle_end(sched_ctx->id, worker->workerid);
+			}
+		}
+	}
+#endif //STARPU_USE_SCHED_CTX_HYPERVISOR
+
+
 	if (!task)
 		goto profiling;
 
@@ -579,26 +601,6 @@ profiling:
 		}
 	}
 
-#ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
-	struct _starpu_sched_ctx *sched_ctx = NULL;
-	struct starpu_performance_counters *perf_counters = NULL;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
-	{
-		sched_ctx = worker->sched_ctx[i];
-		if(sched_ctx != NULL && sched_ctx->id != 0)
-		{
-			perf_counters = sched_ctx->perf_counters;
-			if(perf_counters != NULL && perf_counters->notify_idle_cycle && perf_counters->notify_idle_end)
-			{
-				if(!task)
-					perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
-				else
-					perf_counters->notify_idle_end(sched_ctx->id, worker->workerid);
-			}
-		}
-	}
-#endif //STARPU_USE_SCHED_CTX_HYPERVISOR
-
 	return task;
 }