Andra Hugo преди 13 години
родител
ревизия
dad2cac3f5

+ 1 - 1
sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c

@@ -243,6 +243,7 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 	if(ret != EBUSY)
 	{					
 		unsigned nworkers_to_move = _get_nworkers_to_move(sender_sched_ctx);
+
 		if(nworkers_to_move > 0)
 		{
 			unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
@@ -260,7 +261,6 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 					nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers+nshared_workers);
 				if(nworkers_to_move == 0) poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
 			}
-
 			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
 			{						
 				int *workers_to_move = _get_first_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ALL);

+ 53 - 54
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -471,7 +471,7 @@ double sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sched_c
 
 static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 {
-	if(!starpu_worker_belongs_to_sched_ctx(worker, sched_ctx))
+	if(worker != -1 && !starpu_worker_belongs_to_sched_ctx(worker, sched_ctx))
 		return 0;
 
 	struct resize_ack *resize_ack = NULL;
@@ -497,7 +497,6 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 							break;
 						}
 				}
-
 				if(only_remove || 
 				   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
 				{
@@ -517,12 +516,11 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 	int *acked_workers = resize_ack->acked_workers;
 	int i;
 	
-	if(worker != -1)
+	int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
+	if(ret != EBUSY)
 	{
-		int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-		if(ret != EBUSY)
+		if(worker != -1)
 		{
-
 			for(i = 0; i < nmoved_workers; i++)
 			{
 				int moved_worker = moved_workers[i];
@@ -531,65 +529,66 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 					acked_workers[i] = 1;
 				}
 			}
+		}
 			
-			int nacked_workers = 0;
-			for(i = 0; i < nmoved_workers; i++)
-			{
-				nacked_workers += (acked_workers[i] == 1);
-			}
-			
-			unsigned resize_completed = (nacked_workers == nmoved_workers);
-			int receiver_sched_ctx = sched_ctx;
-			if(resize_completed)
-			{
-				/* if the permission to resize is not allowed by the user don't do it
-				   whatever the application says */
-				if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
-				{				
-					int j;
-					printf("remove from ctx %d:", sender_sched_ctx);
-					for(j = 0; j < nmoved_workers; j++)
-						printf(" %d", moved_workers[j]);
-					printf("\n");
-					
-					starpu_remove_workers_from_sched_ctx(moved_workers, nmoved_workers, sender_sched_ctx);
-					
-					/* info concerning only the gflops_rate strateg */
-					struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
-					struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
-					
-					double start_time =  starpu_timing_now();
-					sender_sc_w->start_time = start_time;
-					sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
-					_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
-					
-					receiver_sc_w->start_time = start_time;
-					receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
-					_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
-					
-					hypervisor.resize[sender_sched_ctx] = 1;
-					hypervisor.resize[receiver_sched_ctx] = 1;
-					/* if the user allowed resizing leave the decisions to the application */
-					if(imposed_resize)  imposed_resize = 0;
-					
-					resize_ack->receiver_sched_ctx = -1;
-					resize_ack->nmoved_workers = 0;
-					free(resize_ack->moved_workers);
-					free(resize_ack->acked_workers);
-				}
-				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-				return resize_completed;
+		int nacked_workers = 0;
+		for(i = 0; i < nmoved_workers; i++)
+		{
+			nacked_workers += (acked_workers[i] == 1);
+		}
+		
+		unsigned resize_completed = (nacked_workers == nmoved_workers);
+		int receiver_sched_ctx = sched_ctx;
+		if(resize_completed)
+		{
+			/* if the permission to resize is not allowed by the user don't do it
+			   whatever the application says */
+			if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
+			{				
+				int j;
+				printf("remove from ctx %d:", sender_sched_ctx);
+				for(j = 0; j < nmoved_workers; j++)
+					printf(" %d", moved_workers[j]);
+				printf("\n");
+				
+				starpu_remove_workers_from_sched_ctx(moved_workers, nmoved_workers, sender_sched_ctx);
+				
+				/* info concerning only the gflops_rate strategy */
+				struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
+				struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
+				
+				double start_time =  starpu_timing_now();
+				sender_sc_w->start_time = start_time;
+				sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
+				_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
+				
+				receiver_sc_w->start_time = start_time;
+				receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
+				_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
+				
+				hypervisor.resize[sender_sched_ctx] = 1;
+				hypervisor.resize[receiver_sched_ctx] = 1;
+				/* if the user allowed resizing leave the decisions to the application */
+				if(imposed_resize)  imposed_resize = 0;
+				
+				resize_ack->receiver_sched_ctx = -1;
+				resize_ack->nmoved_workers = 0;
+				free(resize_ack->moved_workers);
+				free(resize_ack->acked_workers);
 			}
 			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
-			return 0;
+			return resize_completed;
 		}
+		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
 	}
 	return 0;
 }
 
 /* Register a resize request for the scheduling context `sched_ctx`.
  *
  * Calls _starpu_htbl_insert_32() on hypervisor.resize_requests[sched_ctx],
  * passing task_tag (cast to uint32_t) and sched_ctx (cast to void *) --
  * presumably the lookup key and the stored value; confirm against the
  * htbl API.
  *
  * The insertion is bracketed by act_hypervisor_mutex; both the lock and the
  * unlock are the lines added by this revision.
  *
  * NOTE(review): sched_ctx indexes hypervisor.resize_requests without a range
  * check in this function -- callers presumably guarantee a valid id; verify.
  * NOTE(review): sched_ctx is an unsigned converted directly to void *; going
  * through uintptr_t would avoid an integer/pointer width mismatch where
  * pointers are wider than unsigned -- confirm how the value is read back.
  */
 void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
 {
+	pthread_mutex_lock(&act_hypervisor_mutex);
 	_starpu_htbl_insert_32(&hypervisor.resize_requests[sched_ctx], (uint32_t)task_tag, (void*)sched_ctx);	
+	pthread_mutex_unlock(&act_hypervisor_mutex);
 }
 
 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1015,7 +1015,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 		 * of that task and the scheduler should perhaps put it aside */
 		/* Not calibrated enough */
 	{
-		printf("nan pt ca stupid algo nsamples %d\n", entry->nsample);
+//		printf("nan -> nsamples %d\n", entry->nsample);
 		exp = NAN;
 	}
 

+ 1 - 1
src/core/sched_policy.c

@@ -351,7 +351,7 @@ int _starpu_push_task(struct _starpu_job *j)
 /* 				if(nworkers == 0) return _starpu_push_task(j); */
 /* 			} */
 /* 			else */
-//			{
+/* 			{ */
 				_STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->empty_ctx_mutex);
 				starpu_task_list_push_front(&sched_ctx->empty_ctx_tasks, task);
 				_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);