13 years ago · dad2cac3f5
--- a/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
+++ b/sched_ctx_hypervisor/src/hypervisor_policies/policy_tools.c
@@ -243,6 +243,7 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 
				 	if(ret != EBUSY)
			
 
				 	{					
			
 
				 		unsigned nworkers_to_move = _get_nworkers_to_move(sender_sched_ctx);
			
 
				+
			
 
				 		if(nworkers_to_move > 0)
			
 
				 		{
			
 
				 			unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
			
@@ -260,7 +261,6 @@ unsigned _resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigne
 
				 					nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers+nshared_workers);
			
 
				 				if(nworkers_to_move == 0) poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
			
 
				 			}
			
 
				-
			
 
				 			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
			
 
				 			{						
			
 
				 				int *workers_to_move = _get_first_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ALL);
			
--- a/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
+++ b/sched_ctx_hypervisor/src/sched_ctx_hypervisor.c
@@ -471,7 +471,7 @@ double sched_ctx_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sched_c
 
				 
			
 
				 static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
			
 
				 {
			
 
				-	if(!starpu_worker_belongs_to_sched_ctx(worker, sched_ctx))
			
 
				+	if(worker != -1 && !starpu_worker_belongs_to_sched_ctx(worker, sched_ctx))
			
 
				 		return 0;
			
 
				 
			
 
				 	struct resize_ack *resize_ack = NULL;
			
@@ -497,7 +497,6 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
				 							break;
			
 
				 						}
			
 
				 				}
			
 
				-
			
 
				 				if(only_remove || 
			
 
				 				   (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == sched_ctx))
			
 
				 				{
			
@@ -517,12 +516,11 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
				 	int *acked_workers = resize_ack->acked_workers;
			
 
				 	int i;
			
 
				 	
			
 
				-	if(worker != -1)
			
 
				+	int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
			
 
				+	if(ret != EBUSY)
			
 
				 	{
			
 
				-		int ret = pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
			
 
				-		if(ret != EBUSY)
			
 
				+		if(worker != -1)
			
 
				 		{
			
 
				-
			
 
				 			for(i = 0; i < nmoved_workers; i++)
			
 
				 			{
			
 
				 				int moved_worker = moved_workers[i];
			
@@ -531,65 +529,66 @@ static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
 
				 					acked_workers[i] = 1;
			
 
				 				}
			
 
				 			}
			
 
				+		}
			
 
				 			
			
 
				-			int nacked_workers = 0;
			
 
				-			for(i = 0; i < nmoved_workers; i++)
			
 
				-			{
			
 
				-				nacked_workers += (acked_workers[i] == 1);
			
 
				-			}
			
 
				-			
			
 
				-			unsigned resize_completed = (nacked_workers == nmoved_workers);
			
 
				-			int receiver_sched_ctx = sched_ctx;
			
 
				-			if(resize_completed)
			
 
				-			{
			
 
				-				/* if the permission to resize is not allowed by the user don't do it
			
 
				-				   whatever the application says */
			
 
				-				if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
			
 
				-				{				
			
 
				-					int j;
			
 
				-					printf("remove from ctx %d:", sender_sched_ctx);
			
 
				-					for(j = 0; j < nmoved_workers; j++)
			
 
				-						printf(" %d", moved_workers[j]);
			
 
				-					printf("\n");
			
 
				-					
			
 
				-					starpu_remove_workers_from_sched_ctx(moved_workers, nmoved_workers, sender_sched_ctx);
			
 
				-					
			
 
				-					/* info concerning only the gflops_rate strateg */
			
 
				-					struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
			
 
				-					struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
			
 
				-					
			
 
				-					double start_time =  starpu_timing_now();
			
 
				-					sender_sc_w->start_time = start_time;
			
 
				-					sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
			
 
				-					_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
			
 
				-					
			
 
				-					receiver_sc_w->start_time = start_time;
			
 
				-					receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
			
 
				-					_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
			
 
				-					
			
 
				-					hypervisor.resize[sender_sched_ctx] = 1;
			
 
				-					hypervisor.resize[receiver_sched_ctx] = 1;
			
 
				-					/* if the user allowed resizing leave the decisions to the application */
			
 
				-					if(imposed_resize)  imposed_resize = 0;
			
 
				-					
			
 
				-					resize_ack->receiver_sched_ctx = -1;
			
 
				-					resize_ack->nmoved_workers = 0;
			
 
				-					free(resize_ack->moved_workers);
			
 
				-					free(resize_ack->acked_workers);
			
 
				-				}
			
 
				-				pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
			
 
				-				return resize_completed;
			
 
				+		int nacked_workers = 0;
			
 
				+		for(i = 0; i < nmoved_workers; i++)
			
 
				+		{
			
 
				+			nacked_workers += (acked_workers[i] == 1);
			
 
				+		}
			
 
				+		
			
 
				+		unsigned resize_completed = (nacked_workers == nmoved_workers);
			
 
				+		int receiver_sched_ctx = sched_ctx;
			
 
				+		if(resize_completed)
			
 
				+		{
			
 
				+			/* if the user has not given permission to resize, don't do it,
			
 
				+			   whatever the application says */
			
 
				+			if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
			
 
				+			{				
			
 
				+				int j;
			
 
				+				printf("remove from ctx %d:", sender_sched_ctx);
			
 
				+				for(j = 0; j < nmoved_workers; j++)
			
 
				+					printf(" %d", moved_workers[j]);
			
 
				+				printf("\n");
			
 
				+				
			
 
				+				starpu_remove_workers_from_sched_ctx(moved_workers, nmoved_workers, sender_sched_ctx);
			
 
				+				
			
 
				+				/* info concerning only the gflops_rate strategy */
			
 
				+				struct sched_ctx_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
			
 
				+				struct sched_ctx_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
			
 
				+				
			
 
				+				double start_time =  starpu_timing_now();
			
 
				+				sender_sc_w->start_time = start_time;
			
 
				+				sender_sc_w->remaining_flops = sender_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(sender_sc_w);
			
 
				+				_set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
			
 
				+				
			
 
				+				receiver_sc_w->start_time = start_time;
			
 
				+				receiver_sc_w->remaining_flops = receiver_sc_w->remaining_flops - sched_ctx_hypervisor_get_elapsed_flops_per_sched_ctx(receiver_sc_w);
			
 
				+				_set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
			
 
				+				
			
 
				+				hypervisor.resize[sender_sched_ctx] = 1;
			
 
				+				hypervisor.resize[receiver_sched_ctx] = 1;
			
 
				+				/* if the user allowed resizing, leave the decisions to the application */
			
 
				+				if(imposed_resize)  imposed_resize = 0;
			
 
				+				
			
 
				+				resize_ack->receiver_sched_ctx = -1;
			
 
				+				resize_ack->nmoved_workers = 0;
			
 
				+				free(resize_ack->moved_workers);
			
 
				+				free(resize_ack->acked_workers);
			
 
				 			}
			
 
				 			pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
			
 
				-			return 0;
			
 
				+			return resize_completed;
			
 
				 		}
			
 
				+		pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
			
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				 void sched_ctx_hypervisor_resize(unsigned sched_ctx, int task_tag)
			
 
				 {
			
 
				+	pthread_mutex_lock(&act_hypervisor_mutex);
			
 
				 	_starpu_htbl_insert_32(&hypervisor.resize_requests[sched_ctx], (uint32_t)task_tag, (void*)sched_ctx);	
			
 
				+	pthread_mutex_unlock(&act_hypervisor_mutex);
			
 
				 }
			
 
				 
			
 
				 /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1015,7 +1015,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
				 		 * of that task and the scheduler should perhaps put it aside */
			
 
				 		/* Not calibrated enough */
			
 
				 	{
			
 
				-		printf("nan pt ca stupid algo nsamples %d\n", entry->nsample);
			
 
				+//		printf("nan -> nsamples %d\n", entry->nsample);
			
 
				 		exp = NAN;
			
 
				 	}
			
 
				 
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -351,7 +351,7 @@ int _starpu_push_task(struct _starpu_job *j)
 
				 /* 				if(nworkers == 0) return _starpu_push_task(j); */
			
 
				 /* 			} */
			
 
				 /* 			else */
			
 
				-//			{
			
 
				+/* 			{ */
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->empty_ctx_mutex);
			
 
				 				starpu_task_list_push_front(&sched_ctx->empty_ctx_tasks, task);
			
 
				 				_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);