소스 검색

fix pop round-robin btw ctxs

Andra Hugo 13 년 전
부모
커밋
c40a9efa62

+ 4 - 4
sched_ctx_hypervisor/src/sched_ctx_hypervisor.c

@@ -700,12 +700,12 @@ static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
 	{
 		conf_sched_ctx = hypervisor.sched_ctxs[i];
 		pthread_mutex_lock(&hypervisor.conf_mut[conf_sched_ctx]);
-		void *config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
+		void *_config = _starpu_htbl_search_32(hypervisor.configurations[conf_sched_ctx], (uint32_t)task_tag);
 
-		if(config)// && config != hypervisor.configurations[conf_sched_ctx])
+		if(_config)// && config != hypervisor.configurations[conf_sched_ctx])
 		{
-			sched_ctx_hypervisor_set_config(conf_sched_ctx, config);
-			free(config);
+			sched_ctx_hypervisor_set_config(conf_sched_ctx, _config);
+			free(_config);
 			_starpu_htbl_insert_32(&hypervisor.configurations[sched_ctx], (uint32_t)task_tag, NULL);
 		}
 		pthread_mutex_unlock(&hypervisor.conf_mut[conf_sched_ctx]);

+ 7 - 1
src/core/sched_ctx.c

@@ -235,6 +235,12 @@ struct _starpu_sched_ctx*  _starpu_create_sched_ctx(const char *policy_name, int
 			worker->nsched_ctxs++;
 		}
 	}
+
+	int w;
+	for(w = 0; w < STARPU_NMAXWORKERS; w++)
+	{
+		sched_ctx->pop_counter[w] = 0;
+	}
 	
 	return sched_ctx;
 }
@@ -448,7 +454,7 @@ void starpu_delete_sched_ctx(unsigned sched_ctx_id, unsigned inheritor_sched_ctx
 		starpu_add_workers_to_sched_ctx(sched_ctx->workers->workerids, sched_ctx->workers->nworkers, inheritor_sched_ctx_id);
 	}
 
-	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
+	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id) && !_starpu_wait_for_all_tasks_of_sched_ctx(0))
 		_starpu_delete_sched_ctx(sched_ctx);
 	return;	
 }

+ 5 - 0
src/core/sched_ctx.h

@@ -27,6 +27,7 @@
 #define REQ_RESIZE 0
 #define DO_RESIZE 1
 
+
 struct _starpu_sched_ctx {
 	/* id of the context used in user mode*/
 	unsigned id;
@@ -80,6 +81,10 @@ struct _starpu_sched_ctx {
 
 	/* max GPUs to execute*/	
 	int max_ngpus;
+	
+	/* needed for overlapping contexts to help the workers to 
+	   determine which is the next context to pop tasks from */
+	int pop_counter[STARPU_NMAXWORKERS];
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR
 	/* a structure containing a series of performance counters determining the resize procedure */

+ 53 - 7
src/core/sched_policy.c

@@ -454,6 +454,38 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 	return conversion_task;
 }
 
+struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
+{
+	struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
+	int smallest_counter =  worker->nsched_ctxs;
+	unsigned i;
+	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	{
+		sched_ctx = worker->sched_ctx[i];
+		
+		if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && 
+		   sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs && 
+		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
+		{
+			good_sched_ctx = sched_ctx;
+			smallest_counter = sched_ctx->pop_counter[worker->workerid];
+		}
+	}
+
+	if(good_sched_ctx == NULL)
+	{
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		{
+			sched_ctx = worker->sched_ctx[i];
+			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+				sched_ctx->pop_counter[worker->workerid] = 0;
+		}
+		
+		return _get_next_sched_ctx_to_pop_into(worker);
+	}
+	return good_sched_ctx;
+}
+
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 {
 	struct starpu_task *task;
@@ -480,27 +512,41 @@ pick:
 		struct _starpu_sched_ctx *sched_ctx;
 		pthread_mutex_t *sched_ctx_mutex;
 		
-		unsigned i;
+		int been_here[STARPU_NMAX_SCHED_CTXS];
+		int i;
 		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+			been_here[i] = 0;
+
+		while(!task)
 		{
-			sched_ctx = worker->sched_ctx[i];
-			
+			if(worker->nsched_ctxs == 1)
+				sched_ctx = _starpu_get_initial_sched_ctx();
+			else
+				sched_ctx = _get_next_sched_ctx_to_pop_into(worker);
 			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
 			{
 				sched_ctx_mutex = _starpu_get_sched_mutex(sched_ctx, worker->workerid);
 				if(sched_ctx_mutex != NULL)
 				{
 					_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx_mutex);
+					
 					if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
-					{
 						task = sched_ctx->sched_policy->pop_task(sched_ctx->id);
-						_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
-						break;
-					}
+
 					_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
+
 				}
 			}
+
+			if((!task && sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
+				break;
+			
+			been_here[sched_ctx->id] = 1;
+			
+			sched_ctx->pop_counter[worker->workerid]++;
+
 		}
+
 	  }
 
 #ifdef STARPU_USE_SCHED_CTX_HYPERVISOR

+ 2 - 1
src/core/workers.c

@@ -991,7 +991,8 @@ void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
 	config.workers[workerid].status = status;
 }
 
-struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void){
+struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
+{
 	return &config.sched_ctxs[0];
 }
 

+ 1 - 1
src/sched_policies/eager_central_priority_policy.c

@@ -174,7 +174,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
 static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 {
-		struct starpu_task *chosen_task = NULL, *task;
+	struct starpu_task *chosen_task = NULL, *task;
 	unsigned workerid = starpu_worker_get_id();
 	int skipped = 0;