Browse Source

Fix init bug: sched_policy.c:_get_next_sched_ctx_to_pop_into() made a recursive call to wait; when compiled without optimisation, this led to a stack overflow.

Simon Archipoff 12 years ago
parent
commit
0a293ff950
2 changed files with 36 additions and 31 deletions
  1. 26 23
      src/core/sched_policy.c
  2. 10 8
      src/core/workers.c

+ 26 - 23
src/core/sched_policy.c

@@ -544,36 +544,39 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
 
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker)
 {
 {
-	struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
-	unsigned smallest_counter =  worker->nsched_ctxs;
-	unsigned i;
-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+	while(1)
 	{
 	{
-		sched_ctx = worker->sched_ctx[i];
+		struct _starpu_sched_ctx *sched_ctx, *good_sched_ctx = NULL;
+		unsigned smallest_counter =  worker->nsched_ctxs;
+		unsigned i;
 
 
-		if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && worker->removed_from_ctx[sched_ctx->id])
-			return sched_ctx;
-		if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS &&
-		   sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
-		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
-		{
-			good_sched_ctx = sched_ctx;
-			smallest_counter = sched_ctx->pop_counter[worker->workerid];
-		}
-	}
-
-	if(good_sched_ctx == NULL)
-	{
 		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 		{
 		{
 			sched_ctx = worker->sched_ctx[i];
 			sched_ctx = worker->sched_ctx[i];
-			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
-				sched_ctx->pop_counter[worker->workerid] = 0;
+			
+			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && worker->removed_from_ctx[sched_ctx->id])
+				return sched_ctx;
+			if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS &&
+			   sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
+			   smallest_counter > sched_ctx->pop_counter[worker->workerid])
+			{
+				good_sched_ctx = sched_ctx;
+				smallest_counter = sched_ctx->pop_counter[worker->workerid];
+			}
 		}
 		}
-
-		return _get_next_sched_ctx_to_pop_into(worker);
+		
+		if(good_sched_ctx == NULL)
+		{
+			for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+			{
+				sched_ctx = worker->sched_ctx[i];
+				if(sched_ctx != NULL && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+					sched_ctx->pop_counter[worker->workerid] = 0;
+			}
+			continue;
+		}
+		return good_sched_ctx;
 	}
 	}
-	return good_sched_ctx;
 }
 }
 
 
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)

+ 10 - 8
src/core/workers.c

@@ -959,17 +959,19 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	ret = _starpu_build_topology(&config, is_a_sink);
 	ret = _starpu_build_topology(&config, is_a_sink);
 	_starpu_conf_check_environment(config.conf);
 	_starpu_conf_check_environment(config.conf);
 
 
-	/* Launch "basic" workers (ie. non-combined workers) */
-	if (!is_a_sink)
-		_starpu_launch_drivers(&config);
 
 
 	int nworkers = starpu_worker_get_count();
 	int nworkers = starpu_worker_get_count();
 	int workerid_array[nworkers];
 	int workerid_array[nworkers];
-	int i;
-	for(i = 0; i < nworkers; i++)
-	{
-		workerid_array[i] = i;
-	}
+	int i,j;
+	
+	for(i = j = 0; i < nworkers; i++)
+		if(STARPU_CPU_WORKER == starpu_worker_get_type(i))
+			workerid_array[j++] = i;
+	nworkers = j;
+
+	/* Launch "basic" workers (ie. non-combined workers) */
+	if (!is_a_sink)
+		_starpu_launch_drivers(&config);
 
 
 	starpu_combined_worker_assign_workerid(nworkers, workerid_array);
 	starpu_combined_worker_assign_workerid(nworkers, workerid_array);