소스 검색

fix case where the number of subcontexts does not divide the number of cpu workers, in which case some cpu workers were not assigned to the subcontexts, and would end-up executing parallel tasks without the prologue being executed first

Olivier Aumage 8 년 전
부모
커밋
a110078dab
1개의 변경된 파일14개의 추가작업 그리고 10개의 파일을 삭제
  1. 14 10
      examples/sched_ctx/parallel_tasks_reuse_handle.c

+ 14 - 10
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -17,6 +17,7 @@
 
 #include <starpu.h>
 #include <omp.h>
+#include <pthread.h>
 
 #ifdef STARPU_QUICK_CHECK
 #define NTASKS 64
@@ -28,6 +29,8 @@
 #define LOOPS  10
 #endif
 
+#define N_NESTED_CTXS 2
+
 struct context
 {
 	int ncpus;
@@ -38,6 +41,7 @@ struct context
 /* Helper for the task that will initiate everything */
 void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 {
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
 	int sched_ctx = *(int *)sched_ctx_;
 	int *cpuids = NULL;
 	int ncpuids = 0;
@@ -50,6 +54,7 @@ void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 
 	omp_set_num_threads(ncpuids);
 	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
 	return;
 }
 
@@ -101,25 +106,24 @@ void parallel_task_init()
 						  0);
 
 	/* Initialize nested contexts */
-	/* WARNING : the number of contexts must be a divisor of the number of available cpus*/
-
-	contexts = malloc(sizeof(struct context)*2);
-	int cpus_per_context = main_context.ncpus/2;
+	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
+	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
 	int i;
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 	{
-		fprintf(stderr, "ncpus %d for context %d \n",cpus_per_context, i);
 		contexts[i].ncpus = cpus_per_context;
+		if (i == N_NESTED_CTXS-1)
+			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
 		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
 	}
 
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
 							 contexts[i].ncpus,"nested_ctx",
 							 STARPU_SCHED_CTX_NESTED,main_context.id,
 							 0);
 
-	for (i = 0; i < 2; i++)
+	for (i = 0; i < N_NESTED_CTXS; i++)
 	{
 		parallel_task_init_one_context(&contexts[i].id);
 	}
@@ -131,7 +135,7 @@ void parallel_task_init()
 void parallel_task_deinit()
 {
 	int i;
-	for (i=0; i<2;i++)
+	for (i=0; i<N_NESTED_CTXS;i++)
 		starpu_sched_ctx_delete(contexts[i].id);
 	free(contexts);
 	free(main_context.cpus);
@@ -174,7 +178,7 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	if (starpu_cpu_worker_get_count() < 2)
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
 	{
 		starpu_shutdown();
 		return 77;