Просмотр исходного кода

Fix the case where the number of subcontexts does not divide the number of CPU workers: some CPU workers were then not assigned to any subcontext and would end up executing parallel tasks without the prologue having been executed first.

Olivier Aumage, 8 лет назад
Родитель
Коммит
a110078dab
1 изменённый файл: 14 добавлено и 10 удалено
  1. 14 10
      examples/sched_ctx/parallel_tasks_reuse_handle.c

+ 14 - 10
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -17,6 +17,7 @@
 
 #include <starpu.h>
 #include <omp.h>
+#include <pthread.h>
 
 #ifdef STARPU_QUICK_CHECK
 #define NTASKS 64
@@ -28,6 +29,8 @@
 #define LOOPS  10
 #endif
 
+#define N_NESTED_CTXS 2
+
 struct context
 {
 	int ncpus;
@@ -38,6 +41,7 @@ struct context
 /* Helper for the task that will initiate everything */
 void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 {
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
 	int sched_ctx = *(int *)sched_ctx_;
 	int *cpuids = NULL;
 	int ncpuids = 0;
@@ -50,6 +54,7 @@ void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 
 	omp_set_num_threads(ncpuids);
 	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
 	return;
 }
 
@@ -101,25 +106,24 @@ void parallel_task_init()
 						  0);
 
 	/* Initialize nested contexts */
-	/* WARNING : the number of contexts must be a divisor of the number of available cpus*/
-
-	contexts = malloc(sizeof(struct context)*2);
-	int cpus_per_context = main_context.ncpus/2;
+	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
+	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
 	int i;
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 	{
-		fprintf(stderr, "ncpus %d for context %d \n",cpus_per_context, i);
 		contexts[i].ncpus = cpus_per_context;
+		if (i == N_NESTED_CTXS-1)
+			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
 		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
 	}
 
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
 							 contexts[i].ncpus,"nested_ctx",
 							 STARPU_SCHED_CTX_NESTED,main_context.id,
 							 0);
 
-	for (i = 0; i < 2; i++)
+	for (i = 0; i < N_NESTED_CTXS; i++)
 	{
 		parallel_task_init_one_context(&contexts[i].id);
 	}
@@ -131,7 +135,7 @@ void parallel_task_init()
 void parallel_task_deinit()
 {
 	int i;
-	for (i=0; i<2;i++)
+	for (i=0; i<N_NESTED_CTXS;i++)
 		starpu_sched_ctx_delete(contexts[i].id);
 	free(contexts);
 	free(main_context.cpus);
@@ -174,7 +178,7 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	if (starpu_cpu_worker_get_count() < 2)
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
 	{
 		starpu_shutdown();
 		return 77;