Explorar el Código

- implement 'guided' schedule of omp for
- update test cases

Olivier Aumage hace 11 años
padre
commit
7ba19ce15d
Se han modificado 3 ficheros con 109 adiciones y 12 borrados
  1. 36 10
      src/util/openmp_runtime_support.c
  2. 64 1
      tests/openmp/parallel_for_01.c
  3. 9 1
      tests/openmp/parallel_for_02.c

+ 36 - 10
src/util/openmp_runtime_support.c

@@ -1009,9 +1009,7 @@ void starpu_omp_critical_inline_begin(const char *name)
 
 void starpu_omp_critical_inline_end(const char *name)
 {
-	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_critical *critical = NULL;
-	struct starpu_omp_task_link link;
 
 	if (name)
 	{
@@ -1303,24 +1301,52 @@ void starpu_omp_for(void (*f)(unsigned long _first_i, unsigned long _nb_i, void
 			if (loop->next_iteration >= nb_iterations)
 				break;
 			first_i = loop->next_iteration;
-			loop->next_iteration += chunk;
-			_starpu_spin_unlock(&parallel_region->lock);
-
-			if (first_i + chunk <= nb_iterations)
+			if (first_i + chunk > nb_iterations)
 			{
-				nb_i = chunk;
+				nb_i = nb_iterations - first_i;
 			}
 			else
 			{
-				nb_i = nb_iterations - first_i;
+				nb_i = chunk;
 			}
+			loop->next_iteration += nb_i;
+			_starpu_spin_unlock(&parallel_region->lock);
 			f(first_i, nb_i, arg);
 		}
 	}
 	else if (schedule == starpu_omp_schedule_guided)
 	{
-		/* TODO: implement omp_schedule_guided */
-		_STARPU_ERROR("omp for / guided schedule not implemented\n");
+		if (chunk == 0)
+		{
+			chunk = 1;
+		}
+		for (;;)
+		{
+			unsigned long first_i;
+			unsigned long nb_i;
+
+			_starpu_spin_lock(&parallel_region->lock);
+			/* upon exiting the loop, the parallel_region-lock will already be held
+			 * for performing loop completion */
+			if (loop->next_iteration >= nb_iterations)
+				break;
+			first_i = loop->next_iteration;
+			nb_i = (nb_iterations - first_i)/parallel_region->nb_threads;
+			if (nb_i < chunk)
+			{
+				if (first_i+chunk > nb_iterations)
+				{
+					nb_i = nb_iterations - first_i;
+				}
+				else
+				{
+					nb_i = chunk;
+				}
+			}
+			loop->next_iteration += nb_i;
+			_starpu_spin_unlock(&parallel_region->lock);
+			f(first_i, nb_i, arg);
+		}
 	}
 
 	loop->nb_completed_threads++;

+ 64 - 1
tests/openmp/parallel_for_01.c

@@ -101,7 +101,7 @@ void parallel_region_3_f(void *buffers[], void *args)
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
-	starpu_omp_for(for_g, (void*)"dynamic", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0);
+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0);
 }
 
 static struct starpu_codelet parallel_region_3_cl =
@@ -112,11 +112,74 @@ static struct starpu_codelet parallel_region_3_cl =
 
 };
 
+void parallel_region_4_f(void *buffers[], void *args) /* codelet body: one dynamic-schedule loop, no chunk size given */
+{
+	(void) buffers; /* unused */
+	(void) args; /* unused */
+	int worker_id;
+	pthread_t tid;
+	tid = pthread_self();
+	worker_id = starpu_worker_get_id();
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); /* trace which thread/worker runs this region */
+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 0); /* 4th argument 0: presumably "no chunk size requested" -- confirm against starpu_omp_for */
+}
+
+static struct starpu_codelet parallel_region_4_cl =
+{
+	.cpu_funcs    = { parallel_region_4_f, NULL },
+	.where        = STARPU_CPU,
+	.nbuffers     = 0
+
+};
+
+void parallel_region_5_f(void *buffers[], void *args) /* codelet body: one guided-schedule loop with an explicit chunk size */
+{
+	(void) buffers; /* unused */
+	(void) args; /* unused */
+	int worker_id;
+	pthread_t tid;
+	tid = pthread_self();
+	worker_id = starpu_worker_get_id();
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); /* trace which thread/worker runs this region */
+	starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 0); /* explicit CHUNK; parallel_region_6_f covers the chunk == 0 case */
+}
+
+static struct starpu_codelet parallel_region_5_cl =
+{
+	.cpu_funcs    = { parallel_region_5_f, NULL },
+	.where        = STARPU_CPU,
+	.nbuffers     = 0
+
+};
+
+void parallel_region_6_f(void *buffers[], void *args) /* codelet body: one guided-schedule loop, no chunk size given */
+{
+	(void) buffers; /* unused */
+	(void) args; /* unused */
+	int worker_id;
+	pthread_t tid;
+	tid = pthread_self();
+	worker_id = starpu_worker_get_id();
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); /* trace which thread/worker runs this region */
+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0); /* 4th argument 0: assuming it is the chunk size, the guided branch raises it to 1 */
+}
+
+static struct starpu_codelet parallel_region_6_cl =
+{
+	.cpu_funcs    = { parallel_region_6_f, NULL },
+	.where        = STARPU_CPU,
+	.nbuffers     = 0
+
+};
+
 int
 main (int argc, char *argv[]) {
 	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
 	starpu_omp_parallel_region(&parallel_region_2_cl, NULL);
 	starpu_omp_parallel_region(&parallel_region_3_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_4_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_5_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_6_cl, NULL);
 	return 0;
 }
 #endif

+ 9 - 1
tests/openmp/parallel_for_02.c

@@ -64,8 +64,16 @@ void parallel_region_1_f(void *buffers[], void *args)
 	starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 1);
 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
 	starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 1);
+	
 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
-	starpu_omp_for(for_g, (void*)"dynamic", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1);
+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1);
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 1);
+
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
+	starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 1);
+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 1);
 }
 
 static struct starpu_codelet parallel_region_1_cl =