Преглед на файлове

- modify implementation of omp for/ordered to hide loop counter

Olivier Aumage преди 11 години
родител
ревизия
5b98f34986
променени са 4 файла, в които са добавени 36 реда и са изтрити 12 реда
  1. 2 2
      include/starpu_openmp.h
  2. 21 7
      src/util/openmp_runtime_support.c
  3. 2 0
      src/util/openmp_runtime_support.h
  4. 11 3
      tests/openmp/parallel_for_ordered_01.c

+ 2 - 2
include/starpu_openmp.h

@@ -103,9 +103,9 @@ extern void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned l
 extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
 extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
 
-extern void starpu_omp_ordered_inline_begin(unsigned long long i) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_ordered_inline_begin(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_ordered(void (*f)(unsigned long long _i, void *arg), void *arg, unsigned long long i) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_ordered(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
 
 extern void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW;

+ 21 - 7
src/util/openmp_runtime_support.c

@@ -1361,7 +1361,7 @@ void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
 
 static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
 		struct starpu_omp_loop *loop, int first_call,
-		unsigned long long nb_iterations, unsigned long long chunk, int schedule, unsigned long long *_first_i, unsigned long long *_nb_i)
+		unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
 {
 	*_nb_i = 0;
 	if (schedule == starpu_omp_schedule_static || schedule == starpu_omp_schedule_auto)
@@ -1468,6 +1468,11 @@ static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_regio
 		}
 		_starpu_spin_unlock(&parallel_region->lock);
 	}
+	if (ordered)
+	{
+		task->ordered_first_i = *_first_i;
+		task->ordered_nb_i = *_nb_i;
+	}
 }
 
 static inline struct starpu_omp_loop *_starpu_omp_for_get_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task)
@@ -1540,7 +1545,7 @@ int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long
 	struct starpu_omp_region *parallel_region = task->owner_region;
 	struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered);
 
-	_starpu_omp_for_loop(parallel_region, task, loop, 1, nb_iterations, chunk, schedule, _first_i, _nb_i);
+	_starpu_omp_for_loop(parallel_region, task, loop, 1, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i);
 	if (*_nb_i == 0)
 	{
 		_starpu_omp_for_loop_end(parallel_region, task, loop, ordered);
@@ -1554,7 +1559,7 @@ int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long l
 	struct starpu_omp_region *parallel_region = task->owner_region;
 	struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered);
 
-	_starpu_omp_for_loop(parallel_region, task, loop, 0, nb_iterations, chunk, schedule, _first_i, _nb_i);
+	_starpu_omp_for_loop(parallel_region, task, loop, 0, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i);
 	if (*_nb_i == 0)
 	{
 		_starpu_omp_for_loop_end(parallel_region, task, loop, ordered);
@@ -1614,30 +1619,39 @@ void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long lon
 	}
 }
 
-void starpu_omp_ordered(void (*f)(unsigned long long _i, void *arg), void *arg, unsigned long long i)
+void starpu_omp_ordered(void (*f)(void *arg), void *arg)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;
 	struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task);
+	unsigned long long i;
 
+	STARPU_ASSERT(task->ordered_nb_i > 0);
+	i = task->ordered_first_i;
+	task->ordered_first_i++;
+	task->ordered_nb_i--;
 	_starpu_spin_lock(&loop->ordered_lock);
 	while (i != loop->ordered_iteration)
 	{
 		STARPU_ASSERT(i > loop->ordered_iteration);
 		condition_wait(&loop->ordered_cond, &loop->ordered_lock);
 	}
-	f(i, arg);
+	f(arg);
 	loop->ordered_iteration++;	
 	condition_broadcast(&loop->ordered_cond);
 	_starpu_spin_unlock(&loop->ordered_lock);
 }
 
-void starpu_omp_ordered_inline_begin(unsigned long long i)
+void starpu_omp_ordered_inline_begin(void)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;
 	struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task);
-
+	unsigned long long i;
+	STARPU_ASSERT(task->ordered_nb_i > 0);
+	i = task->ordered_first_i;
+	task->ordered_first_i++;
+	task->ordered_nb_i--;
 	_starpu_spin_lock(&loop->ordered_lock);
 	while (i != loop->ordered_iteration)
 	{

+ 2 - 0
src/util/openmp_runtime_support.h

@@ -226,6 +226,8 @@ LIST_TYPE(starpu_omp_task,
 	int barrier_count;
 	int single_id;
 	int loop_id;
+	unsigned long long ordered_first_i;
+	unsigned long long ordered_nb_i;
 	int sections_id;
 	struct starpu_omp_data_environment_icvs data_env_icvs;
 	struct starpu_omp_implicit_task_icvs implicit_task_icvs;

+ 11 - 3
tests/openmp/parallel_for_ordered_01.c

@@ -42,13 +42,20 @@ static void omp_destructor(void)
 	starpu_omp_shutdown();
 }
 
-void ordered_f(unsigned long long i, void *arg)
+struct s_ordered_arg
 {
+	const char *msg;
+	unsigned long long i;
+};
+
+void ordered_f(void *_arg)
+{
+	struct s_ordered_arg *arg = _arg;
 	int worker_id;
 	pthread_t tid;
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
-	printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, (const char *)arg, i);
+	printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, arg->msg, arg->i);
 }
 
 void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
@@ -60,8 +67,9 @@ void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
 	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
 	for (; nb_i > 0; i++, nb_i--)
 	{
+		struct s_ordered_arg ordered_arg = { arg, i };
 		array[i] = 1;
-		starpu_omp_ordered(ordered_f, arg, i);
+		starpu_omp_ordered(ordered_f, &ordered_arg);
 	}
 }