Browse Source

- change 'omp for' iteration space to 'unsigned long long'
- add basic support for final tasks, included tasks, merged tasks
- add inline 'omp for' variants returning begin/end instead of first/nb
- add support for data handles in omp_parallel_region/omp_task_region
- update test cases

Olivier Aumage 11 years ago
parent
commit
b3315174c6

+ 9 - 7
include/starpu_openmp.h

@@ -51,8 +51,8 @@ extern "C"
 
 extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_parallel_region(const struct starpu_codelet * const parallel_region_cl, void * const parallel_region_cl_arg) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
+extern void starpu_omp_parallel_region(const struct starpu_codelet * const parallel_region_cl, starpu_data_handle_t *handles, void * const parallel_region_cl_arg) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl, starpu_data_handle_t *handles,
 		void * const task_region_cl_arg,
 		int if_clause, int final_clause, int untied_clause, int mergeable_clause) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW;
@@ -65,12 +65,14 @@ extern void starpu_omp_critical_inline_begin(const char *name) __STARPU_OMP_NOTH
 extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_for(void (*f)(unsigned long _first_i, unsigned long _nb_i, void *arg), void *arg, unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
-extern int starpu_omp_for_inline_first(unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, unsigned long *_first_i, unsigned long *_nb_i) __STARPU_OMP_NOTHROW;
-extern int starpu_omp_for_inline_next(unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, unsigned long *_first_i, unsigned long *_nb_i) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_ordered_inline_begin(unsigned long i) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
+extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
+extern int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
+extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i);
+extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i);
+extern void starpu_omp_ordered_inline_begin(unsigned long long i) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW;
-extern void starpu_omp_ordered(void (*f)(unsigned long _i, void *arg), void *arg, unsigned long i) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_ordered(void (*f)(unsigned long long _i, void *arg), void *arg, unsigned long long i) __STARPU_OMP_NOTHROW;
 
 extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW;
 extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;

+ 85 - 35
src/util/openmp_runtime_support.c

@@ -652,7 +652,7 @@ void starpu_omp_shutdown(void)
 	STARPU_PTHREAD_KEY_DELETE(omp_thread_key);
 }
 
-void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_region_cl,
+void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_region_cl, starpu_data_handle_t *handles,
 		void * const parallel_region_cl_arg)
 {
 	struct starpu_omp_thread *master_thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
@@ -757,6 +757,13 @@ void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_re
 
 		implicit_task->starpu_task = starpu_task_create();
 		implicit_task->starpu_task->cl = &implicit_task->cl;
+		{
+			unsigned i;
+			for (i = 0; i < implicit_task->cl.nbuffers; i++)
+			{
+				implicit_task->starpu_task->handles[i] = handles[i];
+			}
+		}
 		implicit_task->starpu_task->cl_arg = parallel_region_cl_arg;
 		implicit_task->starpu_task->omp_task = implicit_task;
 		implicit_task->starpu_task->workerid = implicit_task->owner_thread->worker->workerid;
@@ -1115,7 +1122,7 @@ static void explicit_task__destroy_callback(void *_task)
 	destroy_omp_task_struct(task);
 }
 
-void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
+void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl, starpu_data_handle_t *handles,
 		void * const task_region_cl_arg,
 		int if_clause, int final_clause, int untied_clause, int mergeable_clause)
 {
@@ -1151,9 +1158,36 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 	}
 	if (is_merged)
 	{
-		struct starpu_codelet task_region_cl = *_task_region_cl;
-		(void)task_region_cl;
-		_STARPU_ERROR("omp merged task unimplemented\n");
+		/* note: no need to back up/restore ICVs for merged tasks: merged tasks use the data environment of the caller */
+		unsigned i;
+		for (i = 0; i < _task_region_cl->nbuffers; i++)
+		{
+			ret = starpu_data_acquire(handles[i], _task_region_cl->modes[i]);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		}
+		void (*f)(void **starpu_buffers, void *starpu_cl_arg) = _task_region_cl->cpu_funcs[0];
+		f((void**)handles, task_region_cl_arg);
+		for (i = 0; i < _task_region_cl->nbuffers; i++)
+		{
+			starpu_data_release(handles[i]);
+		}
+	}
+	else if (is_included)
+	{
+		/* TODO: back up current ICVs and set up new ICVs for the included task */
+		unsigned i;
+		for (i = 0; i < _task_region_cl->nbuffers; i++)
+		{
+			ret = starpu_data_acquire(handles[i], _task_region_cl->modes[i]);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		}
+		void (*f)(void **starpu_buffers, void *starpu_cl_arg) = _task_region_cl->cpu_funcs[0];
+		f((void**)handles, task_region_cl_arg);
+		for (i = 0; i < _task_region_cl->nbuffers; i++)
+		{
+			starpu_data_release(handles[i]);
+		}
+		/* TODO: restore backed-up ICVs */
 	}
 	else
 	{
@@ -1185,6 +1219,13 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 		generated_task->starpu_task = starpu_task_create();
 		generated_task->starpu_task->cl = &generated_task->cl;
 		generated_task->starpu_task->cl_arg = task_region_cl_arg;
+		{
+			unsigned i;
+			for (i = 0; i < generated_task->cl.nbuffers; i++)
+			{
+				generated_task->starpu_task->handles[i] = handles[i];
+			}
+		}
 		generated_task->starpu_task->omp_task = generated_task;
 		_starpu_task_set_omp_cleanup_callback(generated_task->starpu_task, explicit_task__destroy_callback, generated_task);
 		/* if the task is tied, execute_on_a_specific_worker will be changed to 1
@@ -1192,30 +1233,23 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 		 * which worker thread has been selected */
 		generated_task->starpu_task->execute_on_a_specific_worker = 0;
 
-		if (is_included)
+		(void)STARPU_ATOMIC_ADD(&generating_task->child_task_count, 1);
+		(void)STARPU_ATOMIC_ADD(&parallel_region->bound_explicit_task_count, 1);
+		if (generated_task->task_group)
 		{
-			_STARPU_ERROR("omp included task unimplemented\n");
+			(void)STARPU_ATOMIC_ADD(&generated_task->task_group->descendent_task_count, 1);
 		}
-		else
+		if (is_undeferred)
 		{
-			(void)STARPU_ATOMIC_ADD(&generating_task->child_task_count, 1);
-			(void)STARPU_ATOMIC_ADD(&parallel_region->bound_explicit_task_count, 1);
-			if (generated_task->task_group)
-			{
-				(void)STARPU_ATOMIC_ADD(&generated_task->task_group->descendent_task_count, 1);
-			}
-			if (is_undeferred)
-			{
-				_starpu_task_prepare_for_continuation();
-				starpu_task_declare_deps_array(generating_task->starpu_task, 1,
-						&generated_task->starpu_task);
-			}
-			ret = starpu_task_submit(generated_task->starpu_task);
-			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-			if (is_undeferred)
-			{
-				starpu_omp_task_preempt();
-			}
+			_starpu_task_prepare_for_continuation();
+			starpu_task_declare_deps_array(generating_task->starpu_task, 1,
+					&generated_task->starpu_task);
+		}
+		ret = starpu_task_submit(generated_task->starpu_task);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		if (is_undeferred)
+		{
+			starpu_omp_task_preempt();
 		}
 	}
 }
@@ -1276,7 +1310,7 @@ void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
 
 static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
 		struct starpu_omp_loop *loop, int first_call,
-		unsigned long nb_iterations, unsigned long chunk, int schedule, unsigned long *_first_i, unsigned long *_nb_i)
+		unsigned long long nb_iterations, unsigned long long chunk, int schedule, unsigned long long *_first_i, unsigned long long *_nb_i)
 {
 	*_nb_i = 0;
 	if (schedule == starpu_omp_schedule_static || schedule == starpu_omp_schedule_auto)
@@ -1310,7 +1344,7 @@ static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_regio
 			{
 				*_nb_i = nb_iterations / parallel_region->nb_threads;
 				*_first_i = (unsigned)task->rank * (*_nb_i);
-				unsigned long remainder = nb_iterations % parallel_region->nb_threads;
+				unsigned long long remainder = nb_iterations % parallel_region->nb_threads;
 
 				if (remainder > 0)
 				{
@@ -1449,7 +1483,7 @@ static inline void _starpu_omp_for_loop_end(struct starpu_omp_region *parallel_r
 	task->loop_id++;
 }
 
-int starpu_omp_for_inline_first(unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, unsigned long *_first_i, unsigned long *_nb_i)
+int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;
@@ -1463,7 +1497,7 @@ int starpu_omp_for_inline_first(unsigned long nb_iterations, unsigned long chunk
 	return (*_nb_i != 0);
 }
 
-int starpu_omp_for_inline_next(unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, unsigned long *_first_i, unsigned long *_nb_i)
+int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;
@@ -1477,10 +1511,26 @@ int starpu_omp_for_inline_next(unsigned long nb_iterations, unsigned long chunk,
 	return (*_nb_i != 0);
 }
 
-void starpu_omp_for(void (*f)(unsigned long _first_i, unsigned long _nb_i, void *arg), void *arg, unsigned long nb_iterations, unsigned long chunk, int schedule, int ordered, int nowait)
+int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
+{	/* begin/end variant of starpu_omp_for_inline_first(): forwards the same arguments, but reports the chunk as [*_begin_i, *_end_i) instead of first index + iteration count */
+	unsigned long long nb_i;	/* iteration count filled in by the first/nb call below */
+	int end = starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i);	/* despite its name, 'end' is the callee's result, which is nonzero when the chunk is non-empty */
+	*_end_i = *_begin_i + nb_i;	/* one past the last iteration of the chunk */
+	return end;
+}
+
+int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
+{	/* begin/end variant of starpu_omp_for_inline_next(): forwards the same arguments, but reports the chunk as [*_begin_i, *_end_i) instead of first index + iteration count */
+	unsigned long long nb_i;	/* iteration count filled in by the first/nb call below */
+	int end = starpu_omp_for_inline_next(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i);	/* despite its name, 'end' is the callee's result, which is nonzero when the chunk is non-empty */
+	*_end_i = *_begin_i + nb_i;	/* one past the last iteration of the chunk */
+	return end;
+}
+
+void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
 {
-	unsigned long _first_i = 0;
-	unsigned long _nb_i = 0;
+	unsigned long long _first_i = 0;
+	unsigned long long _nb_i = 0;
 	if (starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, &_first_i, &_nb_i))
 	{
 		do
@@ -1495,7 +1545,7 @@ void starpu_omp_for(void (*f)(unsigned long _first_i, unsigned long _nb_i, void
 	}
 }
 
-void starpu_omp_ordered(void (*f)(unsigned long _i, void *arg), void *arg, unsigned long i)
+void starpu_omp_ordered(void (*f)(unsigned long long _i, void *arg), void *arg, unsigned long long i)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;
@@ -1513,7 +1563,7 @@ void starpu_omp_ordered(void (*f)(unsigned long _i, void *arg), void *arg, unsig
 	_starpu_spin_unlock(&loop->ordered_lock);
 }
 
-void starpu_omp_ordered_inline_begin(unsigned long i)
+void starpu_omp_ordered_inline_begin(unsigned long long i)
 {
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = task->owner_region;

+ 2 - 2
src/util/openmp_runtime_support.h

@@ -278,12 +278,12 @@ LIST_TYPE(starpu_omp_thread,
 struct starpu_omp_loop
 {
 	int id;
-	unsigned long next_iteration;
+	unsigned long long next_iteration;
 	int nb_completed_threads;
 	struct starpu_omp_loop *next_loop;
 	struct _starpu_spinlock ordered_lock;
 	struct starpu_omp_condition ordered_cond;
-	unsigned long ordered_iteration;
+	unsigned long long ordered_iteration;
 };
 
 struct starpu_omp_region

+ 1 - 1
tests/openmp/parallel_01.c

@@ -59,7 +59,7 @@ static struct starpu_codelet parallel_region_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 2 - 2
tests/openmp/parallel_02.c

@@ -67,7 +67,7 @@ void parallel_region_1_f(void *buffers[], void *args)
 	worker_id = starpu_worker_get_id();
 	printf("[tid %p] parallel region 1: task thread = %d\n", (void *)tid, worker_id);
 
-	starpu_omp_parallel_region(&parallel_region_2_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_2_cl, NULL, NULL);
 }
 
 static struct starpu_codelet parallel_region_1_cl =
@@ -80,7 +80,7 @@ static struct starpu_codelet parallel_region_1_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_1_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 2 - 2
tests/openmp/parallel_03.c

@@ -59,8 +59,8 @@ static struct starpu_codelet parallel_region_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 1 - 1
tests/openmp/parallel_barrier_01.c

@@ -68,7 +68,7 @@ int
 main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 2 - 2
tests/openmp/parallel_critical_01.c

@@ -77,9 +77,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_critical_inline_01.c

@@ -80,9 +80,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_critical_named_01.c

@@ -87,9 +87,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_critical_named_inline_01.c

@@ -80,9 +80,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 12 - 12
tests/openmp/parallel_for_01.c

@@ -27,7 +27,7 @@ int main(int argc, char **argv)
 #else
 #define NB_ITERS 256
 #define CHUNK 16
-unsigned long array[NB_ITERS];
+unsigned long long array[NB_ITERS];
 
 __attribute__((constructor))
 static void omp_constructor(void)
@@ -42,13 +42,13 @@ static void omp_destructor(void)
 	starpu_omp_shutdown();
 }
 
-void for_g(unsigned long i, unsigned long nb_i, void *arg)
+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
 {
 	int worker_id;
 	pthread_t tid;
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
-	printf("[tid %p] task thread = %d, for [%s] iterations first=%lu:nb=%lu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
 	for (; nb_i > 0; i++, nb_i--)
 	{
 		array[i] = 1;
@@ -177,13 +177,13 @@ static struct starpu_codelet parallel_region_6_cl =
 
 static void clear_array(void)
 {
-	memset(array, 0, NB_ITERS*sizeof(unsigned long));
+	memset(array, 0, NB_ITERS*sizeof(unsigned long long));
 }
 
 static void check_array(void)
 {
-	unsigned long i;
-	unsigned long s = 0;
+	unsigned long long i;
+	unsigned long long s = 0;
 	for (i = 0; i < NB_ITERS; i++)
 	{
 		s += array[i];
@@ -197,27 +197,27 @@ static void check_array(void)
 int
 main (int argc, char *argv[]) {
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_1_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_2_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_2_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_3_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_3_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_4_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_4_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_5_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_5_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_6_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_6_cl, NULL, NULL);
 	check_array();
 	return 0;
 }

+ 4 - 4
tests/openmp/parallel_for_02.c

@@ -40,16 +40,16 @@ static void omp_destructor(void)
 	starpu_omp_shutdown();
 }
 
-void for_g(unsigned long i, unsigned long nb_i, void *arg)
+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
 {
 	int worker_id;
 	pthread_t tid;
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
-	printf("[tid %p] task thread = %d, for [%s] iterations first=%lu:nb=%lu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
 	for (; nb_i > 0; i++, nb_i--)
 	{
-		printf("[tid %p] task thread = %d, for [%s] iteration %lu\n", (void *)tid, worker_id, (const char *)arg, i);
+		printf("[tid %p] task thread = %d, for [%s] iteration %llu\n", (void *)tid, worker_id, (const char *)arg, i);
 	}
 }
 
@@ -86,7 +86,7 @@ static struct starpu_codelet parallel_region_1_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_1_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 14 - 14
tests/openmp/parallel_for_ordered_01.c

@@ -27,7 +27,7 @@ int main(int argc, char **argv)
 #else
 #define NB_ITERS 256
 #define CHUNK 16
-unsigned long array[NB_ITERS];
+unsigned long long array[NB_ITERS];
 
 __attribute__((constructor))
 static void omp_constructor(void)
@@ -42,22 +42,22 @@ static void omp_destructor(void)
 	starpu_omp_shutdown();
 }
 
-void ordered_f(unsigned long i, void *arg)
+void ordered_f(unsigned long long i, void *arg)
 {
 	int worker_id;
 	pthread_t tid;
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
-	printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %lu\n", (void *)tid, worker_id, (const char *)arg, i);
+	printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, (const char *)arg, i);
 }
 
-void for_g(unsigned long i, unsigned long nb_i, void *arg)
+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
 {
 	int worker_id;
 	pthread_t tid;
 	tid = pthread_self();
 	worker_id = starpu_worker_get_id();
-	printf("[tid %p] task thread = %d, for [%s] iterations first=%lu:nb=%lu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
 	for (; nb_i > 0; i++, nb_i--)
 	{
 		array[i] = 1;
@@ -187,13 +187,13 @@ static struct starpu_codelet parallel_region_6_cl =
 
 static void clear_array(void)
 {
-	memset(array, 0, NB_ITERS*sizeof(unsigned long));
+	memset(array, 0, NB_ITERS*sizeof(unsigned long long));
 }
 
 static void check_array(void)
 {
-	unsigned long i;
-	unsigned long s = 0;
+	unsigned long long i;
+	unsigned long long s = 0;
 	for (i = 0; i < NB_ITERS; i++)
 	{
 		s += array[i];
@@ -207,28 +207,28 @@ static void check_array(void)
 int
 main (int argc, char *argv[]) {
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_1_cl, NULL, NULL);
 	check_array();
 	return 0;
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_2_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_2_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_3_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_3_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_4_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_4_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_5_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_5_cl, NULL, NULL);
 	check_array();
 
 	clear_array();
-	starpu_omp_parallel_region(&parallel_region_6_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_6_cl, NULL, NULL);
 	check_array();
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_master_01.c

@@ -77,9 +77,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_master_inline_01.c

@@ -71,9 +71,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_single_inline_01.c

@@ -88,9 +88,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_single_nowait_01.c

@@ -77,9 +77,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 2 - 2
tests/openmp/parallel_single_wait_01.c

@@ -77,9 +77,9 @@ main (int argc, char *argv[]) {
 	pthread_t tid;
 	tid = pthread_self();
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	printf("<main>\n");
 	return 0;
 }

+ 5 - 5
tests/openmp/task_01.c

@@ -74,10 +74,10 @@ void parallel_region_f(void *buffers[], void *args)
 	 * untied_clause: 1
 	 * mergeable_clause: 0
 	 */
-	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, NULL, 1, 0, 1, 0);
 }
 
 static struct starpu_codelet parallel_region_cl =
@@ -90,7 +90,7 @@ static struct starpu_codelet parallel_region_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 5 - 5
tests/openmp/taskgroup_01.c

@@ -66,8 +66,8 @@ void taskgroup_f(void *arg)
 	 * untied_clause: 1
 	 * mergeable_clause: 0
 	 */
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)(*p_i)++, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)(*p_i)++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)(*p_i)++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)(*p_i)++, 1, 0, 1, 0);
 }
 
 void parallel_region_f(void *buffers[], void *args)
@@ -88,8 +88,8 @@ void parallel_region_f(void *buffers[], void *args)
 	starpu_omp_taskgroup(taskgroup_f, (void *)&i);
 	printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id);
 
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
 }
 
 static struct starpu_codelet parallel_region_cl =
@@ -102,7 +102,7 @@ static struct starpu_codelet parallel_region_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif

+ 7 - 7
tests/openmp/taskwait_01.c

@@ -75,18 +75,18 @@ void parallel_region_f(void *buffers[], void *args)
 	 * untied_clause: 1
 	 * mergeable_clause: 0
 	 */
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
 	starpu_omp_taskwait();
 	printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id);
 
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
 	starpu_omp_taskwait();
 	printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id);
 
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
-	starpu_omp_task_region(&task_region_cl, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, (void *)(intptr_t)i++, 1, 0, 1, 0);
 }
 
 static struct starpu_codelet parallel_region_cl =
@@ -99,7 +99,7 @@ static struct starpu_codelet parallel_region_cl =
 
 int
 main (int argc, char *argv[]) {
-	starpu_omp_parallel_region(&parallel_region_cl, NULL);
+	starpu_omp_parallel_region(&parallel_region_cl, NULL, NULL);
 	return 0;
 }
 #endif