Browse Source

Support Taskloop directive of OpenMP

Yanis Khorsi 7 years ago
parent
commit
a47c2d4d14

+ 16 - 0
include/starpu_openmp.h

@@ -69,6 +69,20 @@ struct starpu_omp_task_region_attr
 	int final_clause;
 	int untied_clause;
 	int mergeable_clause;
+
+   /*
+    * taskloop attribute
+    * */
+   int is_loop;
+   int nogroup_clause;
+
+   int collapse;
+   int num_tasks;
+   unsigned long long nb_iterations;
+   unsigned long long grainsize;
+   unsigned long long begin_i;
+   unsigned long long end_i;
+   unsigned long long chunk;
 };
 
 #ifdef __cplusplus
@@ -104,6 +118,8 @@ extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_taskgroup_inline_begin(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_taskgroup_inline_end(void) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW;
 
 extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
 extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;

+ 72 - 16
src/util/openmp_runtime_support.c

@@ -381,24 +381,29 @@ static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task)
 {
 	STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT));
 	struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key();
-	if (starpu_worker->arch == STARPU_CPU_WORKER)
-	{
-		task->cpu_f(task->starpu_buffers, task->starpu_cl_arg);
-	}
+   /* XXX on work */
+   if (task->is_loop) {
+      starpu_omp_for_inline_first_alt(task->nb_iterations, task->chunk, starpu_omp_sched_static, 1, &task->begin_i, &task->end_i);
+   }
+   if (starpu_worker->arch == STARPU_CPU_WORKER)
+   {
+      task->cpu_f(task->starpu_buffers, task->starpu_cl_arg);
+   }
 #ifdef STARPU_USE_CUDA
-	else if (starpu_worker->arch == STARPU_CUDA_WORKER)
-	{
-		task->cuda_f(task->starpu_buffers, task->starpu_cl_arg);
-	}
+   else if (starpu_worker->arch == STARPU_CUDA_WORKER)
+   {
+      task->cuda_f(task->starpu_buffers, task->starpu_cl_arg);
+   }
 #endif
 #ifdef STARPU_USE_OPENCL
-	else if (starpu_worker->arch == STARPU_OPENCL_WORKER)
-	{
-		task->opencl_f(task->starpu_buffers, task->starpu_cl_arg);
-	}
+   else if (starpu_worker->arch == STARPU_OPENCL_WORKER)
+   {
+      task->opencl_f(task->starpu_buffers, task->starpu_cl_arg);
+   }
 #endif
-	else
-		_STARPU_ERROR("invalid worker architecture");
+   else
+      _STARPU_ERROR("invalid worker architecture");
+   /**/
 	_starpu_omp_unregister_task_handles(task);
 	_starpu_spin_lock(&task->lock);
 	task->state = starpu_omp_task_state_terminated;
@@ -1624,8 +1629,20 @@ void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr)
 		{
 			generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNDEFERRED;
 		}
-		generated_task->task_group = generating_task->task_group;
-		generated_task->rank = -1;
+      // XXX taskgroup exist
+      if (!attr->nogroup_clause)
+      {
+         generated_task->task_group = generating_task->task_group;
+      }
+      generated_task->rank = -1;
+
+      /* XXX taskloop attributes */
+      generated_task->is_loop = attr->is_loop;
+      generated_task->nb_iterations = attr->nb_iterations;
+      generated_task->grainsize = attr->grainsize;
+      generated_task->chunk = attr->chunk;
+      generated_task->begin_i = attr->begin_i;
+      generated_task->end_i = attr->end_i;
 
 		/*
 		 * save pointer to the regions user function from the task region codelet
@@ -1794,6 +1811,45 @@ void starpu_omp_taskgroup_inline_end(void)
 	free(p_task_group);
 }
 
+/*
+ * Begin an inline taskloop region (work in progress).
+ *
+ * Unless attr->nogroup_clause is set, an inline taskgroup is opened first.
+ * The attr->nb_iterations iterations are then cut into contiguous sub-ranges
+ * and one task is submitted per sub-range through starpu_omp_task_region().
+ *
+ * Number of sub-ranges:
+ *   - attr->num_tasks when it is strictly positive, otherwise
+ *   - nb_iterations / grainsize when attr->grainsize is non-zero, otherwise
+ *   - 4.
+ * The result is clamped to [1, nb_iterations]: a grainsize larger than the
+ * iteration count would otherwise give zero sub-ranges and a division by zero,
+ * and more sub-ranges than iterations would only produce empty tasks.
+ * No task is submitted when nb_iterations is 0.
+ *
+ * attr is used as scratch space: is_loop is set to 1, and begin_i, end_i and
+ * chunk are overwritten before each submission.  The last sub-range also
+ * receives the remainder of the division.
+ */
+void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr)
+{
+   if (!attr->nogroup_clause)
+   {
+      starpu_omp_taskgroup_inline_begin();
+   }
+
+   attr->is_loop = 1;
+
+   const unsigned long long nb_iterations = attr->nb_iterations;
+   if (nb_iterations == 0)
+   {
+      /* Empty iteration space: nothing to submit. */
+      return;
+   }
+
+   /* All loop arithmetic is done in the width of nb_iterations to avoid truncation. */
+   unsigned long long nb_subloop;
+   if (attr->num_tasks > 0) {
+      nb_subloop = (unsigned long long) attr->num_tasks;
+   } else if (attr->grainsize) {
+      nb_subloop = nb_iterations / attr->grainsize;
+   } else {
+      nb_subloop = 4;
+   }
+
+   if (nb_subloop == 0) {
+      nb_subloop = 1;
+   }
+   if (nb_subloop > nb_iterations) {
+      nb_subloop = nb_iterations;
+   }
+
+   const unsigned long long nb_iter_i = nb_iterations / nb_subloop;
+   const unsigned long long remainder = nb_iterations % nb_subloop;
+   unsigned long long i;
+   for (i = 0; i < nb_subloop; i++)
+   {
+      attr->begin_i = nb_iter_i * i;
+      attr->end_i = attr->begin_i + nb_iter_i;
+      if (i + 1 == nb_subloop) {
+         /* The last task absorbs the iterations left over by the division. */
+         attr->end_i += remainder;
+      }
+      attr->chunk = attr->end_i - attr->begin_i;
+      starpu_omp_task_region(attr);
+   }
+}
+
+/*
+ * End an inline taskloop region (work in progress).
+ *
+ * Calls starpu_omp_taskgroup_inline_end() unless attr->nogroup_clause is set,
+ * in which case there is nothing to do.
+ */
+void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr)
+{
+   if (attr->nogroup_clause)
+   {
+      return;
+   }
+
+   starpu_omp_taskgroup_inline_end();
+}
+
 static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
 		struct starpu_omp_loop *loop, int first_call,
 		unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)

+ 10 - 0
src/util/openmp_runtime_support.h

@@ -266,6 +266,16 @@ LIST_TYPE(starpu_omp_task,
 	int stack_vg_id;
 
 	size_t stacksize;
+
+   /*
+    * taskloop attribute
+    * */
+   int is_loop;
+   unsigned long long nb_iterations;
+   unsigned long long grainsize;
+   unsigned long long chunk;
+   unsigned long long begin_i;
+   unsigned long long end_i;
 )
 
 LIST_TYPE(starpu_omp_thread,

+ 4 - 0
tests/Makefile.am

@@ -201,6 +201,7 @@ myPROGRAMS +=					\
 	openmp/task_01				\
 	openmp/task_02				\
 	openmp/task_03				\
+	openmp/taskloop				\
 	openmp/taskwait_01			\
 	openmp/taskgroup_01			\
 	openmp/taskgroup_02			\
@@ -736,6 +737,9 @@ openmp_task_02_SOURCES = 	\
 openmp_task_03_SOURCES = 	\
 	openmp/task_03.c
 
+openmp_taskloop_SOURCES = 	\
+	openmp/taskloop.c
+
 openmp_taskwait_01_SOURCES = 	\
 	openmp/taskwait_01.c
 

+ 70 - 0
tests/openmp/taskloop.c

@@ -0,0 +1,70 @@
+#include <pthread.h>
+#include <starpu.h>
+#include <stdio.h>
+
+/*
+ * Check the OpenMP taskloop support.
+ */
+
+#if !defined(STARPU_OPENMP)
+/*
+ * Fallback entry point used when STARPU_OPENMP is not defined: report the
+ * test as skipped.
+ * NOTE(review): STARPU_TEST_SKIPPED is not defined by any header visible in
+ * this file's include list -- confirm it is reachable through <starpu.h>.
+ */
+int main(void)
+{
+   const int status = STARPU_TEST_SKIPPED;
+
+   return status;
+}
+#else
+/*
+ * Declared with the constructor attribute: calls starpu_omp_init() and hands
+ * its return value to STARPU_CHECK_RETURN_VALUE.
+ */
+__attribute__((constructor))
+static void omp_constructor(void)
+{
+   const int init_status = starpu_omp_init();
+
+   STARPU_CHECK_RETURN_VALUE(init_status, "starpu_omp_init");
+}
+
+/* Declared with the destructor attribute: calls starpu_omp_shutdown(). */
+__attribute__((destructor)) static void omp_destructor(void)
+{
+   starpu_omp_shutdown();
+}
+
+/*
+ * Body of one taskloop task: print the bounds of the sub-range it was given,
+ * followed by the pointer returned by starpu_task_get_current().
+ *
+ * begin_i: first bound of the sub-range, as passed by the wrapper.
+ * end_i:   second bound of the sub-range, as passed by the wrapper.
+ */
+void taskloop_callback(unsigned long long begin_i, unsigned long long end_i)
+{
+   printf ("begin = %llu , end = %llu, %p\n", begin_i, end_i, (void *)starpu_task_get_current());
+}
+
+/*
+ * CPU codelet entry point: interpret _args as a struct
+ * starpu_omp_task_region_attr and forward its begin_i / end_i fields to
+ * taskloop_callback().  The buffers argument is not used.
+ *
+ * NOTE(review): main() passes the address of the very attr structure that
+ * starpu_omp_taskloop_inline_begin() rewrites for every sub-range, so the
+ * bounds read here may belong to a later submission -- confirm whether the
+ * runtime copies cl_arg or whether per-task bounds should be read elsewhere.
+ */
+void taskloop_callback_wrapper(void *buffers[], void *_args)
+{
+   const struct starpu_omp_task_region_attr *region_attr = _args;
+
+   (void) buffers;
+   taskloop_callback(region_attr->begin_i, region_attr->end_i);
+}
+
+/*
+ * Test entry point: describe a 400-iteration taskloop whose tasks run
+ * taskloop_callback_wrapper on CPU, then open and close the taskloop region.
+ * Always returns 0.
+ */
+int main(void)
+{
+   struct starpu_omp_task_region_attr attr;
+
+   memset(&attr, 0, sizeof attr);
+
+   /* Codelet: CPU only, the attr structure itself is handed over as argument. */
+   attr.cl.where         = STARPU_CPU;
+   attr.cl.cpu_funcs[0]  = taskloop_callback_wrapper;
+   attr.cl.flags         = STARPU_CODELET_SIMGRID_EXECUTE;
+#ifdef STARPU_SIMGRID
+   attr.cl.model         = &starpu_perfmodel_nop;
+#endif
+   attr.cl_arg           = &attr;
+
+   /* Task clauses. */
+   attr.if_clause        = 1;
+   attr.final_clause     = 0;
+   attr.untied_clause    = 1;
+   attr.mergeable_clause = 0;
+   attr.nogroup_clause   = 0;
+
+   /*
+    * Loop description.  Both num_tasks and grainsize are set;
+    * starpu_omp_taskloop_inline_begin() consults num_tasks first.
+    */
+   attr.is_loop          = 0;
+   attr.collapse         = 0;
+   attr.nb_iterations    = 400;
+   attr.num_tasks        = 5;
+   attr.grainsize        = 130;
+
+   starpu_omp_taskloop_inline_begin(&attr);
+   starpu_omp_taskloop_inline_end(&attr);
+
+   return 0;
+}
+#endif