Sfoglia il codice sorgente

- add test case for tasks
- fix omp task implementation
- add implicit barrier at the end of omp parallel

Olivier Aumage 11 anni fa
parent
commit
58e206fd4f

+ 3 - 0
include/starpu_openmp.h

@@ -52,6 +52,9 @@ extern "C"
 extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_parallel_region(const struct starpu_codelet * const parallel_region_cl, void * const parallel_region_cl_arg) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl, /* codelet whose cpu_funcs[0] is the task body */
+		void * const task_region_cl_arg, /* forwarded as cl_arg of the generated StarPU task */
+		int if_clause, int final_clause, int untied_clause, int mergeable_clause) __STARPU_OMP_NOTHROW; /* presumably mirror the OpenMP task clauses of the same names -- confirm */
 extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_master(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW;

+ 57 - 26
src/util/openmp_runtime_support.c

@@ -161,8 +161,9 @@ static void destroy_omp_thread_struct(struct starpu_omp_thread *thread)
 	starpu_omp_thread_delete(thread);
 }
 
-static void starpu_omp_task_entry(struct starpu_omp_task *task)
+static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task)
 {
+	STARPU_ASSERT(!task->is_implicit);
 	task->f(task->starpu_buffers, task->starpu_cl_arg);
 	task->state = starpu_omp_task_state_terminated;
 	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
@@ -175,6 +176,22 @@ static void starpu_omp_task_entry(struct starpu_omp_task *task)
 	STARPU_ASSERT(0); /* unreachable code */
 }
 
+static void starpu_omp_implicit_task_entry(struct starpu_omp_task *task) /* context entry point installed with makecontext() for implicit tasks */
+{
+	STARPU_ASSERT(task->is_implicit);
+	task->f(task->starpu_buffers, task->starpu_cl_arg); /* run the user function saved from the parallel region codelet */
+	starpu_omp_barrier(); /* implicit barrier at the end of the parallel region */
+	task->state = starpu_omp_task_state_terminated;
+	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
+	/*
+	 * The task has reached the terminated state: hand control back to the worker code for good.
+	 *
+	 * From here on, execution continues on the worker stack.
+	 */
+	setcontext(&thread->ctx);
+	STARPU_ASSERT(0); /* unreachable code */
+}
+
 /*
  * stop executing a task that is about to block
  * and give hand back to the thread
@@ -300,10 +317,9 @@ static void starpu_omp_explicit_task_exec(void *buffers[], void *cl_arg)
 		struct starpu_omp_task *parent_task = task->parent_task;
 		struct starpu_omp_region *parallel_region = task->owner_region;
 		_starpu_spin_lock(&parent_task->lock);
-		if (STARPU_ATOMIC_ADD(&task->parent_task->child_task_count, -1) == 0)
+		if (STARPU_ATOMIC_ADD(&parent_task->child_task_count, -1) == 0)
 		{
-			if (parent_task->child_task_count == 0
-					&& (parent_task->wait_on & starpu_omp_task_wait_on_task_childs))
+			if (parent_task->wait_on & starpu_omp_task_wait_on_task_childs)
 			{
 				parent_task->wait_on &= ~starpu_omp_task_wait_on_task_childs;
 				_wake_up_locked_task(parent_task);
@@ -320,7 +336,7 @@ static void starpu_omp_explicit_task_exec(void *buffers[], void *cl_arg)
 			{
 				_starpu_spin_lock(&waiting_task->lock);
 				_starpu_spin_lock(&parallel_region->lock);
-				parallel_region->waiting_task = 0;
+				parallel_region->waiting_task = NULL;
 				STARPU_ASSERT(waiting_task->wait_on & starpu_omp_task_wait_on_region_tasks);
 				waiting_task->wait_on &= ~starpu_omp_task_wait_on_region_tasks;
 				_wake_up_locked_task(waiting_task);
@@ -373,7 +389,14 @@ static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *pa
 		task->ctx.uc_link                 = NULL;
 		task->ctx.uc_stack.ss_sp          = task->stack;
 		task->ctx.uc_stack.ss_size        = _STARPU_STACKSIZE;
-		makecontext(&task->ctx, (void (*) ()) starpu_omp_task_entry, 1, task);
+		if (is_implicit)
+		{
+			makecontext(&task->ctx, (void (*) ()) starpu_omp_implicit_task_entry, 1, task);
+		}
+		else
+		{
+			makecontext(&task->ctx, (void (*) ()) starpu_omp_explicit_task_entry, 1, task);
+		}
 	}
 
 	return task;
@@ -533,7 +556,6 @@ void starpu_omp_shutdown(void)
 void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_region_cl,
 		void * const parallel_region_cl_arg)
 {
-	struct starpu_codelet parallel_region_cl = *_parallel_region_cl;
 	struct starpu_omp_thread *master_thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
 	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *region = task->owner_region;
@@ -606,18 +628,6 @@ void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_re
 	task->nested_region = new_region;
 
 	/*
-	 * save pointer to the regions user function from the parallel region codelet
-	 *
-	 * TODO: add support for multiple/heterogeneous implementations
-	 */
-	void (*parallel_region_f)(void **starpu_buffers, void *starpu_cl_arg) = parallel_region_cl.cpu_funcs[0];
-
-	/*
-	 * plug the task wrapper into the parallel region codelet instead, to support task preemption
-	 */
-	parallel_region_cl.cpu_funcs[0] = starpu_omp_implicit_task_exec;
-
-	/*
 	 * create the starpu tasks for the implicit omp tasks,
 	 * create explicit dependencies between these starpu tasks and the continuation starpu task
 	 */
@@ -626,10 +636,21 @@ void starpu_omp_parallel_region(const struct starpu_codelet * const _parallel_re
 			implicit_task != starpu_omp_task_list_end(new_region->implicit_task_list);
 			implicit_task  = starpu_omp_task_list_next(implicit_task))
 	{
-		implicit_task->f = parallel_region_f;
+		implicit_task->cl = *_parallel_region_cl;
+		/*
+		 * save pointer to the regions user function from the parallel region codelet
+		 *
+		 * TODO: add support for multiple/heterogeneous implementations
+		 */
+		implicit_task->f = implicit_task->cl.cpu_funcs[0];
+
+		/*
+		 * plug the task wrapper into the parallel region codelet instead, to support task preemption
+		 */
+		implicit_task->cl.cpu_funcs[0] = starpu_omp_implicit_task_exec;
 
 		implicit_task->starpu_task = starpu_task_create();
-		implicit_task->starpu_task->cl = &parallel_region_cl;
+		implicit_task->starpu_task->cl = &implicit_task->cl;
 		implicit_task->starpu_task->cl_arg = parallel_region_cl_arg;
 		implicit_task->starpu_task->omp_task = implicit_task;
 		implicit_task->starpu_task->workerid = implicit_task->owner_thread->starpu_worker_id;
@@ -884,7 +905,6 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 		void * const task_region_cl_arg,
 		int if_clause, int final_clause, int untied_clause, int mergeable_clause)
 {
-	struct starpu_codelet task_region_cl = *_task_region_cl;
 	struct starpu_omp_task *generating_task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
 	struct starpu_omp_region *parallel_region = generating_task->owner_region;
 	int is_undeferred = 0;
@@ -917,12 +937,15 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 	}
 	if (is_merged)
 	{
+		struct starpu_codelet task_region_cl = *_task_region_cl;
+		(void)task_region_cl;
 		_STARPU_ERROR("omp merged task unimplemented\n");
 	}
 	else
 	{
 		struct starpu_omp_task *generated_task =
 			create_omp_task_struct(generating_task, NULL, parallel_region, 0);
+		generated_task->cl = *_task_region_cl;
 		if (untied_clause)
 		{
 			is_untied = 1;
@@ -932,12 +955,20 @@ void starpu_omp_task_region(const struct starpu_codelet * const _task_region_cl,
 		generated_task->is_untied = is_untied;
 		generated_task->task_group = generating_task->task_group;
 
-		void (*task_region_f)(void **starpu_buffers, void *starpu_cl_arg) = task_region_cl.cpu_funcs[0];
-		task_region_cl.cpu_funcs[0] = starpu_omp_explicit_task_exec;
-		generated_task->f = task_region_f;
+		/*
+		 * save pointer to the regions user function from the task region codelet
+		 *
+		 * TODO: add support for multiple/heterogeneous implementations
+		 */
+		generated_task->f = generated_task->cl.cpu_funcs[0];
+
+		/*
+		 * plug the task wrapper into the task region codelet instead, to support task preemption
+		 */
+		generated_task->cl.cpu_funcs[0] = starpu_omp_explicit_task_exec;
 
 		generated_task->starpu_task = starpu_task_create();
-		generated_task->starpu_task->cl = &task_region_cl;
+		generated_task->starpu_task->cl = &generated_task->cl;
 		generated_task->starpu_task->cl_arg = task_region_cl_arg;
 		generated_task->starpu_task->omp_task = generated_task;
 		/* if the task is tied, execute_on_a_specific_worker will be changed to 1

+ 1 - 0
src/util/openmp_runtime_support.h

@@ -220,6 +220,7 @@ LIST_TYPE(starpu_omp_task,
 	struct starpu_omp_implicit_task_icvs implicit_task_icvs;
 
 	struct starpu_task *starpu_task;
+	struct starpu_codelet cl;
 	void **starpu_buffers;
 	void *starpu_cl_arg;
 

+ 4 - 0
tests/Makefile.am

@@ -231,6 +231,7 @@ noinst_PROGRAMS =				\
 	openmp/parallel_single_nowait_01	\
 	openmp/parallel_critical_01		\
 	openmp/parallel_critical_named_01	\
+	openmp/task_01				\
 	overlap/overlap				\
 	overlap/gpu_concurrency			\
 	parallel_tasks/explicit_combined_worker	\
@@ -477,6 +478,9 @@ openmp_parallel_critical_01_SOURCES = 	\
 openmp_parallel_critical_named_01_SOURCES = 	\
 	openmp/parallel_critical_named_01.c
 
+openmp_task_01_SOURCES = 	\
+	openmp/task_01.c
+
 ###################
 # Block interface #
 ###################

+ 96 - 0
tests/openmp/task_01.c

@@ -0,0 +1,96 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <pthread.h>
+#include <starpu.h>
+#include "../helper.h"
+#include <stdio.h>
+
+#if !defined(STARPU_OPENMP)
+int main(int argc, char **argv) /* STARPU_OPENMP is not defined in this build: nothing to run */
+{
+	return STARPU_TEST_SKIPPED; /* report the test as skipped */
+}
+#else
+__attribute__((constructor)) /* GCC constructor attribute: called automatically before main() */
+static void omp_constructor(void)
+{
+	int ret = starpu_omp_init();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); /* presumably fails the test when ret signals an error -- confirm against helper.h */
+}
+
+__attribute__((destructor)) /* GCC destructor attribute: called automatically after main() returns or exit() is called */
+static void omp_destructor(void)
+{
+	starpu_omp_shutdown(); /* counterpart of the starpu_omp_init() call made in omp_constructor() */
+}
+
+void task_region_g(void *buffers[], void *args) /* body of the explicit tasks: prints the calling pthread id and StarPU worker id */
+{
+	(void) buffers; /* unused */
+	(void) args; /* unused */
+	int worker_id;
+	pthread_t tid;
+	tid = pthread_self();
+	worker_id = starpu_worker_get_id();
+	printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id); /* NOTE(review): casting pthread_t to void * is not portable (pthread_t is an opaque type) */
+}
+
+static struct starpu_codelet task_region_cl = /* codelet passed to starpu_omp_task_region() by parallel_region_f() */
+{
+	.cpu_funcs    = { task_region_g, NULL }, /* single CPU implementation */
+	.where        = STARPU_CPU,
+	.nbuffers     = 0, /* no data buffers */
+	.model        = NULL
+};
+
+void parallel_region_f(void *buffers[], void *args) /* body of the parallel region: prints its ids, then generates four explicit tasks */
+{
+	(void) buffers; /* unused */
+	(void) args; /* unused */
+	int worker_id;
+	pthread_t tid;
+
+	tid = pthread_self();
+	worker_id = starpu_worker_get_id();
+	printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); /* NOTE(review): casting pthread_t to void * is not portable (pthread_t is an opaque type) */
+	
+	/*
+	 * generate four explicit tasks running task_region_cl;
+	 * the trailing arguments are, in order:
+	 * if_clause = 1, final_clause = 0,
+	 * untied_clause = 1, mergeable_clause = 0
+	 */
+	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
+	starpu_omp_task_region(&task_region_cl, NULL, 1, 0, 1, 0);
+}
+
+static struct starpu_codelet parallel_region_cl = /* codelet passed to starpu_omp_parallel_region() by main() */
+{
+	.cpu_funcs    = { parallel_region_f, NULL }, /* single CPU implementation */
+	.where        = STARPU_CPU,
+	.nbuffers     = 0, /* no data buffers */
+	.model        = NULL
+};
+
+int
+main (int argc, char *argv[]) { /* argc/argv are unused; runtime init and shutdown are done by omp_constructor()/omp_destructor() */
+	starpu_omp_parallel_region(&parallel_region_cl, NULL); /* run one parallel region with parallel_region_f as its body */
+	return 0;
+}
+#endif