10 years ago · 836c1e148c
--- a/Makefile.am
+++ b/Makefile.am
@@ -2,6 +2,7 @@
 
				 #
			
 
				 # Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+# Copyright (C) 2014  Inria
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -65,6 +66,7 @@ versinclude_HEADERS = 				\
 
				 	include/starpu_fxt.h			\
			
 
				 	include/starpu_cuda.h			\
			
 
				 	include/starpu_opencl.h			\
			
 
				+	include/starpu_openmp.h			\
			
 
				 	include/starpu_sink.h			\
			
 
				 	include/starpu_mic.h			\
			
 
				 	include/starpu_scc.h			\
			
--- a/configure.ac
+++ b/configure.ac
@@ -3,7 +3,7 @@
 
				 # Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				 # Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				 # Copyright (C) 2011  Télécom-SudParis
			
 
				-# Copyright (C) 2011, 2012  Institut National de Recherche en Informatique et Automatique
			
 
				+# Copyright (C) 2011, 2012, 2014  Institut National de Recherche en Informatique et Automatique
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -2031,6 +2031,25 @@ AM_CONDITIONAL([RUN_GCC_PLUGIN_TESTS],
 
				 
			
 
				 ###############################################################################
			
 
				 #                                                                             #
			
 
				+#                            OpenMP runtime support                           #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+
			
 
				+AC_ARG_ENABLE(openmp, [AS_HELP_STRING([--enable-openmp],
			
 
				+			[build the OpenMP runtime support (experimental)])],
			
 
				+			enable_openmp=$enableval, enable_openmp=no)
			
 
				+
			
 
				+AC_MSG_CHECKING(for OpenMP runtime support)
			
 
				+
			
 
				+if test x$enable_openmp = xyes; then
			
 
				+	AC_DEFINE(STARPU_OPENMP, 1, [Define this to enable OpenMP runtime support])
			
 
				+fi
			
 
				+
			
 
				+AM_CONDITIONAL([STARPU_OPENMP], [test "x$enable_openmp" = "xyes"])
			
 
				+AC_MSG_RESULT($enable_openmp)
			
 
				+
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				 #                               SOCL interface                                #
			
 
				 #                                                                             #
			
 
				 ###############################################################################
			
@@ -2602,6 +2621,7 @@ AC_MSG_NOTICE([
 
				 	       FFT Support:                                 $fft_support
			
 
				 	       GCC plug-in:                                 $build_gcc_plugin
			
 
				 	       GCC plug-in test suite (requires GNU Guile): $run_gcc_plugin_test_suite
			
 
				+	       OpenMP runtime support enabled:              $enable_openmp
			
 
				 	       SOCL enabled:                                $build_socl
			
 
				                SOCL test suite:                             $run_socl_check
			
 
				                Scheduler Hypervisor:                        $build_sc_hypervisor
			
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -46,6 +46,7 @@ chapters =	\
 
				 	chapters/19c_extensions.doxy \
			
 
				 	chapters/20socl_opencl_extensions.doxy \
			
 
				 	chapters/21simgrid.doxy \
			
 
				+	chapters/22openmp_runtime_support.doxy \
			
 
				 	chapters/40environment_variables.doxy \
			
 
				 	chapters/41configure_options.doxy \
			
 
				 	chapters/45files.doxy \
			
@@ -86,6 +87,7 @@ chapters =	\
 
				 	chapters/api/mpi.doxy \
			
 
				 	chapters/api/multiformat_data_interface.doxy \
			
 
				 	chapters/api/opencl_extensions.doxy \
			
 
				+	chapters/api/openmp_runtime_support.doxy \
			
 
				 	chapters/api/mic_extensions.doxy \
			
 
				 	chapters/api/scc_extensions.doxy \
			
 
				 	chapters/api/parallel_tasks.doxy \
			
@@ -202,6 +204,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 
				 	$(top_srcdir)/include/starpu_fxt.h		\
			
 
				 	$(top_srcdir)/include/starpu_cuda.h		\
			
 
				 	$(top_srcdir)/include/starpu_opencl.h		\
			
 
				+	$(top_srcdir)/include/starpu_openmp.h		\
			
 
				 	$(top_srcdir)/include/starpu_sink.h		\
			
 
				 	$(top_srcdir)/include/starpu_mic.h		\
			
 
				 	$(top_srcdir)/include/starpu_scc.h		\
			
--- a/doc/doxygen/chapters/22openmp_runtime_support.doxy
+++ b/doc/doxygen/chapters/22openmp_runtime_support.doxy
@@ -0,0 +1,463 @@
 
				+/*
			
 
				+ * This file is part of the StarPU Handbook.
			
 
				+ * Copyright (C) 2014 Inria
			
 
				+ * See the file version.doxy for copying conditions.
			
 
				+ */
			
 
				+
			
 
				+/*! \page OpenMPRuntimeSupport The StarPU OpenMP Runtime Support (SORS)
			
 
				+
			
 
				+StarPU provides the necessary routines and support to implement an <a
			
 
				+href="http://www.openmp.org/">OpenMP</a> runtime compliant with the
			
 
				+revision 3.1 of the language specification, and compliant with the
			
 
				+task-related data dependency functionalities introduced in the revision
			
 
				+4.0 of the language. This StarPU OpenMP Runtime Support (SORS) has been
			
 
				+designed to be targetted by OpenMP compilers such as the Klang-OMP
			
 
				+compiler. Most supported OpenMP directives can both be implemented
			
 
				+inline or as outlined functions.
			
 
				+
			
 
				+All functions are defined in \ref API_OpenMP_Runtime_Support.
			
 
				+
			
 
				+\section Implementation Implementation Details and Specificities
			
 
				+
			
 
				+\subsection MainThread Main Thread
			
 
				+
			
 
				+When using the SORS, the main thread gets involved in executing OpenMP tasks
			
 
				+just like every other threads, in order to be compliant with the
			
 
				+specification execution model. This contrasts with StarPU's usual
			
 
				+execution model where the main thread submit tasks but does not take
			
 
				+part in executing them.
			
 
				+
			
 
				+\subsection TaskSemantics Extended Task Semantics
			
 
				+
			
 
				+The semantics of tasks generated by the SORS are extended with respect
			
 
				+to regular StarPU tasks in that SORS' tasks may block and be preempted
			
 
				+by SORS call, whereas regular StarPU tasks cannot. SORS tasks may
			
 
				+coexist with regular StarPU tasks. However, only the tasks created using
			
 
				+SORS API functions inherit from extended semantics.
			
 
				+
			
 
				+\section Configuration Configuration
			
 
				+
			
 
				+The SORS can be compiled into <c>libstarpu</c>
			
 
				+by providing the <c>--enable-openmp</c> flag to StarPU's
			
 
				+<c>configure</c>. Conditional compiled source codes may check for the
			
 
				+availability of the OpenMP Runtime Support by testing whether the C
			
 
				+preprocessor macro <c>STARPU_OPENMP</c> is defined or not.
			
 
				+
			
 
				+\section InitExit Initialization and Shutdown
			
 
				+
			
 
				+The SORS needs to be executed/terminated by the
			
 
				+starpu_omp_init()/starpu_omp_shutdown() instead of
			
 
				+starpu_init()/starpu_shutdown(). This requirement is necessary to make
			
 
				+sure that the main thread gets the proper execution environment to run
			
 
				+OpenMP tasks. These calls will usually be performed by a compiler
			
 
				+runtime. Thus, they can be executed from a constructor/destructor such
			
 
				+as this:
			
 
				+
			
 
				+\code{.c}
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa starpu_omp_init()
			
 
				+\sa starpu_omp_shutdown()
			
 
				+
			
 
				+\section Parallel Parallel Regions and Worksharing
			
 
				+
			
 
				+The SORS provides functions to create OpenMP parallel regions as well as
			
 
				+mapping work on participating workers. The current implementation does
			
 
				+not provide nested active parallel regions: Parallel regions may be
			
 
				+created recursively, however only the first level parallel region may
			
 
				+have more than one worker. From an internal point-of-view, the SORS'
			
 
				+parallel regions are implemented as a set of implicit, extended semantics
			
 
				+StarPU tasks, following the execution model of the OpenMP specification.
			
 
				+Thus the SORS' parallel region tasks may block and be preempted, by
			
 
				+SORS calls, enabling constructs such as barriers.
			
 
				+
			
 
				+\subsection OMPParallel Parallel Regions
			
 
				+
			
 
				+Parallel regions can be created with the function
			
 
				+starpu_omp_parallel_region() which accepts a set of attributes as
			
 
				+parameter. The execution of the calling task is suspended until the
			
 
				+parallel region completes. The <c>attr.cl</c> field is a regular StarPU
			
 
				+codelet. However only CPU codelets are supported for parallel regions.
			
 
				+Here is an example of use:
			
 
				+
			
 
				+\code{.c}
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	pthread_t tid = pthread_self();
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void f(void)
			
 
				+{
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa struct starpu_omp_parallel_region_attr
			
 
				+\sa starpu_omp_parallel_region()
			
 
				+
			
 
				+\subsection OMPFor Parallel For
			
 
				+
			
 
				+OpenMP <c>for</c> loops are provided by the starpu_omp_for() group of
			
 
				+functions. Variants are available for inline or outlined
			
 
				+implementations. The SORS supports <c>static</c>, <c>dynamic</c>, and
			
 
				+<c>guided</c> loop scheduling clauses. The <c>auto</c> scheduling clause
			
 
				+is implemented as <c>static</c>. The <c>runtime</c> scheduling clause
			
 
				+honors the scheduling mode selected through the environment variable
			
 
				+OMP_SCHEDULE or the starpu_omp_set_schedule() function. For loops with
			
 
				+the <c>ordered</c> clause are also supported. An implicit barrier can be
			
 
				+enforced or skipped at the end of the worksharing construct, according
			
 
				+to the value of the <c>nowait</c> parameter.
			
 
				+
			
 
				+The canonical family of starpu_omp_for() functions provide each instance
			
 
				+with the first iteration number and the number of iterations (possibly
			
 
				+zero) to perform. The alternate family of starpu_omp_for_alt() functions
			
 
				+provide each instance with the (possibly empty) range of iterations to
			
 
				+perform, including the first and excluding the last.
			
 
				+
			
 
				+The family of starpu_omp_ordered() functions enable to implement
			
 
				+OpenMP's ordered construct, a region with a parallel for loop that is
			
 
				+guaranteed to be executed in the sequential order of the loop
			
 
				+iterations.
			
 
				+
			
 
				+\code{.c}
			
 
				+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	for (; nb_i > 0; i++, nb_i--)
			
 
				+	{
			
 
				+		array[i] = 1;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	starpu_omp_for(for_g, NULL, NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0);
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa starpu_omp_for()
			
 
				+\sa starpu_omp_for_inline_first()
			
 
				+\sa starpu_omp_for_inline_next()
			
 
				+\sa starpu_omp_for_alt()
			
 
				+\sa starpu_omp_for_inline_first_alt()
			
 
				+\sa starpu_omp_for_inline_next_alt()
			
 
				+\sa starpu_omp_ordered()
			
 
				+\sa starpu_omp_ordered_inline_begin()
			
 
				+\sa starpu_omp_ordered_inline_end()
			
 
				+
			
 
				+
			
 
				+\subsection OMPSections Sections
			
 
				+OpenMP <c>sections</c> worksharing constructs are supported using the
			
 
				+set of starpu_omp_sections() variants. The general principle is either
			
 
				+to provide an array of per-section functions or a single function that
			
 
				+will redirect to execution to the suitable per-section functions. An
			
 
				+implicit barrier can be enforced or skipped at the end of the
			
 
				+worksharing construct, according to the value of the <c>nowait</c>
			
 
				+parameter.
			
 
				+
			
 
				+\code{.c}
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+
			
 
				+	section_funcs[0] = f;
			
 
				+	section_funcs[1] = g;
			
 
				+	section_funcs[2] = h;
			
 
				+	section_funcs[3] = i;
			
 
				+
			
 
				+	section_args[0] = arg_f;
			
 
				+	section_args[1] = arg_g;
			
 
				+	section_args[2] = arg_h;
			
 
				+	section_args[3] = arg_i;
			
 
				+
			
 
				+	starpu_omp_sections(4, section_f, section_args, 0);
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa starpu_omp_sections()
			
 
				+\sa starpu_omp_sections_combined()
			
 
				+
			
 
				+\subsection OMPSingle Single
			
 
				+OpenMP <c>single</c> workharing constructs are supported using the set
			
 
				+of starpu_omp_single() variants. An
			
 
				+implicit barrier can be enforced or skipped at the end of the
			
 
				+worksharing construct, according to the value of the <c>nowait</c>
			
 
				+parameter.
			
 
				+
			
 
				+\code{.c}
			
 
				+void single_f(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	pthread_t tid = pthread_self();
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	starpu_omp_single(single_f, NULL, 0);
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+The SORS also provides dedicated support for  <c>single</c> sections
			
 
				+with <c>copyprivate</c> clauses through the
			
 
				+starpu_omp_single_copyprivate() function variants. The OpenMP
			
 
				+<c>master</c> directive is supported as well using the
			
 
				+starpu_omp_master() function variants.
			
 
				+
			
 
				+\sa starpu_omp_master()
			
 
				+\sa starpu_omp_master_inline()
			
 
				+\sa starpu_omp_single()
			
 
				+\sa starpu_omp_single_inline()
			
 
				+\sa starpu_omp_single_copyprivate()
			
 
				+\sa starpu_omp_single_copyprivate_inline_begin()
			
 
				+\sa starpu_omp_single_copyprivate_inline_end()
			
 
				+
			
 
				+\section Task Tasks
			
 
				+
			
 
				+The SORS implements the necessary support of OpenMP 3.1 and OpenMP 4.0's
			
 
				+so-called explicit tasks, together with OpenMP 4.0's data dependency
			
 
				+management.
			
 
				+
			
 
				+\subsection OMPTask Explicit Tasks
			
 
				+Explicit OpenMP tasks are created with the SORS using the
			
 
				+starpu_omp_task_region() function. The implementation supports
			
 
				+<c>if</c>, <c>final</c>, <c>untied</c> and <c>mergeable</c> clauses
			
 
				+as defined in the OpenMP specification. Unless specified otherwise by
			
 
				+the appropriate clause(s), the created task may be executed by any
			
 
				+participating worker of the current parallel region.
			
 
				+
			
 
				+The current SORS implementation requires explicit tasks to be created
			
 
				+within the context of an active parallel region. In particular, an
			
 
				+explicit task cannot be created by the main thread outside of a parallel
			
 
				+region. Explicit OpenMP tasks created using starpu_omp_task_region() are
			
 
				+implemented as StarPU tasks with extended semantics, and may as such be
			
 
				+blocked and preempted by SORS routines.
			
 
				+
			
 
				+The current SORS implementation supports recursive explicit tasks
			
 
				+creation, to ensure compliance with the OpenMP specification. However,
			
 
				+it should be noted that StarPU is not designed nor optimized for
			
 
				+efficiently scheduling of recursive task applications.
			
 
				+
			
 
				+The code below shows how to create 4 explicit tasks within a parallel
			
 
				+region.
			
 
				+
			
 
				+\code{.c}
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	pthread tid = pthread_self();
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa struct starpu_omp_task_region_attr
			
 
				+\sa starpu_omp_task_region()
			
 
				+
			
 
				+\subsection DataDependencies Data Dependencies
			
 
				+The SORS implements inter-tasks data dependencies as specified in OpenMP
			
 
				+4.0. Data dependencies are expressed using regular StarPU data handles
			
 
				+(<c>starpu_data_handle_t</c>) plugged into the task's <c>attr.cl</c>
			
 
				+codelet. The family of starpu_vector_data_register() -like functions and the
			
 
				+starpu_data_lookup() function may be used to register a memory area and
			
 
				+to retrieve the current data handle associated with a pointer
			
 
				+respectively. The testcase <c>./tests/openmp/task_02.c</c> gives a
			
 
				+detailed example of using OpenMP 4.0 tasks dependencies with the SORS
			
 
				+implementation.
			
 
				+
			
 
				+Note: the OpenMP 4.0 specification only supports data dependencies
			
 
				+between sibling tasks, that is tasks created by the same implicit or
			
 
				+explicit parent task. The current SORS implementation also only supports data
			
 
				+dependencies between sibling tasks. Consequently the behaviour is
			
 
				+unspecified if dependencies are expressed beween tasks that have not
			
 
				+been created by the same parent task.
			
 
				+
			
 
				+\subsection TaskSyncs TaskWait and TaskGroup
			
 
				+The SORS implements both the <c>taskwait</c> and <c>taskgroup</c> OpenMP
			
 
				+task synchronization constructs specified in OpenMP 4.0, with the
			
 
				+starpu_omp_taskwait() and starpu_omp_taskgroup() functions respectively.
			
 
				+
			
 
				+An example of starpu_omp_taskwait() use, creating two explicit tasks and
			
 
				+waiting for their completion:
			
 
				+
			
 
				+\code{.c}
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	printf("Hello, World!\n");
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_taskwait();
			
 
				+\endcode
			
 
				+
			
 
				+An example of starpu_omp_taskgroup() use, creating a task group of two explicit tasks:
			
 
				+
			
 
				+\code{.c}
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	printf("Hello, World!\n");
			
 
				+}
			
 
				+
			
 
				+void taskgroup_f(void *arg)
			
 
				+{
			
 
				+	(void)arg;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	starpu_omp_taskgroup(taskgroup_f, (void *)NULL);
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+\sa starpu_omp_task_region()
			
 
				+\sa starpu_omp_taskwait()
			
 
				+\sa starpu_omp_taskgroup()
			
 
				+\sa starpu_omp_taskgroup_inline_begin()
			
 
				+\sa starpu_omp_taskgroup_inline_end()
			
 
				+
			
 
				+\section Synchronization Synchronization Support
			
 
				+
			
 
				+The SORS implements objects and method to build common OpenMP
			
 
				+synchronization constructs.
			
 
				+
			
 
				+\subsection SimpleLock Simple Locks
			
 
				+
			
 
				+The SORS Simple Locks are opaque starpu_omp_lock_t objects enabling multiple
			
 
				+tasks to synchronize with each others, following the Simple Lock
			
 
				+constructs defined by the OpenMP specification. In accordance with such
			
 
				+specification, simple locks may not by acquired multiple times by the
			
 
				+same task, without being released in-between; otherwise, deadlocks may
			
 
				+result. Codes requiring the possibility to lock multiple times
			
 
				+recursively should use Nestable Locks (\ref NestableLock). Codes NOT
			
 
				+requiring the possibility to lock multiple times recursively should use
			
 
				+Simple Locks as they incur less processing overhead than Nestable Locks.
			
 
				+
			
 
				+\sa starpu_omp_lock_t
			
 
				+\sa starpu_omp_init_lock()
			
 
				+\sa starpu_omp_destroy_lock()
			
 
				+\sa starpu_omp_set_lock()
			
 
				+\sa starpu_omp_unset_lock()
			
 
				+\sa starpu_omp_test_lock()
			
 
				+
			
 
				+\subsection NestableLock Nestable Locks
			
 
				+
			
 
				+The SORS Nestable Locks are opaque starpu_omp_nest_lock_t objects enabling
			
 
				+multiple tasks to synchronize with each others, following the Nestable
			
 
				+Lock constructs defined by the OpenMP specification. In accordance with
			
 
				+such specification, nestable locks may by acquired multiple times
			
 
				+recursively by the same task without deadlocking. Nested locking and
			
 
				+unlocking operations must be well parenthesized at any time, otherwise
			
 
				+deadlock and/or undefined behaviour may occur.  Codes requiring the
			
 
				+possibility to lock multiple times recursively should use Nestable
			
 
				+Locks. Codes NOT requiring the possibility to lock multiple times
			
 
				+recursively should use Simple Locks (\ref SimpleLock) instead, as they
			
 
				+incur less processing overhead than Nestable Locks.
			
 
				+
			
 
				+\sa starpu_omp_nest_lock_t
			
 
				+\sa starpu_omp_init_nest_lock()
			
 
				+\sa starpu_omp_destroy_nest_lock()
			
 
				+\sa starpu_omp_set_nest_lock()
			
 
				+\sa starpu_omp_unset_nest_lock()
			
 
				+\sa starpu_omp_test_nest_lock()
			
 
				+
			
 
				+\subsection Critical Critical Sections
			
 
				+
			
 
				+The SORS implements support for OpenMP critical sections through the
			
 
				+family of starpu_omp_critical functions. Critical sections may optionally
			
 
				+be named. There is a single, common anonymous critical section. Mutual
			
 
				+exclusion only occur within the scope of single critical section, either
			
 
				+a named one or the anonymous one.
			
 
				+
			
 
				+\sa starpu_omp_critical()
			
 
				+\sa starpu_omp_critical_inline_begin()
			
 
				+\sa starpu_omp_critical_inline_end()
			
 
				+
			
 
				+\subsection Barrier Barriers
			
 
				+
			
 
				+The SORS provides the starpu_omp_barrier() function to implement
			
 
				+barriers over parallel region teams. In accordance with the OpenMP
			
 
				+specification, the starpu_omp_barrier() function waits for every
			
 
				+implicit task of the parallel region to reach the barrier and every
			
 
				+explicit task launched by the parallel region to complete, before
			
 
				+returning.
			
 
				+
			
 
				+\sa starpu_omp_barrier()
			
 
				+
			
 
				+*/
			
--- a/doc/doxygen/chapters/45files.doxy
+++ b/doc/doxygen/chapters/45files.doxy
@@ -26,6 +26,7 @@
 
				 \file starpu_hash.h
			
 
				 \file starpu_mic.h
			
 
				 \file starpu_opencl.h
			
 
				+\file starpu_openmp.h
			
 
				 \file starpu_perfmodel.h
			
 
				 \file starpu_profiling.h
			
 
				 \file starpu_rand.h
			
--- a/doc/doxygen/chapters/api/openmp_runtime_support.doxy
+++ b/doc/doxygen/chapters/api/openmp_runtime_support.doxy
@@ -0,0 +1,955 @@
 
				+/*
			
 
				+ * This file is part of the StarPU Handbook.
			
 
				+ * Copyright (C) 2014 Inria
			
 
				+ * See the file version.doxy for copying conditions.
			
 
				+ */
			
 
				+
			
 
				+/*! \defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
			
 
				+
			
 
				+\brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
			
 
				+
			
 
				+
			
 
				+\struct starpu_omp_lock_t
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Opaque Simple Lock object (\ref SimpleLock) for inter-task synchronization operations.
			
 
				+
			
 
				+\sa starpu_omp_init_lock()
			
 
				+\sa starpu_omp_destroy_lock()
			
 
				+\sa starpu_omp_set_lock()
			
 
				+\sa starpu_omp_unset_lock()
			
 
				+\sa starpu_omp_test_lock()
			
 
				+
			
 
				+\var starpu_omp_lock_t::internal
			
 
				+Is an opaque pointer for internal use.
			
 
				+
			
 
				+
			
 
				+\struct starpu_omp_nest_lock_t
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Opaque Nestable Lock object (\ref NestableLock) for inter-task synchronization operations.
			
 
				+
			
 
				+\sa starpu_omp_init_nest_lock()
			
 
				+\sa starpu_omp_destroy_nest_lock()
			
 
				+\sa starpu_omp_set_nest_lock()
			
 
				+\sa starpu_omp_unset_nest_lock()
			
 
				+\sa starpu_omp_test_nest_lock()
			
 
				+\var starpu_omp_nest_lock_t::internal
			
 
				+Is an opaque pointer for internal use.
			
 
				+
			
 
				+
			
 
				+\enum starpu_omp_sched_value
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Set of constants for selecting the for loop iteration scheduling algorithm
			
 
				+(\ref OMPFor) as defined by the OpenMP specification.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_undefined
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Undefined iteration scheduling algorithm.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_static
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+\b Static iteration scheduling algorithm.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_dynamic
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+\b Dynamic iteration scheduling algorithm.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_guided
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+\b Guided iteration scheduling algorithm.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_auto
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+\b Automatically choosen iteration scheduling algorithm.
			
 
				+
			
 
				+\var starpu_omp_sched_value::starpu_omp_sched_runtime
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Choice of iteration scheduling algorithm deferred at \b runtime.
			
 
				+
			
 
				+\sa starpu_omp_for()
			
 
				+\sa starpu_omp_for_inline_first()
			
 
				+\sa starpu_omp_for_inline_next()
			
 
				+\sa starpu_omp_for_alt()
			
 
				+\sa starpu_omp_for_inline_first_alt()
			
 
				+\sa starpu_omp_for_inline_next_alt()
			
 
				+
			
 
				+
			
 
				+\enum starpu_omp_proc_bind_value
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Set of constants for selecting the processor binding method, as defined in the
			
 
				+OpenMP specification.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_undefined
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Undefined processor binding method.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_false
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Team threads may be moved between places at any time.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_true
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Team threads may not be moved between places.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_master
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Assign every thread in the team to the same place as the \b master thread.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_close
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Assign every thread in the team to a place \b close to the parent thread.
			
 
				+
			
 
				+\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_spread
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Assign team threads as a sparse distribution over the selected places.
			
 
				+
			
 
				+\sa starpu_omp_get_proc_bind()
			
 
				+
			
 
				+
			
 
				+\struct starpu_omp_parallel_region_attr
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Set of attributes used for creating a new parallel region.
			
 
				+
			
 
				+\sa starpu_omp_parallel_region()
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::cl
			
 
				+
			
 
				+Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the parallel region
			
 
				+implicit tasks. The codelet must provide a CPU implementation function.
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::handles
			
 
				+
			
 
				+Is an array of zero or more ::starpu_data_handle_t data handle to be passed to
			
 
				+the parallel region implicit tasks.
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::cl_arg
			
 
				+
			
 
				+Is an optional pointer to an inline argument to be passed to the region implicit tasks.
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::cl_arg_size
			
 
				+
			
 
				+Is the size of the optional inline argument to be passed to the region implicit tasks, or 0 if unused.
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::cl_arg_free
			
 
				+
			
 
				+Is a boolean indicating whether the optional inline argument should be automatically freed (true), or not (false).
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::if_clause
			
 
				+
			
 
				+Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
			
 
				+omp parallel</c> is true or false.
			
 
				+
			
 
				+\var starpu_omp_parallel_region_attr::num_threads
			
 
				+
			
 
				+Is an integer indicating the requested number of threads in the team of the
			
 
				+newly created parallel region, or 0 to let the runtime choose the number of
			
 
				+threads alone. This attribute may be ignored by the runtime system if the
			
 
				+requested number of threads is higher than the number of threads that the
			
 
				+runtime can create.
			
 
				+
			
 
				+\struct starpu_omp_task_region_attr
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Set of attributes used for creating a new task region.
			
 
				+
			
 
				+\sa starpu_omp_task_region()
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::cl
			
 
				+
			
 
				+Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the task region
			
 
				+explicit task. The codelet must provide a CPU implementation function or an
			
 
				+accelerator implementation for offloaded target regions.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::handles
			
 
				+
			
 
				+Is an array of zero or more ::starpu_data_handle_t data handle to be passed to
			
 
				+the task region explicit tasks.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::cl_arg
			
 
				+
			
 
				+Is an optional pointer to an inline argument to be passed to the region implicit tasks.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::cl_arg_size
			
 
				+
			
 
				+Is the size of the optional inline argument to be passed to the region implicit
			
 
				+tasks, or 0 if unused.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::cl_arg_free
			
 
				+
			
 
				+Is a boolean indicating whether the optional inline argument should be
			
 
				+automatically freed (true), or not (false).
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::if_clause
			
 
				+
			
 
				+Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
			
 
				+omp task</c> is true or false.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::final_clause
			
 
				+
			
 
				+Is a boolean indicating whether the \b final clause of the corresponding <c>pragma
			
 
				+omp task</c> is true or false.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::untied_clause
			
 
				+
			
 
				+Is a boolean indicating whether the \b untied clause of the corresponding <c>pragma
			
 
				+omp task</c> is true or false.
			
 
				+
			
 
				+\var starpu_omp_task_region_attr::mergeable_clause
			
 
				+
			
 
				+Is a boolean indicating whether the \b mergeable clause of the corresponding <c>pragma
			
 
				+omp task</c> is true or false.
			
 
				+
			
 
				+@name Initialisation
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\def STARPU_OPENMP
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This macro is defined when StarPU has been installed with OpenMP Runtime
			
 
				+support. It should be used in your code to detect the availability of
			
 
				+the runtime support for OpenMP.
			
 
				+
			
 
				+\fn int starpu_omp_init(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Initializes StarPU and its OpenMP Runtime support.
			
 
				+
			
 
				+\fn int starpu_omp_shutdown(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Shutdown StarPU and its OpenMP Runtime support.
			
 
				+
			
 
				+@name Parallel
			
 
				+\anchor ORS_Parallel
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\fn void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Generates and launch an OpenMP parallel region and return after its
			
 
				+completion. \p attr specifies the attributes for the generated parallel region.
			
 
				+If this function is called from inside another, generating, parallel region, the
			
 
				+generated parallel region is nested within the generating parallel region.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp parallel</c>.
			
 
				+
			
 
				+\fn void starpu_omp_master(void (*f)(void *arg), void *arg)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Executes a function only on the master thread of the OpenMP
			
 
				+parallel region it is called from. When called from a thread that is not the
			
 
				+master of the parallel region it is called from, this function does nothing. \p
			
 
				+f is the function to be called. \p arg is an argument passed to function \p f.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp master</c>.
			
 
				+
			
 
				+\fn int starpu_omp_master_inline(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Determines whether the calling thread is the master of the OpenMP parallel region
			
 
				+it is called from or not.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp master</c> without code
			
 
				+outlining.
			
 
				+\return <c>!0</c> if called by the region's master thread.
			
 
				+\return <c>0</c> if not called by the region's master thread.
			
 
				+
			
 
				+@name Synchronization
			
 
				+\anchor ORS_Synchronization
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\fn void starpu_omp_barrier(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits until each participating thread of the innermost OpenMP parallel region
			
 
				+has reached the barrier and each explicit OpenMP task bound to this region has
			
 
				+completed its execution.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp barrier</c>.
			
 
				+
			
 
				+\fn void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits until no other thread is executing within the context of the selected
			
 
				+critical section, then proceeds to the exclusive execution of a function within
			
 
				+the critical section. \p f is the function to be executed in the critical
			
 
				+section. \p arg is an argument passed to function \p f. \p name is the name of
			
 
				+the selected critical section. If <c>name == NULL</c>, the selected critical
			
 
				+section is the unique anonymous critical section.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp critical</c>.
			
 
				+
			
 
				+\fn void starpu_omp_critical_inline_begin(const char *name)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits until execution can proceed exclusively within the context of the
			
 
				+selected critical section. \p name is the name of the selected critical
			
 
				+section. If <c>name == NULL</c>, the selected critical section is the unique
			
 
				+anonymous critical section.
			
 
				+
			
 
				+This function together with #starpu_omp_critical_inline_end can be used to
			
 
				+implement <c>\#pragma omp critical</c> without code outlining.
			
 
				+
			
 
				+\fn void starpu_omp_critical_inline_end(const char *name)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Ends the exclusive execution within the context of the selected critical
			
 
				+section. \p name is the name of the selected critical section. If
			
 
				+<c>name==NULL</c>, the selected critical section is the unique anonymous
			
 
				+critical section.
			
 
				+
			
 
				+This function together with #starpu_omp_critical_inline_begin can be used to
			
 
				+implement <c>\#pragma omp critical</c> without code outlining.
			
 
				+
			
 
				+@name Worksharing
			
 
				+\anchor ORS_Worksharing
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\fn void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Ensures that a single participating thread of the innermost OpenMP parallel
			
 
				+region executes a function. \p f is the function to be executed by a single
			
 
				+thread. \p arg is an argument passed to function \p f. \p nowait is a flag
			
 
				+indicating whether an implicit barrier is requested after the single section
			
 
				+(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp single</c>.
			
 
				+
			
 
				+\fn int starpu_omp_single_inline(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Decides whether the current thread is elected to run the following single
			
 
				+section among the participating threads of the innermost OpenMP parallel
			
 
				+region.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp single</c> without code
			
 
				+outlining.
			
 
				+\return <c>!0</c> if the calling thread has won the election.
			
 
				+\return <c>0</c> if the calling thread has lost the election.
			
 
				+
			
 
				+\fn void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function executes \p f on a single task of the current parallel region
			
 
				+task, and then broadcast the contents of the memory block pointed by the
			
 
				+copyprivate pointer \p data and of size \p data_size to the corresponding \p
			
 
				+data pointed memory blocks of all the other participating region tasks. This
			
 
				+function can be used to implement <c>\#pragma omp single</c> with a copyprivate
			
 
				+clause.
			
 
				+
			
 
				+\sa starpu_omp_single_copyprivate_inline
			
 
				+\sa starpu_omp_single_copyprivate_inline_begin
			
 
				+\sa starpu_omp_single_copyprivate_inline_end
			
 
				+
			
 
				+\fn void *starpu_omp_single_copyprivate_inline_begin(void *data)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function elects one task among the tasks of the current parallel region
			
 
				+task to execute the following single section, and then broadcast the
			
 
				+copyprivate pointer \p data to all the other participating region tasks. This
			
 
				+function can be used to implement <c>\#pragma omp single</c> with a copyprivate
			
 
				+clause without code outlining.
			
 
				+
			
 
				+\sa starpu_omp_single_copyprivate_inline
			
 
				+\sa starpu_omp_single_copyprivate_inline_end
			
 
				+
			
 
				+\fn void starpu_omp_single_copyprivate_inline_end(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function completes the execution of a single section and returns the
			
 
				+broadcasted copyprivate pointer for tasks that lost the election and NULL for
			
 
				+the task that won the election. This function can be used to implement
			
 
				+<c>\#pragma omp single</c> with a copyprivate clause without code outlining.
			
 
				+
			
 
				+\return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section.
			
 
				+\return NULL for the task that won the election and executed the code of the single section.
			
 
				+
			
 
				+\sa starpu_omp_single_copyprivate_inline
			
 
				+\sa starpu_omp_single_copyprivate_inline_begin
			
 
				+
			
 
				+\fn void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Executes a parallel loop together with the other threads participating to the
			
 
				+innermost parallel region. \p f is the function to be executed iteratively. \p
			
 
				+arg is an argument passed to function \p f. \p nb_iterations is the number of
			
 
				+iterations to be performed by the parallel loop. \p chunk is the number of
			
 
				+consecutive iterations that should be affected to the same thread when
			
 
				+scheduling the loop workshares, it follows the semantics of the \c modifier
			
 
				+argument in OpenMP <c>\#pragma omp for</c> specification. \p schedule is the
			
 
				+scheduling mode according to the OpenMP specification. \p ordered is a flag
			
 
				+indicating whether the loop region may contain an ordered section
			
 
				+(<c>ordered==!0</c>) or not (<c>ordered==0</c>). \p nowait is a flag
			
 
				+indicating whether an implicit barrier is requested after the for section
			
 
				+(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				+
			
 
				+The function \p f will be called with arguments \p _first_i, the first iteration
			
 
				+to perform, \p _nb_i, the number of consecutive iterations to perform before
			
 
				+returning, \p arg, the free \p arg argument.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp for</c>.
			
 
				+
			
 
				+\fn int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Decides whether the current thread should start to execute a parallel loop
			
 
				+section. See #starpu_omp_for for the argument description.
			
 
				+
			
 
				+This function together with #starpu_omp_for_inline_next can be used to
			
 
				+implement <c>\#pragma omp for</c> without code outlining.
			
 
				+
			
 
				+\return <c>!0</c> if the calling thread participates to the loop region and
			
 
				+should execute a first chunk of iterations. In that case, \p *_first_i will be
			
 
				+set to the first iteration of the chunk to perform and \p *_nb_i will be set to
			
 
				+the number of iterations of the chunk to perform.
			
 
				+
			
 
				+\return <c>0</c> if the calling thread does not participate to the loop region
			
 
				+because all the available iterations have been affected to the other threads of
			
 
				+the parallel region.
			
 
				+
			
 
				+\sa starpu_omp_for
			
 
				+
			
 
				+\fn int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Decides whether the current thread should continue to execute a parallel loop
			
 
				+section. See #starpu_omp_for for the argument description.
			
 
				+
			
 
				+This function together with #starpu_omp_for_inline_first can be used to
			
 
				+implement <c>\#pragma omp for</c> without code outlining.
			
 
				+
			
 
				+\return <c>!0</c> if the calling thread should execute a next chunk of
			
 
				+iterations. In that case, \p *_first_i will be set to the first iteration of the
			
 
				+chunk to perform and \p *_nb_i will be set to the number of iterations of the
			
 
				+chunk to perform.
			
 
				+
			
 
				+\return <c>0</c> if the calling thread does not participate anymore to the loop
			
 
				+region because all the available iterations have been affected to the other
			
 
				+threads of the parallel region.
			
 
				+
			
 
				+\sa starpu_omp_for
			
 
				+
			
 
				+\fn void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Alternative implementation of a parallel loop. This function differs from
			
 
				+#starpu_omp_for in the expected arguments of the loop function \c f.
			
 
				+
			
 
				+The function \p f will be called with arguments \p _begin_i, the first iteration
			
 
				+to perform, \p _end_i, the first iteration not to perform before
			
 
				+returning, \p arg, the free \p arg argument.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp for</c>.
			
 
				+
			
 
				+\sa starpu_omp_for
			
 
				+
			
 
				+\fn int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Inline version of the alternative implementation of a parallel loop.
			
 
				+
			
 
				+This function together with #starpu_omp_for_inline_next_alt can be used to
			
 
				+implement <c>\#pragma omp for</c> without code outlining.
			
 
				+
			
 
				+\sa starpu_omp_for
			
 
				+\sa starpu_omp_for_alt
			
 
				+\sa starpu_omp_for_inline_first
			
 
				+
			
 
				+\fn int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Inline version of the alternative implementation of a parallel loop.
			
 
				+
			
 
				+This function together with #starpu_omp_for_inline_first_alt can be used to
			
 
				+implement <c>\#pragma omp for</c> without code outlining.
			
 
				+
			
 
				+\sa starpu_omp_for
			
 
				+\sa starpu_omp_for_alt
			
 
				+\sa starpu_omp_for_inline_next
			
 
				+
			
 
				+\fn void starpu_omp_ordered(void (*f)(void *arg), void *arg)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Ensures that a function is sequentially executed once for each iteration in
			
 
				+order within a parallel loop, by the thread that own the iteration. \p f is the
			
 
				+function to be executed by the thread that own the current iteration. \p arg is
			
 
				+an argument passed to function \p f.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp ordered</c>.
			
 
				+
			
 
				+\fn void starpu_omp_ordered_inline_begin(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits until all the iterations of a parallel loop below the iteration owned by
			
 
				+the current thread have been executed.
			
 
				+
			
 
				+This function together with #starpu_omp_ordered_inline_end can be used to
			
 
				+implement <c>\#pragma omp ordered</c> without code code outlining.
			
 
				+
			
 
				+\fn void starpu_omp_ordered_inline_end(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Notifies that the ordered section for the current iteration has been completed.
			
 
				+
			
 
				+This function together with #starpu_omp_ordered_inline_begin can be used to
			
 
				+implement <c>\#pragma omp ordered</c> without code code outlining.
			
 
				+
			
 
				+\fn void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Ensures that each function of a given array of functions is executed by one and
			
 
				+only one thread. \p nb_sections is the number of functions in the array \p
			
 
				+section_f. \p section_f is the array of functions to be executed as sections. \p
			
 
				+section_arg is an array of arguments to be passed to the corresponding function.
			
 
				+\p nowait is a flag indicating whether an implicit barrier is requested after
			
 
				+the execution of all the sections (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
			
 
				+
			
 
				+\fn void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void **section_arg, int nowait)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Alternative implementation of sections. This function differs from
			
 
				+#starpu_omp_sections in that all the sections are combined within a single
			
 
				+function in this version. \p section_f is the function implementing the combined
			
 
				+sections.
			
 
				+
			
 
				+The function \p section_f will be called with arguments \p section_num, the
			
 
				+section number to be executed, \p arg, the entry of \p section_arg corresponding
			
 
				+to this section.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
			
 
				+
			
 
				+\sa starpu_omp_sections
			
 
				+
			
 
				+@name Task
			
 
				+\anchor ORS_Task
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\fn void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Generates an explicit child task. The execution of the generated task is
			
 
				+asynchronous with respect to the calling code unless specified otherwise.
			
 
				+\p attr specifies the attributes for the generated task region.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp task</c>.
			
 
				+
			
 
				+\fn void starpu_omp_taskwait(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits for the completion of the tasks generated by the current task. This
			
 
				+function does not wait for the descendants of the tasks generated by the current
			
 
				+task.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp taskwait</c>.
			
 
				+
			
 
				+\fn void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Launches a function and wait for the completion of every descendant task
			
 
				+generated during the execution of the function.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp taskgroup</c>.
			
 
				+
			
 
				+\sa starpu_omp_taskgroup_inline_begin
			
 
				+\sa starpu_omp_taskgroup_inline_end
			
 
				+
			
 
				+\fn void starpu_omp_taskgroup_inline_begin(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Launches a function and gets ready to wait for the completion of every descendant task
			
 
				+generated during the dynamic scope of the taskgroup.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
			
 
				+
			
 
				+\sa starpu_omp_taskgroup
			
 
				+\sa starpu_omp_taskgroup_inline_end
			
 
				+
			
 
				+\fn void starpu_omp_taskgroup_inline_end(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+Waits for the completion of every descendant task
			
 
				+generated during the dynamic scope of the taskgroup.
			
 
				+
			
 
				+This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
			
 
				+
			
 
				+\sa starpu_omp_taskgroup
			
 
				+\sa starpu_omp_taskgroup_inline_begin
			
 
				+
			
 
				+
			
 
				+@name API
			
 
				+\anchor ORS_API
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+
			
 
				+\fn void starpu_omp_set_num_threads(int threads)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function sets ICVS nthreads_var for the parallel regions to be created
			
 
				+with the current region.
			
 
				+
			
 
				+Note: The StarPU OpenMP runtime support currently ignores
			
 
				+this setting for nested parallel regions.
			
 
				+
			
 
				+\sa starpu_omp_get_num_threads
			
 
				+\sa starpu_omp_get_thread_num
			
 
				+\sa starpu_omp_get_max_threads
			
 
				+\sa starpu_omp_get_num_procs
			
 
				+
			
 
				+\fn int starpu_omp_get_num_threads()
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of threads of the current region.
			
 
				+
			
 
				+\return the number of threads of the current region.
			
 
				+
			
 
				+\sa starpu_omp_set_num_threads
			
 
				+\sa starpu_omp_get_thread_num
			
 
				+\sa starpu_omp_get_max_threads
			
 
				+\sa starpu_omp_get_num_procs
			
 
				+
			
 
				+\fn int starpu_omp_get_thread_num()
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the rank of the current thread among the threads
			
 
				+of the current region.
			
 
				+
			
 
				+\return the rank of the current thread in the current region.
			
 
				+
			
 
				+\sa starpu_omp_set_num_threads
			
 
				+\sa starpu_omp_get_num_threads
			
 
				+\sa starpu_omp_get_max_threads
			
 
				+\sa starpu_omp_get_num_procs
			
 
				+
			
 
				+\fn int starpu_omp_get_max_threads()
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the maximum number of threads that can be used to
			
 
				+create a region from the current region.
			
 
				+
			
 
				+\return the maximum number of threads that can be used to create a region from the current region.
			
 
				+
			
 
				+\sa starpu_omp_set_num_threads
			
 
				+\sa starpu_omp_get_num_threads
			
 
				+\sa starpu_omp_get_thread_num
			
 
				+\sa starpu_omp_get_num_procs
			
 
				+
			
 
				+\fn int starpu_omp_get_num_procs (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of StarPU CPU workers.
			
 
				+
			
 
				+\return the number of StarPU CPU workers.
			
 
				+
			
 
				+\sa starpu_omp_set_num_threads
			
 
				+\sa starpu_omp_get_num_threads
			
 
				+\sa starpu_omp_get_thread_num
			
 
				+\sa starpu_omp_get_max_threads
			
 
				+
			
 
				+\fn int starpu_omp_in_parallel (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns whether it is called from the scope of a parallel region or not.
			
 
				+
			
 
				+\return <c>!0</c> if called from a parallel region scope.
			
 
				+\return <c>0</c> otherwise.
			
 
				+
			
 
				+\fn void starpu_omp_set_dynamic (int dynamic_threads)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function enables (1) or disables (0) dynamically adjusting the number of parallel threads.
			
 
				+
			
 
				+Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				+
			
 
				+\sa starpu_omp_get_dynamic
			
 
				+
			
 
				+\fn int starpu_omp_get_dynamic (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the state of dynamic thread number adjustment.
			
 
				+
			
 
				+\return <c>!0</c> if dynamic thread number adjustment is enabled.
			
 
				+\return <c>0</c> otherwise.
			
 
				+
			
 
				+\sa starpu_omp_set_dynamic
			
 
				+
			
 
				+\fn void starpu_omp_set_nested (int nested)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function enables (1) or disables (0) nested parallel regions.
			
 
				+
			
 
				+Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				+
			
 
				+\sa starpu_omp_get_nested
			
 
				+\sa starpu_omp_get_max_active_levels
			
 
				+\sa starpu_omp_set_max_active_levels
			
 
				+\sa starpu_omp_get_level
			
 
				+\sa starpu_omp_get_active_level
			
 
				+
			
 
				+\fn int starpu_omp_get_nested (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns whether nested parallel sections are enabled or not.
			
 
				+
			
 
				+\return <c>!0</c> if nested parallel sections are enabled.
			
 
				+\return <c>0</c> otherwise.
			
 
				+
			
 
				+\sa starpu_omp_set_nested
			
 
				+\sa starpu_omp_get_max_active_levels
			
 
				+\sa starpu_omp_set_max_active_levels
			
 
				+\sa starpu_omp_get_level
			
 
				+\sa starpu_omp_get_active_level
			
 
				+
			
 
				+\fn int starpu_omp_get_cancellation(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the state of the cancel ICVS var.
			
 
				+
			
 
				+\fn void starpu_omp_set_schedule (enum starpu_omp_sched_value kind, int modifier)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function sets the default scheduling kind for upcoming loops within the
			
 
				+current parallel section. \p kind is the scheduler kind, \p modifier
			
 
				+complements the scheduler kind with informations such as the chunk size,
			
 
				+in accordance with the OpenMP specification.
			
 
				+
			
 
				+\sa starpu_omp_get_schedule
			
 
				+
			
 
				+\fn void starpu_omp_get_schedule (enum starpu_omp_sched_value *kind, int *modifier)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the current selected default loop scheduler.
			
 
				+
			
 
				+\return the kind and the modifier of the current default loop scheduler.
			
 
				+
			
 
				+\sa starpu_omp_set_schedule
			
 
				+
			
 
				+\fn int starpu_omp_get_thread_limit (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of StarPU CPU workers.
			
 
				+
			
 
				+\return the number of StarPU CPU workers.
			
 
				+
			
 
				+\fn void starpu_omp_set_max_active_levels (int max_levels)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function sets the maximum number of allowed active parallel section levels.
			
 
				+
			
 
				+Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assume \p max_levels equals <c>1</c> instead.
			
 
				+
			
 
				+\sa starpu_omp_set_nested
			
 
				+\sa starpu_omp_get_nested
			
 
				+\sa starpu_omp_get_max_active_levels
			
 
				+\sa starpu_omp_get_level
			
 
				+\sa starpu_omp_get_active_level
			
 
				+
			
 
				+\fn int starpu_omp_get_max_active_levels (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the current maximum number of allowed active parallel section levels
			
 
				+
			
 
				+\return the current maximum number of allowed active parallel section levels.
			
 
				+
			
 
				+\sa starpu_omp_set_nested
			
 
				+\sa starpu_omp_get_nested
			
 
				+\sa starpu_omp_set_max_active_levels
			
 
				+\sa starpu_omp_get_level
			
 
				+\sa starpu_omp_get_active_level
			
 
				+
			
 
				+\fn int starpu_omp_get_level (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the nesting level of the current parallel section.
			
 
				+
			
 
				+\return the nesting level of the current parallel section.
			
 
				+
			
 
				+\sa starpu_omp_set_nested
			
 
				+\sa starpu_omp_get_nested
			
 
				+\sa starpu_omp_get_max_active_levels
			
 
				+\sa starpu_omp_set_max_active_levels
			
 
				+\sa starpu_omp_get_active_level
			
 
				+
			
 
				+\fn int starpu_omp_get_ancestor_thread_num (int level)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of the ancestor of the current parallel section.
			
 
				+
			
 
				+\return the number of the ancestor of the current parallel section.
			
 
				+
			
 
				+\fn int starpu_omp_get_team_size (int level)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the size of the team of the current parallel section.
			
 
				+
			
 
				+\return the size of the team of the current parallel section.
			
 
				+
			
 
				+\fn int starpu_omp_get_active_level (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the nestinglevel of the current innermost active parallel section.
			
 
				+
			
 
				+\return the nestinglevel of the current innermost active parallel section.
			
 
				+
			
 
				+\sa starpu_omp_set_nested
			
 
				+\sa starpu_omp_get_nested
			
 
				+\sa starpu_omp_get_max_active_levels
			
 
				+\sa starpu_omp_set_max_active_levels
			
 
				+\sa starpu_omp_get_level
			
 
				+
			
 
				+\fn int starpu_omp_in_final(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function checks whether the current task is final or not.
			
 
				+
			
 
				+\return <c>!0</c> if called from a final task.
			
 
				+\return <c>0</c> otherwise.
			
 
				+
			
 
				+\fn enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the proc_bind setting of the current parallel region.
			
 
				+
			
 
				+\return the proc_bind setting of the current parallel region.
			
 
				+
			
 
				+\fn void starpu_omp_set_default_device(int device_num)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function sets the number of the device to use as default.
			
 
				+
			
 
				+Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				+
			
 
				+\sa starpu_omp_get_default_device
			
 
				+\sa starpu_omp_is_initial_device
			
 
				+
			
 
				+\fn int starpu_omp_get_default_device(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of the device used as default.
			
 
				+
			
 
				+\return the number of the device used as default.
			
 
				+
			
 
				+\sa starpu_omp_set_default_device
			
 
				+\sa starpu_omp_is_initial_device
			
 
				+
			
 
				+\fn int starpu_omp_get_num_devices(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of the devices.
			
 
				+
			
 
				+\return the number of the devices.
			
 
				+
			
 
				+\fn int starpu_omp_get_num_teams(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the number of teams in the current teams region.
			
 
				+
			
 
				+\return the number of teams in the current teams region.
			
 
				+
			
 
				+\sa starpu_omp_get_num_teams
			
 
				+
			
 
				+\fn int starpu_omp_get_team_num(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the team number of the calling thread.
			
 
				+
			
 
				+\return the team number of the calling thread.
			
 
				+
			
 
				+\sa starpu_omp_get_num_teams
			
 
				+
			
 
				+\fn int starpu_omp_is_initial_device(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function checks whether the current device is the initial device or not.
			
 
				+
			
 
				+\return <c>!0</c> if called from the host device.
			
 
				+\return <c>0</c> otherwise.
			
 
				+
			
 
				+\sa starpu_omp_set_default_device
			
 
				+\sa starpu_omp_get_default_device
			
 
				+
			
 
				+\fn void starpu_omp_init_lock (starpu_omp_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function initializes an opaque lock object.
			
 
				+
			
 
				+\sa starpu_omp_destroy_lock
			
 
				+\sa starpu_omp_set_lock
			
 
				+\sa starpu_omp_unset_lock
			
 
				+\sa starpu_omp_test_lock
			
 
				+
			
 
				+\fn void starpu_omp_destroy_lock (starpu_omp_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function destroys an opaque lock object.
			
 
				+
			
 
				+\sa starpu_omp_init_lock
			
 
				+\sa starpu_omp_set_lock
			
 
				+\sa starpu_omp_unset_lock
			
 
				+\sa starpu_omp_test_lock
			
 
				+
			
 
				+\fn void starpu_omp_set_lock (starpu_omp_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function locks an opaque lock object. If the lock is already locked, the
			
 
				+function will block until it succeeds in exclusively acquiring the lock.
			
 
				+
			
 
				+\sa starpu_omp_init_lock
			
 
				+\sa starpu_omp_destroy_lock
			
 
				+\sa starpu_omp_unset_lock
			
 
				+\sa starpu_omp_test_lock
			
 
				+
			
 
				+\fn void starpu_omp_unset_lock (starpu_omp_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function unlocks a previously locked lock object. The behaviour of this
			
 
				+function is unspecified if it is called on an unlocked lock object.
			
 
				+
			
 
				+\sa starpu_omp_init_lock
			
 
				+\sa starpu_omp_destroy_lock
			
 
				+\sa starpu_omp_set_lock
			
 
				+\sa starpu_omp_test_lock
			
 
				+
			
 
				+\fn int starpu_omp_test_lock (starpu_omp_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function unblockingly attempts to lock a lock object and returns whether
			
 
				+it succeeded or not.
			
 
				+
			
 
				+\return <c>!0</c> if the function succeeded in acquiring the lock.
			
 
				+\return <c>0</c> if the lock was already locked.
			
 
				+
			
 
				+\sa starpu_omp_init_lock
			
 
				+\sa starpu_omp_destroy_lock
			
 
				+\sa starpu_omp_set_lock
			
 
				+\sa starpu_omp_unset_lock
			
 
				+
			
 
				+\fn void starpu_omp_init_nest_lock (starpu_omp_nest_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function initializes an opaque lock object supporting nested locking operations.
			
 
				+
			
 
				+\sa starpu_omp_destroy_nest_lock
			
 
				+\sa starpu_omp_set_nest_lock
			
 
				+\sa starpu_omp_unset_nest_lock
			
 
				+\sa starpu_omp_test_nest_lock
			
 
				+
			
 
				+\fn void starpu_omp_destroy_nest_lock (starpu_omp_nest_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function destroys an opaque lock object supporting nested locking operations.
			
 
				+
			
 
				+\sa starpu_omp_init_nest_lock
			
 
				+\sa starpu_omp_set_nest_lock
			
 
				+\sa starpu_omp_unset_nest_lock
			
 
				+\sa starpu_omp_test_nest_lock
			
 
				+
			
 
				+\fn void starpu_omp_set_nest_lock (starpu_omp_nest_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function locks an opaque lock object supporting nested locking operations.
			
 
				+If the lock is already locked by another task, the function will block until
			
 
				+it succeeds in exclusively acquiring the lock. If the lock is already taken by
			
 
				+the current task, the function will increase the nested locking level of the
			
 
				+lock object.
			
 
				+
			
 
				+\sa starpu_omp_init_nest_lock
			
 
				+\sa starpu_omp_destroy_nest_lock
			
 
				+\sa starpu_omp_unset_nest_lock
			
 
				+\sa starpu_omp_test_nest_lock
			
 
				+
			
 
				+\fn void starpu_omp_unset_nest_lock (starpu_omp_nest_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function unlocks a previously locked lock object supporting nested locking
			
 
				+operations. If the lock has been locked multiple times in nested fashion, the
			
 
				+nested locking level is decreased and the lock remains locked. Otherwise, if
			
 
				+the lock has only been locked once, it becomes unlocked. The behaviour of this
			
 
				+function is unspecified if it is called on an unlocked lock object. The
			
 
				+behaviour of this function is unspecified if it is called from a different task
			
 
				+than the one that locked the lock object.
			
 
				+
			
 
				+\sa starpu_omp_init_nest_lock
			
 
				+\sa starpu_omp_destroy_nest_lock
			
 
				+\sa starpu_omp_set_nest_lock
			
 
				+\sa starpu_omp_test_nest_lock
			
 
				+
			
 
				+\fn int starpu_omp_test_nest_lock (starpu_omp_nest_lock_t *lock)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function unblocking attempts to lock an opaque lock object supporting
			
 
				+nested locking operations and returns whether it succeeded or not. If the lock
			
 
				+is already locked by another task, the function will return without having
			
 
				+acquired the lock. If the lock is already taken by the current task, the
			
 
				+function will increase the nested locking level of the lock object.
			
 
				+
			
 
				+\return <c>!0</c> if the function succeeded in acquiring the lock.
			
 
				+\return <c>0</c> if the lock was already locked.
			
 
				+
			
 
				+\sa starpu_omp_init_nest_lock
			
 
				+\sa starpu_omp_destroy_nest_lock
			
 
				+\sa starpu_omp_set_nest_lock
			
 
				+\sa starpu_omp_unset_nest_lock
			
 
				+
			
 
				+\fn void starpu_omp_atomic_fallback_inline_begin(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function implements the entry point of a fallback global atomic region. It
			
 
				+blocks until it succeeds in acquiring exclusive access to the global atomic
			
 
				+region.
			
 
				+
			
 
				+\sa starpu_omp_atomic_fallback_inline_end
			
 
				+
			
 
				+\fn void starpu_omp_atomic_fallback_inline_end(void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function implements the exit point of a fallback global atomic region. It
			
 
				+release the exclusive access to the global atomic region.
			
 
				+
			
 
				+\sa starpu_omp_atomic_fallback_inline_begin
			
 
				+
			
 
				+\fn double starpu_omp_get_wtime (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the elapsed wallclock time in seconds.
			
 
				+
			
 
				+\return the elapsed wallclock time in seconds.
			
 
				+
			
 
				+\sa starpu_omp_get_wtick
			
 
				+
			
 
				+\fn double starpu_omp_get_wtick (void)
			
 
				+\ingroup API_OpenMP_Runtime_Support
			
 
				+This function returns the precision of the time used by \p starpu_omp_get_wtime.
			
 
				+
			
 
				+\return the precision of the time used by \p starpu_omp_get_wtime.
			
 
				+
			
 
				+\sa starpu_omp_get_wtime
			
 
				+
			
 
				+*/
			
--- a/doc/doxygen/doxygen-config.cfg.in
+++ b/doc/doxygen/doxygen-config.cfg.in
@@ -36,6 +36,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 
				 			 @top_srcdir@/include/starpu_hash.h \
			
 
				 			 @top_srcdir@/include/starpu_mic.h \
			
 
				 			 @top_srcdir@/include/starpu_opencl.h \
			
 
				+			 @top_srcdir@/include/starpu_openmp.h \
			
 
				 			 @top_srcdir@/include/starpu_perfmodel.h \
			
 
				 			 @top_srcdir@/include/starpu_profiling.h \
			
 
				 			 @top_srcdir@/include/starpu_rand.h \
			
--- a/doc/doxygen/doxygen.cfg
+++ b/doc/doxygen/doxygen.cfg
@@ -1623,6 +1623,7 @@ PREDEFINED             = STARPU_USE_OPENCL=1 \
 
				 			 STARPU_HAVE_HWLOC=1 \
			
 
				 			 STARPU_USE_SC_HYPERVISOR=1 \
			
 
				 			 STARPU_SIMGRID=1 \
			
 
				+			 STARPU_OPENMP=1 \
			
 
				                          __GCC__
			
 
				 
			
 
				 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
			
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -154,6 +154,11 @@ Documentation License”.
 
				 \hypertarget{SimGridSupport}{}
			
 
				 \input{SimGridSupport}
			
 
				 
			
 
				+\chapter{OpenMP Runtime Support}
			
 
				+\label{OpenMPRuntimeSupport}
			
 
				+\hypertarget{OpenMPRuntimeSupport}{}
			
 
				+\input{OpenMPRuntimeSupport}
			
 
				+
			
 
				 \part{StarPU Reference API}
			
 
				 
			
 
				 \chapter{Execution Configuration Through Environment Variables}
			
@@ -194,6 +199,7 @@ Documentation License”.
 
				 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
			
 
				 \input{group__API__CUDA__Extensions}
			
 
				 \input{group__API__OpenCL__Extensions}
			
 
				+\input{group__API__OpenMP__Runtime__Support}
			
 
				 \input{group__API__MIC__Extensions}
			
 
				 \input{group__API__SCC__Extensions}
			
 
				 \input{group__API__Miscellaneous__Helpers}
			
@@ -236,6 +242,7 @@ Documentation License”.
 
				 \input{starpu__hash_8h}
			
 
				 \input{starpu__mic_8h}
			
 
				 \input{starpu__opencl_8h}
			
 
				+\input{starpu__openmp_8h}
			
 
				 \input{starpu__perfmodel_8h}
			
 
				 \input{starpu__profiling_8h}
			
 
				 \input{starpu__rand_8h}
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -66,6 +67,7 @@ typedef UINT_PTR uintptr_t;
 
				 #include <starpu_fxt.h>
			
 
				 #include <starpu_driver.h>
			
 
				 #include <starpu_tree.h>
			
 
				+#include <starpu_openmp.h>
			
 
				 #include <starpu_simgrid_wrap.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -35,6 +36,8 @@
 
				 #undef STARPU_USE_MIC
			
 
				 #undef STARPU_USE_SCC
			
 
				 
			
 
				+#undef STARPU_OPENMP
			
 
				+
			
 
				 #undef STARPU_SIMGRID
			
 
				 #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
			
 
				 
			
--- a/include/starpu_openmp.h
+++ b/include/starpu_openmp.h
@@ -0,0 +1,169 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_OPENMP_H__
			
 
				+#define __STARPU_OPENMP_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#if defined STARPU_OPENMP
			
 
				+typedef struct { void *internal; } starpu_omp_lock_t;
			
 
				+typedef struct { void *internal; } starpu_omp_nest_lock_t;
			
 
				+
			
 
				+enum starpu_omp_sched_value
			
 
				+{
			
 
				+	starpu_omp_sched_undefined = 0,
			
 
				+	starpu_omp_sched_static    = 1,
			
 
				+	starpu_omp_sched_dynamic   = 2,
			
 
				+	starpu_omp_sched_guided    = 3,
			
 
				+	starpu_omp_sched_auto      = 4,
			
 
				+	starpu_omp_sched_runtime   = 5
			
 
				+};
			
 
				+
			
 
				+enum starpu_omp_proc_bind_value
			
 
				+{
			
 
				+	starpu_omp_proc_bind_undefined  = -1,
			
 
				+	starpu_omp_proc_bind_false  = 0,
			
 
				+	starpu_omp_proc_bind_true   = 1,
			
 
				+	starpu_omp_proc_bind_master = 2,
			
 
				+	starpu_omp_proc_bind_close  = 3,
			
 
				+	starpu_omp_proc_bind_spread = 4
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_parallel_region_attr
			
 
				+{
			
 
				+	struct starpu_codelet  cl;
			
 
				+	starpu_data_handle_t  *handles;
			
 
				+	void     *cl_arg;
			
 
				+	size_t    cl_arg_size;
			
 
				+	unsigned  cl_arg_free;
			
 
				+
			
 
				+	int if_clause;
			
 
				+	int num_threads;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_task_region_attr
			
 
				+{
			
 
				+	struct starpu_codelet  cl;
			
 
				+	starpu_data_handle_t  *handles;
			
 
				+	void     *cl_arg;
			
 
				+	size_t    cl_arg_size;
			
 
				+	unsigned  cl_arg_free;
			
 
				+
			
 
				+	int if_clause;
			
 
				+	int final_clause;
			
 
				+	int untied_clause;
			
 
				+	int mergeable_clause;
			
 
				+};
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#define __STARPU_OMP_NOTHROW throw ()
			
 
				+#else
			
 
				+#define __STARPU_OMP_NOTHROW __attribute__((__nothrow__))
			
 
				+#endif
			
 
				+
			
 
				+extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_master(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_single_inline(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) __STARPU_OMP_NOTHROW;
			
 
				+extern void *starpu_omp_single_copyprivate_inline_begin(void *data) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_single_copyprivate_inline_end(void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_critical_inline_begin(const char *name) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_taskgroup_inline_begin(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_taskgroup_inline_end(void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_ordered_inline_begin(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_ordered(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_thread_num() __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_max_threads() __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_num_procs (void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_in_parallel (void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_dynamic (int dynamic_threads) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_dynamic (void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_nested (int nested) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_nested (void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_cancellation(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_schedule (enum starpu_omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_get_schedule (enum starpu_omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_thread_limit (void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_max_active_levels (int max_levels) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_max_active_levels (void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_level (void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_ancestor_thread_num (int level) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_team_size (int level) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_active_level (void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_in_final(void) __STARPU_OMP_NOTHROW;
			
 
				+extern enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_default_device(void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_num_devices(void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_num_teams(void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_get_team_num(void) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_is_initial_device(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_init_lock (starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_destroy_lock (starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_lock (starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_unset_lock (starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_test_lock (starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_init_nest_lock (starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_destroy_nest_lock (starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_set_nest_lock (starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_unset_nest_lock (starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern int starpu_omp_test_nest_lock (starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW;
			
 
				+extern void starpu_omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW;
			
 
				+extern double starpu_omp_get_wtime (void) __STARPU_OMP_NOTHROW;
			
 
				+extern double starpu_omp_get_wtick (void) __STARPU_OMP_NOTHROW;
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_USE_OPENMP && !STARPU_DONT_INCLUDE_OPENMP_HEADERS */
			
 
				+#endif /* __STARPU_OPENMP_H__ */
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -3,7 +3,7 @@
 
				  * Copyright (C) 2010-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				- * Copyright (C) 2011  INRIA
			
 
				+ * Copyright (C) 2011, 2014  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -62,6 +62,9 @@ enum starpu_task_status
 
				 	STARPU_TASK_BLOCKED_ON_TAG,
			
 
				 	STARPU_TASK_BLOCKED_ON_TASK,
			
 
				 	STARPU_TASK_BLOCKED_ON_DATA
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	, STARPU_TASK_STOPPED
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 typedef uint64_t starpu_tag_t;
			
@@ -194,6 +197,9 @@ struct starpu_task
 
				 	struct starpu_task *prev;
			
 
				 	struct starpu_task *next;
			
 
				 	void *starpu_private;
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	struct starpu_omp_task *omp_task;
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 #define STARPU_TASK_INITIALIZER 			\
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@
 
				 #
			
 
				 # Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				 # Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				-# Copyright (C) 2011  INRIA
			
 
				+# Copyright (C) 2011, 2014  INRIA
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -127,6 +127,7 @@ noinst_HEADERS = 						\
 
				 	debug/traces/starpu_fxt.h				\
			
 
				 	profiling/bound.h					\
			
 
				 	profiling/profiling.h					\
			
 
				+	util/openmp_runtime_support.h				\
			
 
				 	util/starpu_task_insert_utils.h				\
			
 
				 	util/starpu_data_cpy.h					\
			
 
				 	starpu_parameters.h					\
			
@@ -225,6 +226,9 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 
				 	util/starpu_create_sync_task.c				\
			
 
				 	util/file.c						\
			
 
				 	util/misc.c						\
			
 
				+	util/openmp_runtime_support.c				\
			
 
				+	util/openmp_runtime_support_environment.c		\
			
 
				+	util/openmp_runtime_support_omp_api.c			\
			
 
				 	util/starpu_data_cpy.c					\
			
 
				 	util/starpu_task_insert.c				\
			
 
				 	util/starpu_task_insert_utils.c				\
			
--- a/src/core/dependencies/task_deps.c
+++ b/src/core/dependencies/task_deps.c
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010-2013  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -67,7 +68,12 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex);
			
 
				 	if (check)
			
 
				-		STARPU_ASSERT_MSG(!job->submitted || !task->destroy || task->detach, "Task dependencies have to be set before submission (submitted %u destroy %d detach %d)", job->submitted, task->destroy, task->detach);
			
 
				+		STARPU_ASSERT_MSG(
			
 
				+				!job->submitted || !task->destroy || task->detach
			
 
				+#ifdef STARPU_OPENMP
			
 
				+				|| job->continuation
			
 
				+#endif
			
 
				+				, "Task dependencies have to be set before submission (submitted %u destroy %d detach %d)", job->submitted, task->destroy, task->detach);
			
 
				 	else
			
 
				 		STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", job->terminated);
			
 
				 
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -3,7 +3,7 @@
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				- * Copyright (C) 2011  INRIA
			
 
				+ * Copyright (C) 2011, 2014  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -156,22 +156,70 @@ void _starpu_wait_job(struct _starpu_job *j)
 
				         _STARPU_LOG_OUT();
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+int _starpu_test_job_termination(struct _starpu_job *j)
			
 
				+{
			
 
				+	STARPU_ASSERT(j->task);
			
 
				+	STARPU_ASSERT(!j->task->detach);
			
 
				+	return (j->terminated == 2);
			
 
				+}
			
 
				+void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit,
			
 
				+		void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg)
			
 
				+{
			
 
				+	STARPU_ASSERT(!j->continuation);
			
 
				+	/* continuation are not supported for parallel tasks for now */
			
 
				+	STARPU_ASSERT(j->task_size == 1);
			
 
				+	j->continuation = 1;
			
 
				+	j->continuation_resubmit = continuation_resubmit;
			
 
				+	j->continuation_callback_on_sleep = continuation_callback_on_sleep;
			
 
				+	j->continuation_callback_on_sleep_arg = continuation_callback_on_sleep_arg;
			
 
				+	j->job_successors.ndeps = 0;
			
 
				+}
			
 
				+/* Prepare a currently running job for accepting a new set of
			
 
				+ * dependencies in anticipation of becoming a continuation. */
			
 
				+void _starpu_job_prepare_for_continuation(struct _starpu_job *j)
			
 
				+{
			
 
				+	_starpu_job_prepare_for_continuation_ext(j, 1, NULL, NULL);
			
 
				+}
			
 
				+void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j,
			
 
				+		void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg)
			
 
				+{
			
 
				+	j->omp_cleanup_callback = omp_cleanup_callback;
			
 
				+	j->omp_cleanup_callback_arg = omp_cleanup_callback_arg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 void _starpu_handle_job_termination(struct _starpu_job *j)
			
 
				 {
			
 
				 	struct starpu_task *task = j->task;
			
 
				 	unsigned sched_ctx = task->sched_ctx;
			
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	double flops = task->flops;
			
 
				+	const unsigned continuation =
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		j->continuation
			
 
				+#else
			
 
				+		0
			
 
				+#endif
			
 
				+		;
			
 
				+
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
			
 
				-	
			
 
				-	task->status = STARPU_TASK_FINISHED;
			
 
				-	
			
 
				-	/* We must have set the j->terminated flag early, so that it is
			
 
				-	 * possible to express task dependencies within the callback
			
 
				-	 * function. A value of 1 means that the codelet was executed but that
			
 
				-	 * the callback is not done yet. */
			
 
				-	j->terminated = 1;
			
 
				-		
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	if (continuation)
			
 
				+	{
			
 
				+		task->status = STARPU_TASK_STOPPED;
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		task->status = STARPU_TASK_FINISHED;
			
 
				+
			
 
				+		/* We must have set the j->terminated flag early, so that it is
			
 
				+		 * possible to express task dependencies within the callback
			
 
				+		 * function. A value of 1 means that the codelet was executed but that
			
 
				+		 * the callback is not done yet. */
			
 
				+		j->terminated = 1;
			
 
				+	}
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				 
			
 
				 
			
@@ -180,7 +228,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 #endif //STARPU_USE_SC_HYPERVISOR
			
 
				 
			
 
				 	/* We release handle reference count */
			
 
				-	if (task->cl)
			
 
				+	if (task->cl && !continuation)
			
 
				 	{
			
 
				 		unsigned i;
			
 
				 		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			
@@ -202,13 +250,24 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 				_starpu_spin_unlock(&handle->header_lock);
			
 
				 		}
			
 
				 	}
			
 
				-	/* Tell other tasks that we don't exist any more, thus no need for
			
 
				-	 * implicit dependencies any more.  */
			
 
				-	_starpu_release_task_enforce_sequential_consistency(j);
			
 
				+	/* If this is a continuation, we do not release task dependencies now.
			
 
				+	 * Task dependencies will be released only when the continued task
			
 
				+	 * fully completes */
			
 
				+	if (!continuation)
			
 
				+	{
			
 
				+		/* Tell other tasks that we don't exist any more, thus no need for
			
 
				+		 * implicit dependencies any more.  */
			
 
				+		_starpu_release_task_enforce_sequential_consistency(j);
			
 
				+	}
			
 
				+
			
 
				 	/* Task does not have a cl, but has explicit data dependencies, we need
			
 
				 	 * to tell them that we will not exist any more before notifying the
			
 
				-	 * tasks waiting for us */
			
 
				-	if (j->implicit_dep_handle) {
			
 
				+	 * tasks waiting for us
			
 
				+	 *
			
 
				+	 * For continuations, implicit dependency handles are only released 
			
 
				+	 * when the task fully completes */
			
 
				+	if (j->implicit_dep_handle && !continuation)
			
 
				+	{
			
 
				 		starpu_data_handle_t handle = j->implicit_dep_handle;
			
 
				 		_starpu_release_data_enforce_sequential_consistency(j->task, &j->implicit_dep_slot, handle);
			
 
				 		/* Release reference taken while setting implicit_dep_handle */
			
@@ -218,46 +277,68 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 			_starpu_spin_unlock(&handle->header_lock);
			
 
				 	}
			
 
				 
			
 
				-	/* in case there are dependencies, wake up the proper tasks */
			
 
				-	_starpu_notify_dependencies(j);
			
 
				+	/* If this is a continuation, we do not notify task/tag dependencies
			
 
				+	 * now. Task/tag dependencies will be notified only when the continued
			
 
				+	 * task fully completes */
			
 
				+	if (!continuation)
			
 
				+	{
			
 
				+		/* in case there are dependencies, wake up the proper tasks */
			
 
				+		_starpu_notify_dependencies(j);
			
 
				+	}
			
 
				 
			
 
				-	/* the callback is executed after the dependencies so that we may remove the tag
			
 
				- 	 * of the task itself */
			
 
				-	if (task->callback_func)
			
 
				+	/* If this is a continuation, we do not execute the callback
			
 
				+	 * now. The callback will be executed only when the continued
			
 
				+	 * task fully completes */
			
 
				+	if (!continuation)
			
 
				 	{
			
 
				-		int profiling = starpu_profiling_status_get();
			
 
				-		if (profiling && task->profiling_info)
			
 
				-			_starpu_clock_gettime(&task->profiling_info->callback_start_time);
			
 
				+		/* the callback is executed after the dependencies so that we may remove the tag
			
 
				+		 * of the task itself */
			
 
				+		if (task->callback_func)
			
 
				+		{
			
 
				+			int profiling = starpu_profiling_status_get();
			
 
				+			if (profiling && task->profiling_info)
			
 
				+				_starpu_clock_gettime(&task->profiling_info->callback_start_time);
			
 
				 
			
 
				-		/* so that we can check whether we are doing blocking calls
			
 
				-		 * within the callback */
			
 
				-		_starpu_set_local_worker_status(STATUS_CALLBACK);
			
 
				+			/* so that we can check whether we are doing blocking calls
			
 
				+			 * within the callback */
			
 
				+			_starpu_set_local_worker_status(STATUS_CALLBACK);
			
 
				 
			
 
				 
			
 
				-		/* Perhaps we have nested callbacks (eg. with chains of empty
			
 
				-		 * tasks). So we store the current task and we will restore it
			
 
				-		 * later. */
			
 
				-		struct starpu_task *current_task = starpu_task_get_current();
			
 
				+			/* Perhaps we have nested callbacks (eg. with chains of empty
			
 
				+			 * tasks). So we store the current task and we will restore it
			
 
				+			 * later. */
			
 
				+			struct starpu_task *current_task = starpu_task_get_current();
			
 
				 
			
 
				-		_starpu_set_current_task(task);
			
 
				+			_starpu_set_current_task(task);
			
 
				 
			
 
				-		_STARPU_TRACE_START_CALLBACK(j);
			
 
				-		task->callback_func(task->callback_arg);
			
 
				-		_STARPU_TRACE_END_CALLBACK(j);
			
 
				+			_STARPU_TRACE_START_CALLBACK(j);
			
 
				+			task->callback_func(task->callback_arg);
			
 
				+			_STARPU_TRACE_END_CALLBACK(j);
			
 
				 
			
 
				-		_starpu_set_current_task(current_task);
			
 
				+			_starpu_set_current_task(current_task);
			
 
				 
			
 
				-		_starpu_set_local_worker_status(STATUS_UNKNOWN);
			
 
				+			_starpu_set_local_worker_status(STATUS_UNKNOWN);
			
 
				 
			
 
				-		if (profiling && task->profiling_info)
			
 
				-			_starpu_clock_gettime(&task->profiling_info->callback_end_time);
			
 
				+			if (profiling && task->profiling_info)
			
 
				+				_starpu_clock_gettime(&task->profiling_info->callback_end_time);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/* If the job was executed on a combined worker there is no need for the
			
 
				 	 * scheduler to process it : the task structure doesn't contain any valuable
			
 
				 	 * data as it's not linked to an actual worker */
			
 
				 	/* control task should not execute post_exec_hook */
			
 
				-	if(j->task_size == 1 && task->cl != NULL && !j->internal)
			
 
				+	if(j->task_size == 1 && task->cl != NULL && !j->internal
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	/* If this is a continuation, we do not execute the post_exec_hook. The
			
 
				+	 * post_exec_hook will be run only when the continued task fully
			
 
				+	 * completes.
			
 
				+	 *
			
 
				+	 * Note: If needed, a specific hook could be added to handle stopped
			
 
				+	 * tasks */
			
 
				+	&& !continuation
			
 
				+#endif
			
 
				+			)
			
 
				 	{
			
 
				 		_starpu_sched_post_exec_hook(task);
			
 
				 #ifdef STARPU_USE_SC_HYPERVISOR
			
@@ -266,6 +347,9 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 
			
 
				 	}
			
 
				 
			
 
				+	/* Note: For now, we keep the TASK_DONE trace event for continuation,
			
 
				+	 * however we could add a specific event for stopped tasks if needed.
			
 
				+	 */
			
 
				 	_STARPU_TRACE_TASK_DONE(j);
			
 
				 
			
 
				 	/* NB: we do not save those values before the callback, in case the
			
@@ -278,9 +362,20 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 	/* we do not desallocate the job structure if some is going to
			
 
				 	 * wait after the task */
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
			
 
				-	/* A value of 2 is put to specify that not only the codelet but
			
 
				-	 * also the callback were executed. */
			
 
				-	j->terminated = 2;
			
 
				+	if (!continuation)
			
 
				+	{
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		if (j->omp_cleanup_callback)
			
 
				+		{
			
 
				+			j->omp_cleanup_callback(j->omp_cleanup_callback_arg);
			
 
				+			j->omp_cleanup_callback = NULL;
			
 
				+			j->omp_cleanup_callback_arg = NULL;
			
 
				+		}
			
 
				+#endif
			
 
				+		/* A value of 2 is put to specify that not only the codelet but
			
 
				+		 * also the callback were executed. */
			
 
				+		j->terminated = 2;
			
 
				+	}
			
 
				 	STARPU_PTHREAD_COND_BROADCAST(&j->sync_cond);
			
 
				 
			
 
				 #ifdef HAVE_AYUDAME_H
			
@@ -289,7 +384,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				 
			
 
				-	if (detach)
			
 
				+	if (detach && !continuation)
			
 
				 	{
			
 
				 		/* no one is going to synchronize with that task so we release
			
 
				 		 * the data structures now. In case the job was already locked
			
@@ -299,9 +394,12 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 			_starpu_task_destroy(task);
			
 
				 	}
			
 
				 
			
 
				-	if (regenerate)
			
 
				+	/* A continuation is not much different from a regenerated task. */
			
 
				+	if (regenerate || continuation)
			
 
				 	{
			
 
				-		STARPU_ASSERT_MSG(detach && !destroy && !task->synchronous, "Regenerated task must be detached (was %d), and not have detroy=1 (was %d) or synchronous=1 (was %d)", detach, destroy, task->synchronous);
			
 
				+		STARPU_ASSERT_MSG((detach && !destroy && !task->synchronous)
			
 
				+				|| continuation
			
 
				+				, "Regenerated task must be detached (was %d), and not have detroy=1 (was %d) or synchronous=1 (was %d)", detach, destroy, task->synchronous);
			
 
				 
			
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 		if (AYU_event)
			
@@ -311,9 +409,28 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 		}
			
 
				 #endif
			
 
				 
			
 
				-		/* We reuse the same job structure */
			
 
				-		int ret = _starpu_submit_job(j);
			
 
				-		STARPU_ASSERT(!ret);
			
 
				+		{
			
 
				+#ifdef STARPU_OPENMP
			
 
				+			unsigned continuation_resubmit = j->continuation_resubmit;
			
 
				+			void (*continuation_callback_on_sleep)(void *arg) = j->continuation_callback_on_sleep;
			
 
				+			void *continuation_callback_on_sleep_arg = j->continuation_callback_on_sleep_arg;
			
 
				+			j->continuation_resubmit = 1;
			
 
				+			j->continuation_callback_on_sleep = NULL;
			
 
				+			j->continuation_callback_on_sleep_arg = NULL;
			
 
				+			if (!continuation || continuation_resubmit)
			
 
				+#endif
			
 
				+			{
			
 
				+				/* We reuse the same job structure */
			
 
				+				int ret = _starpu_submit_job(j);
			
 
				+				STARPU_ASSERT(!ret);
			
 
				+			}
			
 
				+#ifdef STARPU_OPENMP
			
 
				+			if (continuation && continuation_callback_on_sleep != NULL)
			
 
				+			{
			
 
				+				continuation_callback_on_sleep(continuation_callback_on_sleep_arg);
			
 
				+			}
			
 
				+#endif
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	_starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx);
			
@@ -461,6 +578,29 @@ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* When waking up a continuation, we only enforce new task dependencies */
			
 
				+unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j)
			
 
				+{
			
 
				+	unsigned ret;
			
 
				+        _STARPU_LOG_IN();
			
 
				+	STARPU_ASSERT(j->discontinuous);
			
 
				+
			
 
				+	/* enfore task dependencies */
			
 
				+	if (_starpu_not_all_task_deps_are_fulfilled(j))
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				+		_STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled");
			
 
				+		return 0;
			
 
				+	}
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				+	ret = _starpu_push_task(j);
			
 
				+
			
 
				+	_STARPU_LOG_OUT();
			
 
				+	return ret;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* Ordered tasks are simply recorded as they arrive in the local_ordered_tasks
			
 
				  * ring buffer, indexed by order, and pulled from its head. */
			
 
				 /* TODO: replace with perhaps a heap */
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -3,6 +3,7 @@
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -112,6 +113,36 @@ LIST_TYPE(_starpu_job,
 
				 	 */
			
 
				 	unsigned terminated:2;
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	/* Job is a continuation or a regular task. */
			
 
				+	unsigned continuation;
			
 
				+
			
 
				+	/* If 0, the prepared continuation is not resubmitted automatically
			
 
				+	 * when going to sleep, if 1, the prepared continuation is immediately
			
 
				+	 * resubmitted when going to sleep. */
			
 
				+	unsigned continuation_resubmit;
			
 
				+
			
 
				+	/* Callback function called when:
			
 
				+	 * - The continuation starpu task is ready to be submitted again if
			
 
				+	 *   continuation_resubmit = 0;
			
 
				+	 * - The continuation starpu task has just been re-submitted if
			
 
				+	 *   continuation_resubmit = 1. */
			
 
				+	void (*continuation_callback_on_sleep)(void *arg);
			
 
				+	void *continuation_callback_on_sleep_arg;
			
 
				+
			
 
				+	void (*omp_cleanup_callback)(void *arg);
			
 
				+	void *omp_cleanup_callback_arg;
			
 
				+
			
 
				+	/* Job has been stopped at least once. */
			
 
				+	unsigned discontinuous;
			
 
				+
			
 
				+	/* Cumulated execution time for discontinuous jobs */
			
 
				+	struct timespec cumulated_ts;
			
 
				+
			
 
				+	/* Cumulated power consumption for discontinuous jobs */
			
 
				+	double cumulated_power_consumed;
			
 
				+#endif
			
 
				+
			
 
				 	/* The value of the footprint that identifies the job may be stored in
			
 
				 	 * this structure. */
			
 
				 	uint32_t footprint;
			
@@ -167,13 +198,29 @@ int _starpu_job_finished(struct _starpu_job *j);
 
				 /* Wait for the termination of the job */
			
 
				 void _starpu_wait_job(struct _starpu_job *j);
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* Test for the termination of the job */
			
 
				+int _starpu_test_job_termination(struct _starpu_job *j);
			
 
				+
			
 
				+/* Prepare the job for accepting new dependencies before becoming a continuation. */
			
 
				+
			
 
				+void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit,
			
 
				+		void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg);
			
 
				+void _starpu_job_prepare_for_continuation(struct _starpu_job *j);
			
 
				+void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j,
			
 
				+		void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg);
			
 
				+#endif
			
 
				+
			
 
				 /* Specify that the task should not appear in the DAG generated by debug tools. */
			
 
				 void _starpu_exclude_task_from_dag(struct starpu_task *task);
			
 
				 
			
 
				 /* try to submit job j, enqueue it if it's not schedulable yet. The job's sync mutex is supposed to be held already */
			
 
				 unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j);
			
 
				 unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j);
			
 
				-
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* When waking up a continuation, we only enforce new task dependencies */
			
 
				+unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j);
			
 
				+#endif
			
 
				 
			
 
				 /* This function must be called after the execution of a job, this triggers all
			
 
				  * job's dependencies and perform the callback function if any. */
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -27,6 +27,7 @@
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <msg/msg.h>
			
 
				 #include <smpi/smpif.h>
			
 
				+#include <sys/resource.h>
			
 
				 
			
 
				 #define STARPU_MPI_AS_PREFIX "StarPU-MPI"
			
 
				 
			
@@ -178,7 +179,12 @@ int main(int argc, char **argv)
 
				 #endif
			
 
				 	/* Simgrid uses tiny stacks by default.  This comes unexpected to our users.  */
			
 
				 	extern xbt_cfg_t _sg_cfg_set;
			
 
				-	xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", 8192);
			
 
				+	unsigned stack_size = 8192;
			
 
				+	struct rlimit rlim;
			
 
				+	if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur != 0 && rlim.rlim_cur != RLIM_INFINITY)
			
 
				+		stack_size = rlim.rlim_cur / 1024;
			
 
				+
			
 
				+	xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", stack_size);
			
 
				 
			
 
				 	/* Load XML platform */
			
 
				 	_starpu_simgrid_get_platform_path(path, sizeof(path));
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -3,7 +3,7 @@
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				- * Copyright (C) 2011  INRIA
			
 
				+ * Copyright (C) 2011, 2014  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -223,6 +223,33 @@ int starpu_task_wait(struct starpu_task *task)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+int _starpu_task_test_termination(struct starpu_task *task)
			
 
				+{
			
 
				+	STARPU_ASSERT(task);
			
 
				+	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");
			
 
				+
			
 
				+	if (task->detach || task->synchronous)
			
 
				+	{
			
 
				+		_STARPU_DEBUG("Task is detached or synchronous\n");
			
 
				+		_STARPU_LOG_OUT_TAG("einval");
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
			
 
				+
			
 
				+	int ret = _starpu_test_job_termination(j);
			
 
				+
			
 
				+	if (ret)
			
 
				+	{
			
 
				+		if (task->destroy)
			
 
				+			_starpu_task_destroy(task);
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
			
 
				 {
			
 
				 	STARPU_ASSERT(task);
			
@@ -240,8 +267,13 @@ struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
 
				  * already counted. */
			
 
				 int _starpu_submit_job(struct _starpu_job *j)
			
 
				 {
			
 
				-
			
 
				 	struct starpu_task *task = j->task;
			
 
				+	int ret;
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	const unsigned continuation = j->continuation;
			
 
				+#else
			
 
				+	const unsigned continuation = 0;
			
 
				+#endif
			
 
				 
			
 
				 	_STARPU_LOG_IN();
			
 
				 	/* notify bound computation of a new task */
			
@@ -282,7 +314,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 
				 #endif//STARPU_USE_SC_HYPERVISOR
			
 
				 
			
 
				 	/* We retain handle reference count */
			
 
				-	if (task->cl)
			
 
				+	if (task->cl && !continuation)
			
 
				 	{
			
 
				 		unsigned i;
			
 
				 		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			
@@ -300,12 +332,29 @@ int _starpu_submit_job(struct _starpu_job *j)
 
				 	/* Need to atomically set submitted to 1 and check dependencies, since
			
 
				 	 * this is concucrent with _starpu_notify_cg */
			
 
				 	j->terminated = 0;
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	if (continuation)
			
 
				+	{
			
 
				+		j->discontinuous = 1;
			
 
				+		j->continuation  = 0;
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				 	if (!j->submitted)
			
 
				 		j->submitted = 1;
			
 
				 	else
			
 
				 		j->submitted = 2;
			
 
				 
			
 
				-	int ret = _starpu_enforce_deps_and_schedule(j);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	if (continuation)
			
 
				+	{
			
 
				+		ret = _starpu_reenforce_task_deps_and_schedule(j);
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		ret = _starpu_enforce_deps_and_schedule(j);
			
 
				+	}
			
 
				 
			
 
				 	_STARPU_LOG_OUT();
			
 
				 	return ret;
			
@@ -414,11 +463,18 @@ int starpu_task_submit(struct starpu_task *task)
 
				 	int ret;
			
 
				 	unsigned is_sync = task->synchronous;
			
 
				 	starpu_task_bundle_t bundle = task->bundle;
			
 
				-
			
 
				 	/* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a
			
 
				 	* task structure, it is possible that this job structure was already
			
 
				 	* allocated. */
			
 
				 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
			
 
				+	const unsigned continuation =
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		j->continuation
			
 
				+#else
			
 
				+		0
			
 
				+#endif
			
 
				+		;
			
 
				+
			
 
				 
			
 
				 	if (j->internal)
			
 
				 	{
			
@@ -483,7 +539,11 @@ int starpu_task_submit(struct starpu_task *task)
 
				 			return -ENODEV;
			
 
				 		}
			
 
				 
			
 
				-		_starpu_detect_implicit_data_deps(task);
			
 
				+		/* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */
			
 
				+		if (!continuation)
			
 
				+		{
			
 
				+			_starpu_detect_implicit_data_deps(task);
			
 
				+		}
			
 
				 
			
 
				 		if (task->cl->model)
			
 
				 			_starpu_init_and_load_perfmodel(task->cl->model);
			
@@ -890,6 +950,31 @@ void _starpu_set_current_task(struct starpu_task *task)
 
				 	STARPU_PTHREAD_SETSPECIFIC(current_task_key, task);
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* Prepare the fields of the currentl task for accepting a new set of
			
 
				+ * dependencies in anticipation of becoming a continuation.
			
 
				+ *
			
 
				+ * When the task becomes 'continued', it will only be queued again when the new
			
 
				+ * set of dependencies is fulfilled. */
			
 
				+void _starpu_task_prepare_for_continuation(void)
			
 
				+{
			
 
				+	_starpu_job_prepare_for_continuation(_starpu_get_job_associated_to_task(starpu_task_get_current()));
			
 
				+}
			
 
				+
			
 
				+void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit,
			
 
				+		void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg)
			
 
				+{
			
 
				+	_starpu_job_prepare_for_continuation_ext(_starpu_get_job_associated_to_task(starpu_task_get_current()),
			
 
				+		continuation_resubmit, continuation_callback_on_sleep, continuation_callback_on_sleep_arg);
			
 
				+}
			
 
				+
			
 
				+void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg)
			
 
				+{
			
 
				+	_starpu_job_set_omp_cleanup_callback(_starpu_get_job_associated_to_task(task),
			
 
				+		omp_cleanup_callback, omp_cleanup_callback_arg);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /*
			
 
				  * Returns 0 if tasks does not use any multiformat handle, 1 otherwise.
			
 
				  */
			
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2013  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
			
 
				- * Copyright (C) 2011 INRIA
			
 
				+ * Copyright (C) 2011, 2014 INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -26,6 +26,12 @@
 
				 /* Internal version of starpu_task_destroy: don't check task->destroy flag */
			
 
				 void _starpu_task_destroy(struct starpu_task *task);
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* Test for the termination of the task.
			
 
				+ * Call starpu_task_destroy if required and the task is terminated. */
			
 
				+int _starpu_task_test_termination(struct starpu_task *task);
			
 
				+#endif
			
 
				+
			
 
				 /* A pthread key is used to store the task currently executed on the thread.
			
 
				  * _starpu_initialize_current_task_key initializes this pthread key and
			
 
				  * _starpu_set_current_task updates its current value. */
			
@@ -53,6 +59,17 @@ int
 
				 _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
			
 
				 				     enum starpu_node_kind node_kind);
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+/* Prepare the current task for accepting new dependencies before becoming a continuation. */
			
 
				+void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit,
			
 
				+		void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg);
			
 
				+
			
 
				+void _starpu_task_prepare_for_continuation(void);
			
 
				+
			
 
				+void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg),
			
 
				+		void *omp_cleanup_callback_arg);
			
 
				+#endif
			
 
				+
			
 
				 int _starpu_task_uses_multiformat_handles(struct starpu_task *task);
			
 
				 
			
 
				 int _starpu_task_submit_conversion_task(struct starpu_task *task,
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -983,6 +983,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
				 
			
 
				 	int ret;
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	_starpu_omp_dummy_init();
			
 
				+#endif
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	_starpu_simgrid_init();
			
 
				 #else
			
@@ -1382,6 +1385,9 @@ void starpu_shutdown(void)
 
				 	/* Drop all remaining tags */
			
 
				 	_starpu_tag_clear();
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	_starpu_omp_dummy_shutdown();
			
 
				+#endif
			
 
				 	_starpu_close_debug_logfile();
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -867,6 +868,9 @@ enomem:
 
				 
			
 
				 void _starpu_push_task_output(struct _starpu_job *j)
			
 
				 {
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	STARPU_ASSERT(!j->continuation);
			
 
				+#endif
			
 
				 	_STARPU_TRACE_START_PUSH_OUTPUT(NULL);
			
 
				 
			
 
				 	int profiling = starpu_profiling_status_get();
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -217,6 +218,10 @@ struct _starpu_data_state
 
				 
			
 
				 	unsigned lazy_unregister;
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	unsigned removed_from_context_hash;
			
 
				+#endif
			
 
				+
			
 
				         /* Used for MPI */
			
 
				         int rank;
			
 
				 	int tag;
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -24,6 +25,9 @@
 
				 #include <core/task.h>
			
 
				 #include <core/workers.h>
			
 
				 #include <datawizard/memstats.h>
			
 
				+#ifdef STARPU_OPENMP
			
 
				+#include <util/openmp_runtime_support.h>
			
 
				+#endif
			
 
				 
			
 
				 /* Entry in the `registered_handles' hash table.  */
			
 
				 struct handle_entry
			
@@ -90,6 +94,34 @@ void _starpu_data_interface_shutdown()
 
				 	registered_tag_handles = NULL;
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+void _starpu_omp_unregister_region_handles(struct starpu_omp_region *region)
			
 
				+{
			
 
				+	_starpu_spin_lock(&region->registered_handles_lock);
			
 
				+	struct handle_entry *entry, *tmp;
			
 
				+	HASH_ITER(hh, (region->registered_handles), entry, tmp)
			
 
				+	{
			
 
				+		entry->handle->removed_from_context_hash = 1;
			
 
				+		HASH_DEL(region->registered_handles, entry);
			
 
				+		starpu_data_unregister_submit(entry->handle);
			
 
				+		free(entry);
			
 
				+	}
			
 
				+	_starpu_spin_unlock(&region->registered_handles_lock);
			
 
				+}
			
 
				+
			
 
				+void _starpu_omp_unregister_task_handles(struct starpu_omp_task *task)
			
 
				+{
			
 
				+	struct handle_entry *entry, *tmp;
			
 
				+	HASH_ITER(hh, task->registered_handles, entry, tmp)
			
 
				+	{
			
 
				+		entry->handle->removed_from_context_hash = 1;
			
 
				+		HASH_DEL(task->registered_handles, entry);
			
 
				+		starpu_data_unregister_submit(entry->handle);
			
 
				+		free(entry);
			
 
				+	}
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id)
			
 
				 {
			
 
				 	switch (interface_id)
			
@@ -136,26 +168,80 @@ void _starpu_data_register_ram_pointer(starpu_data_handle_t handle, void *ptr)
 
				 	entry->pointer = ptr;
			
 
				 	entry->handle = handle;
			
 
				 
			
 
				-	_starpu_spin_lock(&registered_handles_lock);
			
 
				-	HASH_ADD_PTR(registered_handles, pointer, entry);
			
 
				-	_starpu_spin_unlock(&registered_handles_lock);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	if (task)
			
 
				+	{
			
 
				+		if (task->is_implicit)
			
 
				+		{
			
 
				+			struct starpu_omp_region *parallel_region = task->owner_region;
			
 
				+			_starpu_spin_lock(&parallel_region->registered_handles_lock);
			
 
				+			HASH_ADD_PTR(parallel_region->registered_handles, pointer, entry);
			
 
				+			_starpu_spin_unlock(&parallel_region->registered_handles_lock);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			HASH_ADD_PTR(task->registered_handles, pointer, entry);
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		_starpu_spin_lock(&registered_handles_lock);
			
 
				+		HASH_ADD_PTR(registered_handles, pointer, entry);
			
 
				+		_starpu_spin_unlock(&registered_handles_lock);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 starpu_data_handle_t starpu_data_lookup(const void *ptr)
			
 
				 {
			
 
				 	starpu_data_handle_t result;
			
 
				 
			
 
				-	_starpu_spin_lock(&registered_handles_lock);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	if (task)
			
 
				 	{
			
 
				-		struct handle_entry *entry;
			
 
				+		if (task->is_implicit)
			
 
				+		{
			
 
				+			struct starpu_omp_region *parallel_region = task->owner_region;
			
 
				+			_starpu_spin_lock(&parallel_region->registered_handles_lock);
			
 
				+			{
			
 
				+				struct handle_entry *entry;
			
 
				 
			
 
				-		HASH_FIND_PTR(registered_handles, &ptr, entry);
			
 
				-		if(STARPU_UNLIKELY(entry == NULL))
			
 
				-			result = NULL;
			
 
				+				HASH_FIND_PTR(parallel_region->registered_handles, &ptr, entry);
			
 
				+				if(STARPU_UNLIKELY(entry == NULL))
			
 
				+					result = NULL;
			
 
				+				else
			
 
				+					result = entry->handle;
			
 
				+			}
			
 
				+			_starpu_spin_unlock(&parallel_region->registered_handles_lock);
			
 
				+		}
			
 
				 		else
			
 
				-			result = entry->handle;
			
 
				+		{
			
 
				+			struct handle_entry *entry;
			
 
				+
			
 
				+			HASH_FIND_PTR(task->registered_handles, &ptr, entry);
			
 
				+			if(STARPU_UNLIKELY(entry == NULL))
			
 
				+				result = NULL;
			
 
				+			else
			
 
				+				result = entry->handle;
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		_starpu_spin_lock(&registered_handles_lock);
			
 
				+		{
			
 
				+			struct handle_entry *entry;
			
 
				+
			
 
				+			HASH_FIND_PTR(registered_handles, &ptr, entry);
			
 
				+			if(STARPU_UNLIKELY(entry == NULL))
			
 
				+				result = NULL;
			
 
				+			else
			
 
				+				result = entry->handle;
			
 
				+		}
			
 
				+		_starpu_spin_unlock(&registered_handles_lock);
			
 
				 	}
			
 
				-	_starpu_spin_unlock(&registered_handles_lock);
			
 
				 
			
 
				 	return result;
			
 
				 }
			
@@ -519,21 +605,46 @@ struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_hand
 
				 void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	const void *ram_ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	if (handle->removed_from_context_hash)
			
 
				+		return;
			
 
				+#endif
			
 
				 	if (ram_ptr != NULL)
			
 
				 	{
			
 
				 		/* Remove the PTR -> HANDLE mapping.  If a mapping from PTR
			
 
				 		 * to another handle existed before (e.g., when using
			
 
				 		 * filters), it becomes visible again.  */
			
 
				 		struct handle_entry *entry;
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+		if (task)
			
 
				+		{
			
 
				+			if (task->is_implicit)
			
 
				+			{
			
 
				+				struct starpu_omp_region *parallel_region = task->owner_region;
			
 
				+				_starpu_spin_lock(&parallel_region->registered_handles_lock);
			
 
				+				HASH_FIND_PTR(parallel_region->registered_handles, &ram_ptr, entry);
			
 
				+				STARPU_ASSERT(entry != NULL);
			
 
				+				HASH_DEL(registered_handles, entry);
			
 
				+				_starpu_spin_unlock(&parallel_region->registered_handles_lock);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				HASH_FIND_PTR(task->registered_handles, &ram_ptr, entry);
			
 
				+				STARPU_ASSERT(entry != NULL);
			
 
				+				HASH_DEL(task->registered_handles, entry);
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+#endif
			
 
				+		{
			
 
				 
			
 
				-		_starpu_spin_lock(&registered_handles_lock);
			
 
				-		HASH_FIND_PTR(registered_handles, &ram_ptr, entry);
			
 
				-		STARPU_ASSERT(entry != NULL);
			
 
				-
			
 
				-		HASH_DEL(registered_handles, entry);
			
 
				-
			
 
				-		_starpu_spin_unlock(&registered_handles_lock);
			
 
				-
			
 
				+			_starpu_spin_lock(&registered_handles_lock);
			
 
				+			HASH_FIND_PTR(registered_handles, &ram_ptr, entry);
			
 
				+			STARPU_ASSERT(entry != NULL);
			
 
				+			HASH_DEL(registered_handles, entry);
			
 
				+			_starpu_spin_unlock(&registered_handles_lock);
			
 
				+		}
			
 
				 		free(entry);
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/interfaces/data_interface.h
+++ b/src/datawizard/interfaces/data_interface.h
@@ -2,6 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -20,6 +21,10 @@
 
				 
			
 
				 #include <starpu.h>
			
 
				 #include <common/config.h>
			
 
				+#include <common/uthash.h>
			
 
				+#ifdef STARPU_OPENMP
			
 
				+#include <util/openmp_runtime_support.h>
			
 
				+#endif
			
 
				 
			
 
				 /* Generic type representing an interface, for now it's only used before
			
 
				  * execution on message-passing devices but it can be useful in other cases.
			
@@ -57,6 +62,11 @@ extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
 
				 extern int _starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_INTERNAL;
			
 
				 extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
			
 
				 
			
 
				+#ifdef STARPU_OPENMP
			
 
				+void _starpu_omp_unregister_region_handles(struct starpu_omp_region *region);
			
 
				+void _starpu_omp_unregister_task_handles(struct starpu_omp_task *task);
			
 
				+#endif
			
 
				+
			
 
				 struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id);
			
 
				 
			
 
				 extern void _starpu_data_register_ram_pointer(starpu_data_handle_t handle,
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -4,6 +4,7 @@
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -57,10 +58,17 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
				 
			
 
				 	struct starpu_task *task = j->task;
			
 
				 	struct starpu_codelet *cl = task->cl;
			
 
				+#ifdef STARPU_OPENMP
			
 
				+	/* At this point, j->continuation as been cleared as the task is being
			
 
				+	 * woken up, thus we use j->discontinuous instead for the check */
			
 
				+	const unsigned continuation_wake_up = j->discontinuous;
			
 
				+#else
			
 
				+	const unsigned continuation_wake_up = 0;
			
 
				+#endif
			
 
				 
			
 
				 	STARPU_ASSERT(cl);
			
 
				 
			
 
				-	if (rank == 0)
			
 
				+	if (rank == 0 && !continuation_wake_up)
			
 
				 	{
			
 
				 		ret = _starpu_fetch_task_input(j);
			
 
				 		if (ret != 0)
			
@@ -116,7 +124,12 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
				 	{
			
 
				 		_starpu_driver_update_job_feedback(j, cpu_args,
			
 
				 				perf_arch, &codelet_start, &codelet_end, profiling);
			
 
				-		_starpu_push_task_output(j);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		if (!j->continuation)
			
 
				+#endif
			
 
				+		{
			
 
				+			_starpu_push_task_output(j);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -333,7 +333,17 @@ static void deinit_context(struct _starpu_worker_set *worker_set)
 
				 
			
 
				 	/* cleanup the runtime API internal stuffs (which CUBLAS is using) */
			
 
				 	cures = cudaThreadExit();
			
 
				-	if (cures)
			
 
				+	if (cures
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		/* When StarPU is used as Open Runtime support,
			
 
				+		 * starpu_omp_shutdown() will usually be called from a
			
 
				+		 * destructor, in which case cudaThreadExit() reports a
			
 
				+		 * cudaErrorCudartUnloading here. There should not
			
 
				+		 * be any remaining tasks running at this point so
			
 
				+		 * we can probably ignore it without much consequences. */
			
 
				+		&& cures != cudaErrorCudartUnloading
			
 
				+#endif /* STARPU_OPENMP */
			
 
				+	)
			
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 #endif /* !SIMGRID */
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -3,6 +3,7 @@
 
				  * Copyright (C) 2010-2014  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				+ * Copyright (C) 2014  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -205,9 +206,40 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
				 		}
			
 
				 
			
 
				 		if (calibrate_model)
			
 
				-			_starpu_update_perfmodel_history(j, j->task->cl->model,  perf_arch, worker->devid, measured,j->nimpl);
			
 
				-
			
 
				-
			
 
				+		{
			
 
				+#ifdef STARPU_OPENMP
			
 
				+			double time_consumed = measured;
			
 
				+			unsigned do_update_time_model;
			
 
				+			if (j->continuation)
			
 
				+			{
			
 
				+				/* The job is only paused, thus we accumulate
			
 
				+				 * its timing, but we don't update its
			
 
				+				 * perfmodel now. */
			
 
				+				starpu_timespec_accumulate(&j->cumulated_ts, &measured_ts);
			
 
				+				do_update_time_model = 0;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (j->discontinuous)
			
 
				+				{
			
 
				+					/* The job was paused at least once but is now
			
 
				+					 * really completing. We need to take into
			
 
				+					 * account its past execution time in its
			
 
				+					 * perfmodel. */
			
 
				+					starpu_timespec_accumulate(&measured_ts, &j->cumulated_ts);
			
 
				+					time_consumed = starpu_timing_timespec_to_us(&measured_ts);
			
 
				+				}
			
 
				+				do_update_time_model = 1;
			
 
				+			}
			
 
				+#else
			
 
				+			const unsigned do_update_time_model = 1;
			
 
				+			const double time_consumed = measured;
			
 
				+#endif
			
 
				+			if (do_update_time_model)
			
 
				+			{
			
 
				+				_starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl);
			
 
				+			}
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (!updated)
			
@@ -215,7 +247,31 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
				 
			
 
				 	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking)
			
 
				 	{
			
 
				-		_starpu_update_perfmodel_history(j, j->task->cl->power_model, perf_arch, worker->devid, profiling_info->power_consumed,j->nimpl);
			
 
				+#ifdef STARPU_OPENMP
			
 
				+		double power_consumed = profiling_info->power_consumed;
			
 
				+		unsigned do_update_power_model;
			
 
				+		if (j->continuation)
			
 
				+		{
			
 
				+			j->cumulated_power_consumed += power_consumed;
			
 
				+			do_update_power_model = 0;
			
 
				+		}
			
 
				+		else 
			
 
				+		{
			
 
				+			if (j->discontinuous)
			
 
				+			{
			
 
				+				power_consumed += j->cumulated_power_consumed;
			
 
				+			}
			
 
				+			do_update_power_model = 1;
			
 
				+		}
			
 
				+#else
			
 
				+		const double power_consumed = profiling_info->power_consumed;
			
 
				+		const unsigned do_update_power_model = 1;
			
 
				+#endif
			
 
				+
			
 
				+		if (do_update_power_model)
			
 
				+		{
			
 
				+			_starpu_update_perfmodel_history(j, j->task->cl->power_model, perf_arch, worker->devid, power_consumed, j->nimpl);
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/util/openmp_runtime_support.c
+++ b/src/util/openmp_runtime_support.c
--- a/src/util/openmp_runtime_support.h
+++ b/src/util/openmp_runtime_support.h
@@ -0,0 +1,378 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __OPENMP_RUNTIME_SUPPORT_H__
			
 
				+#define __OPENMP_RUNTIME_SUPPORT_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef STARPU_OPENMP
			
 
				+#include <common/list.h>
			
 
				+#include <common/starpu_spinlock.h>
			
 
				+#include <common/uthash.h>
			
 
				+
			
 
				+/* ucontexts have been deprecated as of POSIX 1-2004
			
 
				+ * _XOPEN_SOURCE required at least on OS/X
			
 
				+ * 
			
 
				+ * TODO: add detection in configure.ac
			
 
				+ */
			
 
				+#ifndef _XOPEN_SOURCE
			
 
				+#define _XOPEN_SOURCE
			
 
				+#endif
			
 
				+#include <ucontext.h>
			
 
				+
			
 
				+/*
			
 
				+ * Arbitrary limit on the number of nested parallel sections
			
 
				+ */
			
 
				+#define STARPU_OMP_MAX_ACTIVE_LEVELS 1
			
 
				+
			
 
				+/*
			
 
				+ * Possible abstract names for OpenMP places
			
 
				+ */
			
 
				+enum starpu_omp_place_name
			
 
				+{
			
 
				+	starpu_omp_place_undefined = 0,
			
 
				+	starpu_omp_place_threads   = 1,
			
 
				+	starpu_omp_place_cores     = 2,
			
 
				+	starpu_omp_place_sockets   = 3,
			
 
				+	starpu_omp_place_numerical = 4 /* place specified numerically */
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_numeric_place
			
 
				+{
			
 
				+	int excluded_place;
			
 
				+	int *included_numeric_items;
			
 
				+	int nb_included_numeric_items;
			
 
				+	int *excluded_numeric_items;
			
 
				+	int nb_excluded_numeric_items;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * OpenMP place for thread afinity, defined by the OpenMP spec
			
 
				+ */
			
 
				+struct starpu_omp_place
			
 
				+{
			
 
				+	int abstract_name;
			
 
				+	int abstract_excluded;
			
 
				+	int abstract_length;
			
 
				+	struct starpu_omp_numeric_place *numeric_places;
			
 
				+	int nb_numeric_places;
			
 
				+};
			
 
				+
			
 
				+/* 
			
 
				+ * Internal Control Variables (ICVs) declared following
			
 
				+ * OpenMP 4.0.0 spec section 2.3.1
			
 
				+ */
			
 
				+struct starpu_omp_data_environment_icvs
			
 
				+{
			
 
				+	/* parallel region icvs */
			
 
				+	int dyn_var;
			
 
				+	int nest_var;
			
 
				+	int *nthreads_var; /* nthreads_var ICV is a list */
			
 
				+	int thread_limit_var;
			
 
				+
			
 
				+	int active_levels_var;
			
 
				+	int levels_var;
			
 
				+	int *bind_var; /* bind_var ICV is a list */
			
 
				+
			
 
				+	/* loop region icvs */
			
 
				+	int run_sched_var;
			
 
				+	unsigned long long run_sched_chunk_var;
			
 
				+
			
 
				+	/* program execution icvs */
			
 
				+	int default_device_var;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_device_icvs
			
 
				+{
			
 
				+	/* parallel region icvs */
			
 
				+	int max_active_levels_var;
			
 
				+
			
 
				+	/* loop region icvs */
			
 
				+	int def_sched_var;
			
 
				+	unsigned long long def_sched_chunk_var;
			
 
				+
			
 
				+	/* program execution icvs */
			
 
				+	int stacksize_var;
			
 
				+	int wait_policy_var;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_implicit_task_icvs
			
 
				+{
			
 
				+	/* parallel region icvs */
			
 
				+	int place_partition_var;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_global_icvs
			
 
				+{
			
 
				+	/* program execution icvs */
			
 
				+	int cancel_var;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_initial_icv_values
			
 
				+{
			
 
				+	int dyn_var;
			
 
				+	int nest_var;
			
 
				+	int *nthreads_var;
			
 
				+	int run_sched_var;
			
 
				+	unsigned long long run_sched_chunk_var;
			
 
				+	int def_sched_var;
			
 
				+	unsigned long long def_sched_chunk_var;
			
 
				+	int *bind_var;
			
 
				+	int stacksize_var;
			
 
				+	int wait_policy_var;
			
 
				+	int thread_limit_var;
			
 
				+	int max_active_levels_var;
			
 
				+	int active_levels_var;
			
 
				+	int levels_var;
			
 
				+	int place_partition_var;
			
 
				+	int cancel_var;
			
 
				+	int default_device_var;
			
 
				+
			
 
				+	/* not a real ICV, but needed to store the contents of OMP_PLACES */
			
 
				+	struct starpu_omp_place places;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_task_group
			
 
				+{
			
 
				+	int descendent_task_count;
			
 
				+	struct starpu_omp_task *leader_task;
			
 
				+	struct starpu_omp_task_group *p_previous_task_group;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_task_link
			
 
				+{
			
 
				+	struct starpu_omp_task *task;
			
 
				+	struct starpu_omp_task_link *next;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_condition
			
 
				+{
			
 
				+	struct starpu_omp_task_link *contention_list_head;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_critical
			
 
				+{
			
 
				+	UT_hash_handle hh;
			
 
				+	struct _starpu_spinlock lock;
			
 
				+	unsigned state;
			
 
				+	struct starpu_omp_task_link *contention_list_head;
			
 
				+	const char *name;
			
 
				+};
			
 
				+
			
 
				+enum starpu_omp_task_state
			
 
				+{
			
 
				+	starpu_omp_task_state_clear      = 0,
			
 
				+	starpu_omp_task_state_preempted  = 1,
			
 
				+	starpu_omp_task_state_terminated = 2,
			
 
				+	starpu_omp_task_state_zombie     = 3,
			
 
				+};
			
 
				+
			
 
				+enum starpu_omp_task_wait_on
			
 
				+{
			
 
				+	starpu_omp_task_wait_on_task_childs  = 1 << 0,
			
 
				+	starpu_omp_task_wait_on_region_tasks = 1 << 1,
			
 
				+	starpu_omp_task_wait_on_barrier      = 1 << 2,
			
 
				+	starpu_omp_task_wait_on_group        = 1 << 3,
			
 
				+	starpu_omp_task_wait_on_critical     = 1 << 4,
			
 
				+	starpu_omp_task_wait_on_condition    = 1 << 5
			
 
				+};
			
 
				+
			
 
				+LIST_TYPE(starpu_omp_task,
			
 
				+	struct starpu_omp_implicit_task_icvs icvs;
			
 
				+	struct starpu_omp_task *parent_task;
			
 
				+	struct starpu_omp_thread *owner_thread;
			
 
				+	struct starpu_omp_region *owner_region;
			
 
				+	struct starpu_omp_region *nested_region;
			
 
				+	int is_implicit;
			
 
				+	int is_undeferred;
			
 
				+	int is_final;
			
 
				+	int is_untied;
			
 
				+	int rank;
			
 
				+	int child_task_count;
			
 
				+	struct starpu_omp_task_group *task_group;
			
 
				+	struct _starpu_spinlock lock;
			
 
				+	int wait_on;
			
 
				+	int barrier_count;
			
 
				+	int single_id;
			
 
				+	int single_first;
			
 
				+	int loop_id;
			
 
				+	unsigned long long ordered_first_i;
			
 
				+	unsigned long long ordered_nb_i;
			
 
				+	int sections_id;
			
 
				+	struct starpu_omp_data_environment_icvs data_env_icvs;
			
 
				+	struct starpu_omp_implicit_task_icvs implicit_task_icvs;
			
 
				+	struct handle_entry *registered_handles;
			
 
				+
			
 
				+	struct starpu_task *starpu_task;
			
 
				+	struct starpu_codelet cl;
			
 
				+	void **starpu_buffers;
			
 
				+	void *starpu_cl_arg;
			
 
				+
			
 
				+	/* actual task function to be run */
			
 
				+	void (*cpu_f)(void **starpu_buffers, void *starpu_cl_arg);
			
 
				+#if STARPU_USE_CUDA
			
 
				+	void (*cuda_f)(void **starpu_buffers, void *starpu_cl_arg);
			
 
				+#endif
			
 
				+#if STARPU_USE_OPENCL
			
 
				+	void (*opencl_f)(void **starpu_buffers, void *starpu_cl_arg);
			
 
				+#endif
			
 
				+
			
 
				+	enum starpu_omp_task_state state;
			
 
				+
			
 
				+	/* 
			
 
				+	 * context to store the processing state of the task
			
 
				+	 * in case of blocking/recursive task operation
			
 
				+	 */
			
 
				+	ucontext_t ctx;
			
 
				+
			
 
				+	/*
			
 
				+	 * stack to execute the task over, to be able to switch
			
 
				+	 * in case blocking/recursive task operation
			
 
				+	 */
			
 
				+	void *stack;
			
 
				+
			
 
				+	size_t stacksize;
			
 
				+)
			
 
				+
			
 
				+LIST_TYPE(starpu_omp_thread,
			
 
				+
			
 
				+	UT_hash_handle hh;
			
 
				+	struct starpu_omp_task *current_task;
			
 
				+	struct starpu_omp_region *owner_region;
			
 
				+
			
 
				+	/*
			
 
				+	 * stack to execute the initial thread over
			
 
				+	 * when preempting the initial task
			
 
				+	 * note: should not be used for other threads
			
 
				+	 */
			
 
				+	void *initial_thread_stack;
			
 
				+
			
 
				+	/*
			
 
				+	 * context to store the 'scheduler' state of the thread,
			
 
				+	 * to which the execution of thread comes back upon a
			
 
				+	 * blocking/recursive task operation
			
 
				+	 */
			
 
				+	ucontext_t ctx;
			
 
				+
			
 
				+	struct starpu_driver starpu_driver;
			
 
				+	struct _starpu_worker *worker;
			
 
				+)
			
 
				+
			
 
				+struct _starpu_omp_lock_internal
			
 
				+{
			
 
				+	struct _starpu_spinlock lock;
			
 
				+	struct starpu_omp_condition cond;
			
 
				+	unsigned state;
			
 
				+};
			
 
				+
			
 
				+struct _starpu_omp_nest_lock_internal
			
 
				+{
			
 
				+	struct _starpu_spinlock lock;
			
 
				+	struct starpu_omp_condition cond;
			
 
				+	unsigned state;
			
 
				+	struct starpu_omp_task *owner_task;
			
 
				+	unsigned nesting;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_loop
			
 
				+{
			
 
				+	int id;
			
 
				+	unsigned long long next_iteration;
			
 
				+	int nb_completed_threads;
			
 
				+	struct starpu_omp_loop *next_loop;
			
 
				+	struct _starpu_spinlock ordered_lock;
			
 
				+	struct starpu_omp_condition ordered_cond;
			
 
				+	unsigned long long ordered_iteration;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_sections
			
 
				+{
			
 
				+	int id;
			
 
				+	unsigned long long next_section_num;
			
 
				+	int nb_completed_threads;
			
 
				+	struct starpu_omp_sections *next_sections;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_region
			
 
				+{
			
 
				+	struct starpu_omp_data_environment_icvs icvs;
			
 
				+	struct starpu_omp_region *parent_region;
			
 
				+	struct starpu_omp_device *owner_device;
			
 
				+	struct starpu_omp_thread *master_thread;
			
 
				+	/* note: the list of threads does not include the master_thread */
			
 
				+	struct starpu_omp_thread_list *thread_list;
			
 
				+	/* list of implicit omp tasks created to run the region */
			
 
				+	struct starpu_omp_task_list *implicit_task_list;
			
 
				+	/* include both the master thread and the region own threads */
			
 
				+	int nb_threads;
			
 
				+	struct _starpu_spinlock lock;
			
 
				+	struct starpu_omp_task *waiting_task;
			
 
				+	int barrier_count;
			
 
				+	int bound_explicit_task_count;
			
 
				+	int single_id;
			
 
				+	void *copy_private_data;
			
 
				+	int level;
			
 
				+	struct starpu_omp_loop *loop_list;
			
 
				+	struct starpu_omp_sections *sections_list;
			
 
				+	struct starpu_task *continuation_starpu_task;
			
 
				+	struct handle_entry *registered_handles;
			
 
				+	struct _starpu_spinlock registered_handles_lock;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_device
			
 
				+{
			
 
				+	struct starpu_omp_device_icvs icvs;
			
 
				+
			
 
				+	/* atomic fallback implementation lock */
			
 
				+	struct _starpu_spinlock atomic_lock;
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_global
			
 
				+{
			
 
				+	struct starpu_omp_global_icvs icvs;
			
 
				+	struct starpu_omp_task *initial_task;
			
 
				+	struct starpu_omp_thread *initial_thread;
			
 
				+	struct starpu_omp_region *initial_region;
			
 
				+	struct starpu_omp_device *initial_device;
			
 
				+	struct starpu_omp_critical *default_critical;
			
 
				+	struct starpu_omp_critical *named_criticals;
			
 
				+	struct _starpu_spinlock named_criticals_lock;
			
 
				+	struct starpu_omp_thread *hash_workers;
			
 
				+	struct _starpu_spinlock hash_workers_lock;
			
 
				+	int nb_starpu_cpu_workers;
			
 
				+	int *starpu_cpu_worker_ids;
			
 
				+};
			
 
				+
			
 
				+/* 
			
 
				+ * internal global variables
			
 
				+ */
			
 
				+extern struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values;
			
 
				+extern struct starpu_omp_global *_starpu_omp_global_state;
			
 
				+extern double _starpu_omp_clock_ref;
			
 
				+
			
 
				+/* 
			
 
				+ * internal API
			
 
				+ */
			
 
				+void _starpu_omp_environment_init(void);
			
 
				+void _starpu_omp_environment_exit(void);
			
 
				+struct starpu_omp_thread *_starpu_omp_get_thread(void);
			
 
				+struct starpu_omp_task *_starpu_omp_get_task(void);
			
 
				+void _starpu_omp_dummy_init(void);
			
 
				+void _starpu_omp_dummy_shutdown(void);
			
 
				+#endif // STARPU_OPENMP
			
 
				+
			
 
				+#endif // __OPENMP_RUNTIME_SUPPORT_H__
			
--- a/src/util/openmp_runtime_support_environment.c
+++ b/src/util/openmp_runtime_support_environment.c
@@ -0,0 +1,941 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_OPENMP
			
 
				+#include <util/openmp_runtime_support.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <ctype.h>
			
 
				+#include <strings.h>
			
 
				+
			
 
				+#define _STARPU_INITIAL_PLACES_LIST_SIZE      4
			
 
				+#define _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE 4
			
 
				+#define _STARPU_DEFAULT_STACKSIZE 2097152
			
 
				+
			
 
				+static struct starpu_omp_initial_icv_values _initial_icv_values =
			
 
				+{
			
 
				+	.dyn_var = 0,
			
 
				+	.nest_var = 0,
			
 
				+	.nthreads_var = NULL,
			
 
				+	.run_sched_var = starpu_omp_sched_static,
			
 
				+	.run_sched_chunk_var = 0,
			
 
				+	.def_sched_var = starpu_omp_sched_static,
			
 
				+	.def_sched_chunk_var = 0,
			
 
				+	.bind_var = NULL,
			
 
				+	.stacksize_var = _STARPU_DEFAULT_STACKSIZE,
			
 
				+	.wait_policy_var = 0,
			
 
				+	.max_active_levels_var = STARPU_OMP_MAX_ACTIVE_LEVELS,
			
 
				+	.active_levels_var = 0,
			
 
				+	.levels_var = 0,
			
 
				+	.place_partition_var = 0,
			
 
				+	.cancel_var = 0,
			
 
				+	.default_device_var = 0
			
 
				+};
			
 
				+
			
 
				+struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values = NULL;
			
 
				+
			
 
				+/* TODO: move to utils */
			
 
				+static void remove_spaces(char *str)
			
 
				+{
			
 
				+	int i = 0;
			
 
				+	int j = 0;
			
 
				+
			
 
				+	while (str[j] != '\0')
			
 
				+	{
			
 
				+		if (isspace(str[j]))
			
 
				+		{
			
 
				+			j++;
			
 
				+			continue;
			
 
				+		}
			
 
				+		if (j > i)
			
 
				+		{
			
 
				+			str[i] = str[j];
			
 
				+		}
			
 
				+		i++;
			
 
				+		j++;
			
 
				+	}
			
 
				+	if (j > i)
			
 
				+	{
			
 
				+		str[i] = str[j];
			
 
				+	}
			
 
				+}
			
 
				+/* TODO: move to utils */
			
 
				+static int strings_cmp(const char *strings[], const char *str)
			
 
				+{
			
 
				+	int mode = 0;
			
 
				+	while (strings[mode])
			
 
				+	{
			
 
				+		if (strcasecmp(str, strings[mode]) == 0)
			
 
				+			break;
			
 
				+		mode++;
			
 
				+	}
			
 
				+	if (strings[mode] == NULL)
			
 
				+		return -1;
			
 
				+	return mode;
			
 
				+}
			
 
				+/* TODO: move to utils */
			
 
				+static int stringsn_cmp(const char *strings[], const char *str, size_t n)
			
 
				+{
			
 
				+	int mode = 0;
			
 
				+	while (strings[mode])
			
 
				+	{
			
 
				+		if (strncasecmp(str, strings[mode], n) == 0)
			
 
				+			break;
			
 
				+		mode++;
			
 
				+	}
			
 
				+	if (strings[mode] == NULL)
			
 
				+		return -1;
			
 
				+	return mode;
			
 
				+}
			
 
				+
			
 
				+/* TODO: move to utils */
			
 
				+static void read_boolean_var(const char *var, int *dest)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		static const char *strings[] = { "false", "true", NULL };
			
 
				+		int mode = strings_cmp(strings, str);
			
 
				+		if (mode < 0)
			
 
				+			_STARPU_ERROR("parse error in variable %s\n", var);
			
 
				+		*dest = mode;
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* TODO: move to utils */
			
 
				+static void read_int_var(const char *var, int *dest)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		errno = 0;
			
 
				+		int v = (int)strtol(str, NULL, 10);
			
 
				+		if (errno != 0)
			
 
				+			_STARPU_ERROR("could not parse environment variable %s, strtol failed with error %s\n", var, strerror(errno));
			
 
				+		*dest = v;
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_size_var(const char *var, int *dest)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		char *endptr = NULL;
			
 
				+		int mult = 1024;
			
 
				+		errno = 0;
			
 
				+		int v = (int)strtol(str, &endptr, 10);
			
 
				+		if (errno != 0)
			
 
				+			_STARPU_ERROR("could not parse environment variable %s, strtol failed with error %s\n", var, strerror(errno));
			
 
				+		if (*endptr != '\0')
			
 
				+		{
			
 
				+			switch (*endptr)
			
 
				+			{
			
 
				+				case 'b':
			
 
				+				case 'B': mult = 1; break;
			
 
				+				case 'k':
			
 
				+				case 'K': mult = 1024; break;
			
 
				+				case 'm':
			
 
				+				case 'M': mult = 1024*1024; break;
			
 
				+				case 'g':
			
 
				+				case 'G': mult = 1024*1024*1024; break;
			
 
				+			default:
			
 
				+				_STARPU_ERROR("could not parse environment variable %s size suffix invalid\n", var);
			
 
				+			}
			
 
				+		}
			
 
				+		*dest = v*mult;
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_sched_var(const char *var, int *dest, unsigned long long *dest_chunk)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		static const char *strings[] = { "static", "dynamic", "guided", "auto", NULL };
			
 
				+		int mode = strings_cmp(strings, str);
			
 
				+		if (mode < 0)
			
 
				+			_STARPU_ERROR("parse error in variable %s\n", var);
			
 
				+		*dest = mode;
			
 
				+		int offset = strlen(strings[mode]);
			
 
				+		if (str[offset] == ',')
			
 
				+		{
			
 
				+			offset++;
			
 
				+			errno = 0;
			
 
				+			long long v = strtoll(str+offset, NULL, 10);
			
 
				+			if (errno != 0)
			
 
				+				_STARPU_ERROR("could not parse environment variable %s, strtol failed with error %s\n", var, strerror(errno));
			
 
				+			if (v < 0)
			
 
				+				_STARPU_ERROR("invalid negative modifier in environment variable %s\n", var);
			
 
				+			unsigned long long uv = (unsigned long long) v;
			
 
				+			*dest_chunk = uv;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			*dest_chunk = 1;
			
 
				+		}
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_wait_policy_var(const char *var, int *dest)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		static const char *strings[] = { "passive", "active", NULL };
			
 
				+		int mode = strings_cmp(strings, str);
			
 
				+		if (mode < 0)
			
 
				+			_STARPU_ERROR("parse error in variable %s\n", var);
			
 
				+		*dest = mode;
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_display_env_var(const char *var, int *dest)
			
 
				+{
			
 
				+	const char *env = getenv(var);
			
 
				+	if (env)
			
 
				+	{
			
 
				+		char *str = strdup(env);
			
 
				+		if (str == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		remove_spaces(str);
			
 
				+		if (str[0] == '\0')
			
 
				+		{
			
 
				+			free(str);
			
 
				+			return;
			
 
				+		}
			
 
				+		static const char *strings[] = { "false", "true", "verbose", NULL };
			
 
				+		int mode = strings_cmp(strings, str);
			
 
				+		if (mode < 0)
			
 
				+			_STARPU_ERROR("parse error in variable %s\n", var);
			
 
				+		*dest = mode;
			
 
				+		free(str);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static int convert_bind_mode(const char *str, size_t n)
			
 
				+{
			
 
				+	static const char *strings[] = { "false", "true", "master", "close", "spread", NULL };
			
 
				+	int mode = stringsn_cmp(strings, str, n);
			
 
				+	if (mode < 0)
			
 
				+		_STARPU_ERROR("proc_bind list parse error\n");
			
 
				+	return mode;
			
 
				+}
			
 
				+
			
 
				+static void convert_bind_string(const char *_str, int *bind_list, const int max_levels)
			
 
				+{
			
 
				+	char *str = strdup(_str);
			
 
				+	if (str == NULL)
			
 
				+		_STARPU_ERROR("memory allocation failed\n");
			
 
				+	remove_spaces(str);
			
 
				+	if (str[0] == '\0')
			
 
				+	{
			
 
				+		free(str);
			
 
				+		return;
			
 
				+	}
			
 
				+	enum { state_split, state_read };
			
 
				+	int level = 0;
			
 
				+	int i = 0;
			
 
				+	int state = state_read;
			
 
				+	while (1)
			
 
				+	{
			
 
				+		if (state == state_split)
			
 
				+		{
			
 
				+			if (str[i] == '\0')
			
 
				+				break;
			
 
				+			if (str[i] != ',')
			
 
				+				_STARPU_ERROR("proc_bind list parse error\n");
			
 
				+			i++;
			
 
				+			state = state_read;
			
 
				+		}
			
 
				+		else if (state == state_read)
			
 
				+		{
			
 
				+			int n = 0;
			
 
				+			while (isalpha(str[i+n]))
			
 
				+				n++;
			
 
				+			if (n == 0)
			
 
				+				_STARPU_ERROR("proc_bind list parse error\n");
			
 
				+			int mode = convert_bind_mode(str+i,n);
			
 
				+			STARPU_ASSERT(mode >= starpu_omp_proc_bind_false && mode <= starpu_omp_proc_bind_spread);
			
 
				+			bind_list[level] = mode;
			
 
				+			level++;
			
 
				+			if (level == max_levels)
			
 
				+				break;
			
 
				+			i += n;
			
 
				+			state = state_split;
			
 
				+		}
			
 
				+		else
			
 
				+			_STARPU_ERROR("invalid state in parsing proc_bind list\n");
			
 
				+	}
			
 
				+	free(str);
			
 
				+}
			
 
				+
			
 
				+static void convert_num_threads_string(const char *_str, int *num_threads_list, const int max_levels)
			
 
				+{
			
 
				+	char *str = strdup(_str);
			
 
				+	if (str == NULL)
			
 
				+		_STARPU_ERROR("memory allocation failed\n");
			
 
				+	remove_spaces(str);
			
 
				+	if (str[0] == '\0')
			
 
				+	{
			
 
				+		free(str);
			
 
				+		return;
			
 
				+	}
			
 
				+	enum { state_split, state_read };
			
 
				+	int level = 0;
			
 
				+	int i = 0;
			
 
				+	int state = state_read;
			
 
				+	while (1)
			
 
				+	{
			
 
				+		/* split a comma separated list of numerical items */
			
 
				+		if (state == state_split)
			
 
				+		{
			
 
				+			if (str[i] == '\0')
			
 
				+				break;
			
 
				+			if (str[i] != ',')
			
 
				+				_STARPU_ERROR("num_threads list parse error\n");
			
 
				+			i++;
			
 
				+			state = state_read;
			
 
				+		}
			
 
				+		/* read a numerical item */
			
 
				+		else if (state == state_read)
			
 
				+		{
			
 
				+			char *endptr = NULL;
			
 
				+			errno = 0;
			
 
				+			int num_threads = (int)strtol(str+i, &endptr, 10);
			
 
				+			if (errno != 0)
			
 
				+				_STARPU_ERROR("num_threads list parse error, strtol failed with error %s\n", strerror(errno));
			
 
				+			if (num_threads < 1)
			
 
				+				_STARPU_ERROR("num_threads list invalid value\n");
			
 
				+			num_threads_list[level] = num_threads;
			
 
				+			level++;
			
 
				+			if (level == max_levels)
			
 
				+				break;
			
 
				+			i = endptr - str;
			
 
				+			state = state_split;
			
 
				+		}
			
 
				+		else
			
 
				+			_STARPU_ERROR("invalid state in parsing num_threads list\n");
			
 
				+	}
			
 
				+	free(str);
			
 
				+}
			
 
				+
			
 
				+static int convert_place_name(const char *str, size_t n)
			
 
				+{
			
 
				+	static const char *strings[] = { "threads", "cores", "sockets", NULL };
			
 
				+	int mode = stringsn_cmp(strings, str, n);
			
 
				+	if (mode < 0)
			
 
				+		_STARPU_ERROR("place abstract name parse error\n");
			
 
				+	return mode+1; /* 0 is for undefined abstract name */
			
 
				+}
			
 
				+
			
 
				+/* Note: this function modifies the string str */
			
 
				+static void read_a_place_name(char *str, struct starpu_omp_place *places)
			
 
				+{
			
 
				+	int i = 0;
			
 
				+	/* detect exclusion of abstract name expressed as '!' prefix */
			
 
				+	if (str[i] == '!')
			
 
				+	{
			
 
				+		places->abstract_excluded = 1;
			
 
				+		i++;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		places->abstract_excluded = 0;
			
 
				+	}
			
 
				+	/* detect length value for abstract name expressed as '(length)' suffix) */
			
 
				+	char *begin_length_spec = strchr(str+i,'(');
			
 
				+	if (begin_length_spec != NULL)
			
 
				+	{
			
 
				+		char *end_length_spec = strrchr(begin_length_spec+1, ')');
			
 
				+		if (end_length_spec == NULL || end_length_spec <= begin_length_spec+1)
			
 
				+			_STARPU_ERROR("parse error in places list\n");
			
 
				+		*begin_length_spec = '\0';
			
 
				+		*end_length_spec = '\0';
			
 
				+		errno = 0;
			
 
				+		int v = (int)strtol(begin_length_spec+1, NULL, 10);
			
 
				+		if (errno != 0)
			
 
				+			_STARPU_ERROR("parse error in places list\n");
			
 
				+		places->abstract_length = v;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		places->abstract_length = 1;
			
 
				+	}
			
 
				+	/* convert abstract place name string to corresponding value */
			
 
				+	{
			
 
				+		int mode = convert_place_name(str+i, strlen(str+i));
			
 
				+		STARPU_ASSERT(mode >= starpu_omp_place_threads && mode <= starpu_omp_place_sockets);
			
 
				+		places->abstract_name = mode;
			
 
				+		places->numeric_places = NULL;
			
 
				+		places->nb_numeric_places = 0;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_a_places_list(const char *str, struct starpu_omp_place *places)
			
 
				+{
			
 
				+	if (str[0] == '\0')
			
 
				+	{
			
 
				+		places->numeric_places = NULL;
			
 
				+		places->nb_numeric_places = 0;
			
 
				+		places->abstract_name = starpu_omp_place_undefined;
			
 
				+		return;
			
 
				+	}
			
 
				+	enum { state_split,
			
 
				+		state_read_brace_prefix,
			
 
				+		state_read_opening_brace,
			
 
				+		state_read_numeric_prefix,
			
 
				+		state_read_numeric,
			
 
				+		state_split_numeric,
			
 
				+		state_read_closing_brace,
			
 
				+		state_read_brace_suffix,
			
 
				+	};
			
 
				+	struct starpu_omp_numeric_place *places_list = NULL;
			
 
				+	int places_list_size = 0;
			
 
				+	int nb_places = 0;
			
 
				+	int *included_items_list = NULL;
			
 
				+	int included_items_list_size = 0;
			
 
				+	int nb_included_items = 0;
			
 
				+	int *excluded_items_list = NULL;
			
 
				+	int excluded_items_list_size = 0;
			
 
				+	int nb_excluded_items = 0;
			
 
				+	int exclude_place_flag = 0;
			
 
				+	int exclude_item_flag = 0;
			
 
				+	int i = 0;
			
 
				+	int state = state_read_brace_prefix;
			
 
				+	while (1)
			
 
				+	{
			
 
				+		switch (state)
			
 
				+		{
			
 
				+			/* split a comma separated list of numerical places */
			
 
				+			case state_split:
			
 
				+				if (str[i] == '\0')
			
 
				+				{
			
 
				+					goto eol;
			
 
				+				}
			
 
				+				else if (str[i] != ',')
			
 
				+					_STARPU_ERROR("parse error in places list\n");
			
 
				+				i++;
			
 
				+				state = state_read_brace_prefix;
			
 
				+				break;
			
 
				+			/* read optional exclude flag '!' for numerical place */
			
 
				+			case state_read_brace_prefix:
			
 
				+				exclude_place_flag = 0;
			
 
				+				if (str[i] == '!')
			
 
				+				{
			
 
				+					exclude_place_flag = 1;
			
 
				+					i++;
			
 
				+				}
			
 
				+				state = state_read_opening_brace;
			
 
				+				break;
			
 
				+			/* read place opening brace */
			
 
				+			case state_read_opening_brace:
			
 
				+				if (str[i] != '{')
			
 
				+					_STARPU_ERROR("parse error in places list\n");
			
 
				+				i++;
			
 
				+				state = state_read_numeric_prefix;
			
 
				+				break;
			
 
				+			/* read optional exclude flag '!' for numerical item */
			
 
				+			case state_read_numeric_prefix:
			
 
				+				exclude_item_flag = 0;
			
 
				+				if (str[i] == '!')
			
 
				+				{
			
 
				+					exclude_item_flag = 1;
			
 
				+					i++;
			
 
				+				}
			
 
				+				state = state_read_numeric;
			
 
				+				break;
			
 
				+			/* read numerical item */
			
 
				+			case state_read_numeric:
			
 
				+				{
			
 
				+					char *endptr = NULL;
			
 
				+					errno = 0;
			
 
				+					int v = (int)strtol(str+i, &endptr, 10);
			
 
				+					if (errno != 0)
			
 
				+						_STARPU_ERROR("parse error in places list, strtol failed with error %s\n", strerror(errno));
			
 
				+					if (exclude_item_flag)
			
 
				+					{
			
 
				+						if (excluded_items_list_size == 0)
			
 
				+						{
			
 
				+							excluded_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE;
			
 
				+							excluded_items_list = malloc(excluded_items_list_size * sizeof(int));
			
 
				+							if (excluded_items_list == NULL)
			
 
				+								_STARPU_ERROR("memory allocation failed");
			
 
				+						}
			
 
				+						else if (nb_excluded_items == excluded_items_list_size)
			
 
				+						{
			
 
				+							excluded_items_list_size *= 2;
			
 
				+							excluded_items_list = realloc(excluded_items_list, excluded_items_list_size * sizeof(int));
			
 
				+							if (excluded_items_list == NULL)
			
 
				+								_STARPU_ERROR("memory allocation failed");
			
 
				+						}
			
 
				+						excluded_items_list[nb_excluded_items] = v;
			
 
				+						nb_excluded_items++;
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						if (included_items_list_size == 0)
			
 
				+						{
			
 
				+							included_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE;
			
 
				+							included_items_list = malloc(included_items_list_size * sizeof(int));
			
 
				+							if (included_items_list == NULL)
			
 
				+								_STARPU_ERROR("memory allocation failed");
			
 
				+						}
			
 
				+						else if (nb_included_items == included_items_list_size)
			
 
				+						{
			
 
				+							included_items_list_size *= 2;
			
 
				+							included_items_list = realloc(included_items_list, included_items_list_size * sizeof(int));
			
 
				+							if (included_items_list == NULL)
			
 
				+								_STARPU_ERROR("memory allocation failed");
			
 
				+						}
			
 
				+						included_items_list[nb_included_items] = v;
			
 
				+						nb_included_items++;
			
 
				+					}
			
 
				+					exclude_item_flag = 0;
			
 
				+					i = endptr - str;
			
 
				+					state = state_split_numeric;
			
 
				+				}
			
 
				+				break;
			
 
				+			/* read comma separated or colon separated numerical item list */
			
 
				+			case state_split_numeric:
			
 
				+				if (str[i] == ':')
			
 
				+					/* length and stride colon separated arguments not supported for now */
			
 
				+					_STARPU_ERROR("colon support unimplemented in numeric place list");
			
 
				+				if (str[i] == ',')
			
 
				+				{
			
 
				+					i++;
			
 
				+					state = state_read_numeric_prefix;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					state = state_read_closing_brace;
			
 
				+				}
			
 
				+				break;
			
 
				+			/* read end of numerical item list */
			
 
				+			case state_read_closing_brace:
			
 
				+				if (str[i] != '}')
			
 
				+					_STARPU_ERROR("parse error in places list\n");
			
 
				+				if (places_list_size == 0)
			
 
				+				{
			
 
				+					places_list_size = _STARPU_INITIAL_PLACES_LIST_SIZE;
			
 
				+					places_list = malloc(places_list_size * sizeof(*places_list));
			
 
				+					if (places_list == NULL)
			
 
				+						_STARPU_ERROR("memory allocation failed");
			
 
				+				}
			
 
				+				else if (nb_places == places_list_size)
			
 
				+				{
			
 
				+					places_list_size *= 2;
			
 
				+					places_list = realloc(places_list, places_list_size * sizeof(*places_list));
			
 
				+					if (places_list == NULL)
			
 
				+						_STARPU_ERROR("memory allocation failed");
			
 
				+				}
			
 
				+				places_list[nb_places].excluded_place = exclude_place_flag;
			
 
				+				places_list[nb_places].included_numeric_items = included_items_list;
			
 
				+				places_list[nb_places].nb_included_numeric_items = nb_included_items;
			
 
				+				places_list[nb_places].excluded_numeric_items = excluded_items_list;
			
 
				+				places_list[nb_places].nb_excluded_numeric_items = nb_excluded_items;
			
 
				+				nb_places++;
			
 
				+				exclude_place_flag = 0;
			
 
				+				included_items_list = NULL;
			
 
				+				included_items_list_size = 0;
			
 
				+				nb_included_items = 0;
			
 
				+				excluded_items_list = NULL;
			
 
				+				excluded_items_list_size = 0;
			
 
				+				nb_excluded_items = 0;
			
 
				+				i++;
			
 
				+				state = state_read_brace_suffix;
			
 
				+				break;
			
 
				+			/* read optional place colon separated suffix */
			
 
				+			case state_read_brace_suffix:
			
 
				+				if (str[i] == ':')
			
 
				+					/* length and stride colon separated arguments not supported for now */
			
 
				+					_STARPU_ERROR("colon support unimplemented in numeric place list");
			
 
				+				state = state_split;
			
 
				+				break;
			
 
				+			default:
			
 
				+				_STARPU_ERROR("invalid state in parsing places list\n");
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+eol:
			
 
				+	places->numeric_places = places_list;
			
 
				+	places->nb_numeric_places = nb_places;
			
 
				+	places->abstract_name = starpu_omp_place_numerical;
			
 
				+}
			
 
				+
			
 
				+static void convert_places_string(const char *_str, struct starpu_omp_place *places)
			
 
				+{
			
 
				+	char *str = strdup(_str);
			
 
				+	if (str == NULL)
			
 
				+		_STARPU_ERROR("memory allocation failed\n");
			
 
				+	remove_spaces(str);
			
 
				+	if (str[0] != '\0')
			
 
				+	{
			
 
				+		/* check whether this is the start of an abstract name */
			
 
				+		if (isalpha(str[0]) || (str[0] == '!' && isalpha(str[1])))
			
 
				+		{
			
 
				+			read_a_place_name(str, places);
			
 
				+		}
			
 
				+		/* else the string must contain a list of braces */
			
 
				+		else
			
 
				+		{
			
 
				+			read_a_places_list(str, places);
			
 
				+		}
			
 
				+	}
			
 
				+	free(str);
			
 
				+}
			
 
				+
			
 
				+static void free_places(struct starpu_omp_place *places)
			
 
				+{
			
 
				+	int i;
			
 
				+	for (i = 0; i < places->nb_numeric_places; i++)
			
 
				+	{
			
 
				+		if (places->numeric_places[i].nb_included_numeric_items > 0)
			
 
				+		{
			
 
				+			free(places->numeric_places[i].included_numeric_items);
			
 
				+		}
			
 
				+		if (places->numeric_places[i].nb_excluded_numeric_items > 0)
			
 
				+		{
			
 
				+			free(places->numeric_places[i].excluded_numeric_items);
			
 
				+		}
			
 
				+	}
			
 
				+	if (places->nb_numeric_places > 0)
			
 
				+	{
			
 
				+		free(places->numeric_places);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void read_omp_environment(void)
			
 
				+{
			
 
				+	read_boolean_var("OMP_DYNAMIC", &_initial_icv_values.dyn_var);
			
 
				+	read_boolean_var("OMP_NESTED", &_initial_icv_values.nest_var);
			
 
				+	read_sched_var("OMP_SCHEDULE", &_initial_icv_values.run_sched_var, &_initial_icv_values.run_sched_chunk_var);
			
 
				+	read_size_var("OMP_STACKSIZE", &_initial_icv_values.stacksize_var);
			
 
				+	read_wait_policy_var("OMP_WAIT_POLICY", &_initial_icv_values.wait_policy_var);
			
 
				+	read_int_var("OMP_THREAD_LIMIT", &_initial_icv_values.thread_limit_var);
			
 
				+	read_int_var("OMP_MAX_ACTIVE_LEVELS", &_initial_icv_values.max_active_levels_var);
			
 
				+	read_boolean_var("OMP_CANCELLATION", &_initial_icv_values.cancel_var);
			
 
				+	read_int_var("OMP_DEFAULT_DEVICE", &_initial_icv_values.default_device_var);
			
 
				+
			
 
				+	const int max_levels = _initial_icv_values.max_active_levels_var;
			
 
				+
			
 
				+	/* read OMP_PROC_BIND */
			
 
				+	{
			
 
				+		int *bind_list = malloc((1+max_levels) * sizeof(*bind_list));
			
 
				+		if (bind_list == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		int level;
			
 
				+		for (level = 0;level < max_levels+1;level++)
			
 
				+		{
			
 
				+			/* TODO: check what should be used as default value */
			
 
				+			bind_list[level] = starpu_omp_proc_bind_undefined;
			
 
				+		}
			
 
				+		const char *env = getenv("OMP_PROC_BIND");
			
 
				+		if (env)
			
 
				+		{
			
 
				+			convert_bind_string(env, bind_list, max_levels);
			
 
				+		}
			
 
				+		_initial_icv_values.bind_var = bind_list;
			
 
				+	}
			
 
				+
			
 
				+	/* read OMP_NUM_THREADS */
			
 
				+	{
			
 
				+		int *num_threads_list = malloc((1+max_levels) * sizeof(*num_threads_list));
			
 
				+		if (num_threads_list == NULL)
			
 
				+			_STARPU_ERROR("memory allocation failed\n");
			
 
				+		int level;
			
 
				+		for (level = 0;level < max_levels+1;level++)
			
 
				+		{
			
 
				+			/* TODO: check what should be used as default value */
			
 
				+			num_threads_list[level] = 0;
			
 
				+		}
			
 
				+		const char *env = getenv("OMP_NUM_THREADS");
			
 
				+		if (env)
			
 
				+		{
			
 
				+			convert_num_threads_string(env, num_threads_list, max_levels);
			
 
				+		}
			
 
				+		_initial_icv_values.nthreads_var = num_threads_list;
			
 
				+	}
			
 
				+
			
 
				+	/* read OMP_PLACES */
			
 
				+	{
			
 
				+		memset(&_initial_icv_values.places, 0, sizeof(_initial_icv_values.places));
			
 
				+		_initial_icv_values.places.abstract_name = starpu_omp_place_undefined;
			
 
				+		const char *env = getenv("OMP_PLACES");
			
 
				+		if (env)
			
 
				+		{
			
 
				+			convert_places_string(env, &_initial_icv_values.places);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_starpu_omp_initial_icv_values = &_initial_icv_values;
			
 
				+}
			
 
				+
			
 
				+static void free_omp_environment(void)
			
 
				+{
			
 
				+	/**/
			
 
				+	_starpu_omp_initial_icv_values = NULL;
			
 
				+
			
 
				+	/* OMP_DYNAMIC */
			
 
				+	/* OMP_NESTED */
			
 
				+	/* OMP_SCHEDULE */
			
 
				+	/* OMP_STACKSIZE */
			
 
				+	/* OMP_WAIT_POLICY */
			
 
				+	/* OMP_THREAD_LIMIT */
			
 
				+	/* OMP_MAX_ACTIVE_LEVELS */
			
 
				+	/* OMP_CANCELLATION */
			
 
				+	/* OMP_DEFAULT_DEVICE */
			
 
				+
			
 
				+	/* OMP_PROC_BIND */
			
 
				+	free(_initial_icv_values.bind_var);
			
 
				+	_initial_icv_values.bind_var = NULL;
			
 
				+
			
 
				+	/* OMP_NUM_THREADS */
			
 
				+	free(_initial_icv_values.nthreads_var);
			
 
				+	_initial_icv_values.nthreads_var = NULL;
			
 
				+
			
 
				+	/* OMP_PLACES */
			
 
				+	free_places(&_initial_icv_values.places);
			
 
				+}
			
 
				+
			
 
				+static void display_omp_environment(int verbosity_level)
			
 
				+{
			
 
				+	if (verbosity_level > 0)
			
 
				+	{
			
 
				+		printf("OPENMP DISPLAY ENVIRONMENT BEGIN\n");
			
 
				+		printf("  _OPENMP='xxxxxx'\n");
			
 
				+		printf("  [host] OMP_DYNAMIC='%s'\n", _starpu_omp_initial_icv_values->dyn_var?"true":"false");
			
 
				+		printf("  [host] OMP_NESTED='%s'\n", _starpu_omp_initial_icv_values->nest_var?"true":"false");
			
 
				+		printf("  [host] OMP_SCHEDULE='");
			
 
				+		switch (_starpu_omp_initial_icv_values->run_sched_var)
			
 
				+		{
			
 
				+			case starpu_omp_sched_static:
			
 
				+				printf("static, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var);
			
 
				+				break;
			
 
				+			case starpu_omp_sched_dynamic:
			
 
				+				printf("dynamic, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var);
			
 
				+				break;
			
 
				+			case starpu_omp_sched_guided:
			
 
				+				printf("guided, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var);
			
 
				+				break;
			
 
				+			case starpu_omp_sched_auto:
			
 
				+				printf("auto, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var);
			
 
				+				break;
			
 
				+			default:
			
 
				+				printf("<unknown>");
			
 
				+				break;
			
 
				+		}
			
 
				+		printf("'\n");
			
 
				+				
			
 
				+		printf("  [host] OMP_STACKSIZE='%d'\n", _starpu_omp_initial_icv_values->stacksize_var);
			
 
				+		printf("  [host] OMP_WAIT_POLICY='%s'\n", _starpu_omp_initial_icv_values->wait_policy_var?"active":"passive");
			
 
				+		printf("  [host] OMP_MAX_ACTIVE_LEVELS='%d'\n", _starpu_omp_initial_icv_values->max_active_levels_var);
			
 
				+		printf("  [host] OMP_CANCELLATION='%s'\n", _starpu_omp_initial_icv_values->cancel_var?"true":"false");
			
 
				+		printf("  [host] OMP_DEFAULT_DEVICE='%d'\n", _starpu_omp_initial_icv_values->default_device_var);
			
 
				+		printf("  [host] OMP_PROC_BIND='");
			
 
				+		{
			
 
				+			int level;
			
 
				+			for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++)
			
 
				+			{
			
 
				+				if (level > 0)
			
 
				+				{
			
 
				+					printf(", ");
			
 
				+				}
			
 
				+				switch (_starpu_omp_initial_icv_values->bind_var[level])
			
 
				+				{
			
 
				+					case starpu_omp_proc_bind_false:
			
 
				+						printf("false");
			
 
				+						break;
			
 
				+					case starpu_omp_proc_bind_true:
			
 
				+						printf("true");
			
 
				+						break;
			
 
				+					case starpu_omp_proc_bind_master:
			
 
				+						printf("master");
			
 
				+						break;
			
 
				+					case starpu_omp_proc_bind_close:
			
 
				+						printf("close");
			
 
				+						break;
			
 
				+					case starpu_omp_proc_bind_spread:
			
 
				+						printf("spread");
			
 
				+						break;
			
 
				+					default:
			
 
				+						printf("<unknown>");
			
 
				+						break;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		printf("'\n");
			
 
				+		printf("  [host] OMP_NUM_THREADS='");
			
 
				+		{
			
 
				+			int level;
			
 
				+			for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++)
			
 
				+			{
			
 
				+				if (level > 0)
			
 
				+				{
			
 
				+					printf(", ");
			
 
				+				}
			
 
				+				printf("%d", _starpu_omp_initial_icv_values->nthreads_var[level]);
			
 
				+			}
			
 
				+		}
			
 
				+		printf("'\n");
			
 
				+		printf("  [host] OMP_PLACES='");
			
 
				+		{
			
 
				+			struct starpu_omp_place *places = &_starpu_omp_initial_icv_values->places;
			
 
				+			if (places->nb_numeric_places > 0)
			
 
				+			{
			
 
				+				int p;
			
 
				+				for (p = 0; p < places->nb_numeric_places; p++)
			
 
				+				{
			
 
				+					if (p > 0)
			
 
				+					{
			
 
				+						printf(",");
			
 
				+					}
			
 
				+					struct starpu_omp_numeric_place *np = &places->numeric_places[p];
			
 
				+					if (np->excluded_place)
			
 
				+					{
			
 
				+						printf("!");
			
 
				+					}
			
 
				+					printf("{");
			
 
				+					int i;
			
 
				+					for (i = 0; i < np->nb_included_numeric_items; i++)
			
 
				+					{
			
 
				+						if (i > 0)
			
 
				+						{
			
 
				+							printf(",");
			
 
				+						}
			
 
				+						printf("%d", np->included_numeric_items[i]);
			
 
				+					}
			
 
				+					for (i = 0; i < np->nb_excluded_numeric_items; i++)
			
 
				+					{
			
 
				+						if (i > 0 || np->nb_included_numeric_items)
			
 
				+						{
			
 
				+							printf(",");
			
 
				+						}
			
 
				+						printf("!%d", np->excluded_numeric_items[i]);
			
 
				+					}
			
 
				+					printf("}");
			
 
				+					/* TODO: print length/stride suffix */
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (places->abstract_excluded)
			
 
				+				{
			
 
				+					printf("!");
			
 
				+				}
			
 
				+				switch (places->abstract_name)
			
 
				+				{
			
 
				+					case starpu_omp_place_undefined:
			
 
				+						printf("undefined");
			
 
				+						break;
			
 
				+					case starpu_omp_place_threads:
			
 
				+						printf("threads");
			
 
				+						break;
			
 
				+					case starpu_omp_place_cores:
			
 
				+						printf("cores");
			
 
				+						break;
			
 
				+					case starpu_omp_place_sockets:
			
 
				+						printf("sockets");
			
 
				+						break;
			
 
				+					case starpu_omp_place_numerical:
			
 
				+						printf("<numerical>");
			
 
				+						break;
			
 
				+					default:
			
 
				+						printf("<unknown>");
			
 
				+						break;
			
 
				+				}
			
 
				+				if (places->abstract_length)
			
 
				+				{
			
 
				+					printf("(%d)", places->abstract_length);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		printf("'\n");
			
 
				+
			
 
				+		if (verbosity_level > 1)
			
 
				+		{
			
 
				+			/* no vendor specific runtime variable */
			
 
				+		}
			
 
				+		printf("OPENMP DISPLAY ENVIRONMENT END\n");
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void _starpu_omp_environment_init(void)
			
 
				+{
			
 
				+	read_omp_environment();
			
 
				+	int display_env = 0;
			
 
				+	read_display_env_var("OMP_DISPLAY_ENV", &display_env);
			
 
				+	if (display_env > 0)
			
 
				+	{
			
 
				+		display_omp_environment(display_env);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void _starpu_omp_environment_exit(void)
			
 
				+{
			
 
				+	free_omp_environment();
			
 
				+}
			
 
				+#endif /* STARPU_OPENMP */
			
--- a/src/util/openmp_runtime_support_omp_api.c
+++ b/src/util/openmp_runtime_support_omp_api.c
@@ -0,0 +1,279 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_OPENMP
			
 
				+#include <util/openmp_runtime_support.h>
			
 
				+
			
 
				+#define __not_implemented__ do { fprintf (stderr, "omp lib function %s not implemented\n", __func__); abort(); } while (0)
			
 
				+
			
 
				+void starpu_omp_set_num_threads(int threads)
			
 
				+{
			
 
				+	STARPU_ASSERT(threads > 0);
			
 
				+	struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	STARPU_ASSERT(task != NULL);
			
 
				+	struct starpu_omp_region *region;
			
 
				+	region = task->owner_region;
			
 
				+	STARPU_ASSERT(region != NULL);
			
 
				+	region->icvs.nthreads_var[0] = threads;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_num_threads()
			
 
				+{
			
 
				+	struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	struct starpu_omp_region *region;
			
 
				+	if (task == NULL)
			
 
				+		return 1;
			
 
				+
			
 
				+	region = task->owner_region;
			
 
				+	return region->nb_threads;
			
 
				+}
			
 
				+
			
 
				+static int _starpu_omp_get_region_thread_num(const struct starpu_omp_region * const region)
			
 
				+{
			
 
				+	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
			
 
				+	STARPU_ASSERT(thread != NULL);
			
 
				+	if (thread == region->master_thread)
			
 
				+		return 0;
			
 
				+	struct starpu_omp_thread * region_thread;
			
 
				+	int tid = 1;
			
 
				+	for (region_thread  = starpu_omp_thread_list_begin(region->thread_list);
			
 
				+			region_thread != starpu_omp_thread_list_end(region->thread_list);
			
 
				+			region_thread  = starpu_omp_thread_list_next(region_thread))
			
 
				+	{
			
 
				+		if (thread == region_thread)
			
 
				+		{
			
 
				+			return tid;
			
 
				+		}
			
 
				+		tid++;
			
 
				+	}
			
 
				+	_STARPU_ERROR("unrecognized omp thread\n");
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_thread_num()
			
 
				+{
			
 
				+	struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	if (task == NULL)
			
 
				+		return 0;
			
 
				+	return _starpu_omp_get_region_thread_num(task->owner_region);
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_max_threads()
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	int max_threads = parallel_region->icvs.nthreads_var[0];
			
 
				+	/* TODO: for now, nested parallel sections are not supported, thus we
			
 
				+	 * open an active parallel section only if the generating region is the
			
 
				+	 * initial region */
			
 
				+	if (parallel_region->level > 0)
			
 
				+	{
			
 
				+		max_threads = 1;
			
 
				+	}
			
 
				+
			
 
				+	return max_threads;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_num_procs (void)
			
 
				+{
			
 
				+	/* starpu_cpu_worker_get_count defined as topology.ncpus */
			
 
				+	return starpu_cpu_worker_get_count();
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_in_parallel (void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.active_levels_var > 0;
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_set_dynamic (int dynamic_threads)
			
 
				+{
			
 
				+	(void) dynamic_threads;
			
 
				+	/* TODO: dynamic adjustment of the number of threads is not supported for now */
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_dynamic (void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.dyn_var;
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_set_nested (int nested)
			
 
				+{
			
 
				+	(void) nested;
			
 
				+	/* TODO: nested parallelism not supported for now */
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_nested (void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.nest_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_cancellation(void)
			
 
				+{
			
 
				+	return _starpu_omp_global_state->icvs.cancel_var;
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_set_schedule (enum starpu_omp_sched_value kind, int modifier)
			
 
				+{
			
 
				+	struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	STARPU_ASSERT(     kind == starpu_omp_sched_static
			
 
				+			|| kind == starpu_omp_sched_dynamic
			
 
				+			|| kind == starpu_omp_sched_guided
			
 
				+			|| kind == starpu_omp_sched_auto);
			
 
				+	STARPU_ASSERT(modifier >= 0);
			
 
				+	parallel_region->icvs.run_sched_var = kind;
			
 
				+	parallel_region->icvs.run_sched_chunk_var = (unsigned long long)modifier;
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_get_schedule (enum starpu_omp_sched_value *kind, int *modifier)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	*kind = parallel_region->icvs.run_sched_var;
			
 
				+	*modifier = (int)parallel_region->icvs.run_sched_chunk_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_thread_limit (void)
			
 
				+{
			
 
				+	return starpu_cpu_worker_get_count();
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_set_max_active_levels (int max_levels)
			
 
				+{
			
 
				+	struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device;
			
 
				+	if (max_levels > 1)
			
 
				+	{
			
 
				+		/* TODO: nested parallelism not supported for now */
			
 
				+		max_levels = 1;
			
 
				+	}
			
 
				+	device->icvs.max_active_levels_var = max_levels;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_max_active_levels (void)
			
 
				+{
			
 
				+	const struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device;
			
 
				+	return device->icvs.max_active_levels_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_level (void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.levels_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_ancestor_thread_num (int level)
			
 
				+{
			
 
				+	if (level == 0)
			
 
				+		return 0;
			
 
				+	const struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	if (task == NULL)
			
 
				+		return -1;
			
 
				+	const struct starpu_omp_region *parallel_region = task->owner_region;
			
 
				+	if (level < 0 || level > parallel_region->icvs.levels_var)
			
 
				+		return -1;
			
 
				+	while (level < parallel_region->icvs.levels_var)
			
 
				+	{
			
 
				+		parallel_region = parallel_region->parent_region;
			
 
				+	}
			
 
				+	return _starpu_omp_get_region_thread_num(parallel_region);
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_team_size (int level)
			
 
				+{
			
 
				+	if (level == 0)
			
 
				+		return 1;
			
 
				+	const struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	if (task == NULL)
			
 
				+		return -1;
			
 
				+	const struct starpu_omp_region *parallel_region = task->owner_region;
			
 
				+	if (level < 0 || level > parallel_region->icvs.levels_var)
			
 
				+		return -1;
			
 
				+	while (level < parallel_region->icvs.levels_var)
			
 
				+	{
			
 
				+		parallel_region = parallel_region->parent_region;
			
 
				+	}
			
 
				+	return parallel_region->nb_threads;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_active_level (void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.active_levels_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_in_final(void)
			
 
				+{
			
 
				+	const struct starpu_omp_task *task = _starpu_omp_get_task();
			
 
				+	return task->is_final;
			
 
				+}
			
 
				+
			
 
				+enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	int proc_bind = parallel_region->icvs.bind_var[0];
			
 
				+	return proc_bind;
			
 
				+}
			
 
				+
			
 
				+void starpu_omp_set_default_device(int device_num)
			
 
				+{
			
 
				+	(void) device_num;
			
 
				+	/* TODO: set_default_device not supported for now */
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_default_device(void)
			
 
				+{
			
 
				+	const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region;
			
 
				+	return parallel_region->icvs.default_device_var;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_num_devices(void)
			
 
				+{
			
 
				+	/* TODO: get_num_devices not supported for now
			
 
				+	 * assume 1 device */
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_num_teams(void)
			
 
				+{
			
 
				+	/* TODO: num_teams not supported for now
			
 
				+	 * assume 1 team */
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_get_team_num(void)
			
 
				+{
			
 
				+	/* TODO: team_num not supported for now
			
 
				+	 * assume team_num 0 */
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int starpu_omp_is_initial_device(void)
			
 
				+{
			
 
				+	const struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device;
			
 
				+	return device == _starpu_omp_global_state->initial_device;
			
 
				+}
			
 
				+
			
 
				+double starpu_omp_get_wtime (void)
			
 
				+{
			
 
				+	return 1e-6 * (starpu_timing_now() - _starpu_omp_clock_ref);
			
 
				+}
			
 
				+
			
 
				+double starpu_omp_get_wtick (void)
			
 
				+{
			
 
				+	/* arbitrary precision value */
			
 
				+	return 1e-6;
			
 
				+}
			
 
				+#endif /* STARPU_OPENMP */
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -223,6 +223,37 @@ noinst_PROGRAMS =				\
 
				 	microbenchs/redundant_buffer		\
			
 
				 	microbenchs/local_pingpong		\
			
 
				 	microbenchs/matrix_as_vector		\
			
 
				+	openmp/init_exit_01			\
			
 
				+	openmp/init_exit_02			\
			
 
				+	openmp/environment			\
			
 
				+	openmp/api_01				\
			
 
				+	openmp/parallel_01			\
			
 
				+	openmp/parallel_02			\
			
 
				+	openmp/parallel_03			\
			
 
				+	openmp/parallel_barrier_01		\
			
 
				+	openmp/parallel_master_01		\
			
 
				+	openmp/parallel_master_inline_01	\
			
 
				+	openmp/parallel_single_wait_01		\
			
 
				+	openmp/parallel_single_nowait_01	\
			
 
				+	openmp/parallel_single_inline_01	\
			
 
				+	openmp/parallel_single_copyprivate_01	\
			
 
				+	openmp/parallel_single_copyprivate_inline_01	\
			
 
				+	openmp/parallel_critical_01		\
			
 
				+	openmp/parallel_critical_inline_01	\
			
 
				+	openmp/parallel_critical_named_01	\
			
 
				+	openmp/parallel_critical_named_inline_01\
			
 
				+	openmp/parallel_simple_lock_01		\
			
 
				+	openmp/parallel_nested_lock_01		\
			
 
				+	openmp/parallel_for_01			\
			
 
				+	openmp/parallel_for_02			\
			
 
				+	openmp/parallel_for_ordered_01		\
			
 
				+	openmp/parallel_sections_01		\
			
 
				+	openmp/parallel_sections_combined_01	\
			
 
				+	openmp/task_01				\
			
 
				+	openmp/task_02				\
			
 
				+	openmp/taskwait_01			\
			
 
				+	openmp/taskgroup_01			\
			
 
				+	openmp/taskgroup_02			\
			
 
				 	overlap/overlap				\
			
 
				 	overlap/gpu_concurrency			\
			
 
				 	parallel_tasks/explicit_combined_worker	\
			
@@ -445,6 +476,99 @@ main_subgraph_repeat_regenerate_tag_SOURCES +=		\
 
				 	main/increment.cu
			
 
				 endif
			
 
				 
			
 
				+openmp_init_exit_01_SOURCES = 	\
			
 
				+	openmp/init_exit_01.c
			
 
				+
			
 
				+openmp_init_exit_02_SOURCES = 	\
			
 
				+	openmp/init_exit_02.c
			
 
				+
			
 
				+openmp_environment_SOURCES = 	\
			
 
				+	openmp/environment.c
			
 
				+
			
 
				+openmp_api_01_SOURCES = 	\
			
 
				+	openmp/api_01.c
			
 
				+
			
 
				+openmp_parallel_01_SOURCES = 	\
			
 
				+	openmp/parallel_01.c
			
 
				+
			
 
				+openmp_parallel_02_SOURCES = 	\
			
 
				+	openmp/parallel_02.c
			
 
				+
			
 
				+openmp_parallel_03_SOURCES = 	\
			
 
				+	openmp/parallel_03.c
			
 
				+
			
 
				+openmp_parallel_barrier_01_SOURCES = 	\
			
 
				+	openmp/parallel_barrier_01.c
			
 
				+
			
 
				+openmp_parallel_master_01_SOURCES = 	\
			
 
				+	openmp/parallel_master_01.c
			
 
				+
			
 
				+openmp_parallel_master_inline_01_SOURCES = 	\
			
 
				+	openmp/parallel_master_inline_01.c
			
 
				+
			
 
				+openmp_parallel_single_wait_01_SOURCES = 	\
			
 
				+	openmp/parallel_single_wait_01.c
			
 
				+
			
 
				+openmp_parallel_single_nowait_01_SOURCES = 	\
			
 
				+	openmp/parallel_single_nowait_01.c
			
 
				+
			
 
				+openmp_parallel_single_inline_01_SOURCES = 	\
			
 
				+	openmp/parallel_single_inline_01.c
			
 
				+
			
 
				+openmp_parallel_single_copyprivate_01_SOURCES = 	\
			
 
				+	openmp/parallel_single_copyprivate_01.c
			
 
				+
			
 
				+openmp_parallel_single_copyprivate_inline_01_SOURCES = 	\
			
 
				+	openmp/parallel_single_copyprivate_inline_01.c
			
 
				+
			
 
				+openmp_parallel_critical_01_SOURCES = 	\
			
 
				+	openmp/parallel_critical_01.c
			
 
				+
			
 
				+openmp_parallel_critical_inline_01_SOURCES = 	\
			
 
				+	openmp/parallel_critical_inline_01.c
			
 
				+
			
 
				+openmp_parallel_critical_named_01_SOURCES = 	\
			
 
				+	openmp/parallel_critical_named_01.c
			
 
				+
			
 
				+openmp_parallel_critical_named_inline_01_SOURCES = 	\
			
 
				+	openmp/parallel_critical_named_inline_01.c
			
 
				+
			
 
				+openmp_parallel_simple_lock_01_SOURCES = 	\
			
 
				+	openmp/parallel_simple_lock_01.c
			
 
				+
			
 
				+openmp_parallel_nested_lock_01_SOURCES = 	\
			
 
				+	openmp/parallel_nested_lock_01.c
			
 
				+
			
 
				+openmp_parallel_for_01_SOURCES = 	\
			
 
				+	openmp/parallel_for_01.c
			
 
				+
			
 
				+openmp_parallel_for_02_SOURCES = 	\
			
 
				+	openmp/parallel_for_02.c
			
 
				+
			
 
				+openmp_parallel_for_ordered_01_SOURCES = 	\
			
 
				+	openmp/parallel_for_ordered_01.c
			
 
				+
			
 
				+openmp_parallel_sections_01_SOURCES = 	\
			
 
				+	openmp/parallel_sections_01.c
			
 
				+
			
 
				+openmp_parallel_sections_combined_01_SOURCES = 	\
			
 
				+	openmp/parallel_sections_combined_01.c
			
 
				+
			
 
				+openmp_task_01_SOURCES = 	\
			
 
				+	openmp/task_01.c
			
 
				+
			
 
				+openmp_task_02_SOURCES = 	\
			
 
				+	openmp/task_02.c
			
 
				+
			
 
				+openmp_taskwait_01_SOURCES = 	\
			
 
				+	openmp/taskwait_01.c
			
 
				+
			
 
				+openmp_taskgroup_01_SOURCES = 	\
			
 
				+	openmp/taskgroup_01.c
			
 
				+
			
 
				+openmp_taskgroup_02_SOURCES = 	\
			
 
				+	openmp/taskgroup_02.c
			
 
				+
			
 
				 ###################
			
 
				 # Block interface #
			
 
				 ###################
			
--- a/tests/openmp/api_01.c
+++ b/tests/openmp/api_01.c
@@ -0,0 +1,135 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret;
			
 
				+	/* we clear the whole OMP environment for this test, to check the
			
 
				+	 * default behaviour of API functions */
			
 
				+	unsetenv("OMP_DYNAMIC");
			
 
				+	unsetenv("OMP_NESTED");
			
 
				+	unsetenv("OMP_SCHEDULE");
			
 
				+	unsetenv("OMP_STACKSIZE");
			
 
				+	unsetenv("OMP_WAIT_POLICY");
			
 
				+	unsetenv("OMP_THREAD_LIMIT");
			
 
				+	unsetenv("OMP_MAX_ACTIVE_LEVELS");
			
 
				+	unsetenv("OMP_CANCELLATION");
			
 
				+	unsetenv("OMP_DEFAULT_DEVICE");
			
 
				+	unsetenv("OMP_PROC_BIND");
			
 
				+	unsetenv("OMP_NUM_THREADS");
			
 
				+	unsetenv("OMP_PLACES");
			
 
				+	unsetenv("OMP_DISPLAY_ENV");
			
 
				+	ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+#define check_omp_func(f,_tv)					\
			
 
				+{								\
			
 
				+	const int v = (f());					\
			
 
				+	const int tv = (_tv);					\
			
 
				+	printf(#f ": %d (should be %d)\n", v, tv);		\
			
 
				+	STARPU_ASSERT(v == tv);					\
			
 
				+}
			
 
				+
			
 
				+const char * get_sched_name(int sched_value)
			
 
				+{
			
 
				+	const char *sched_name = NULL;
			
 
				+
			
 
				+	switch (sched_value)
			
 
				+	{
			
 
				+		case starpu_omp_sched_undefined: sched_name = "<undefined>"; break;
			
 
				+		case starpu_omp_sched_static:    sched_name = "static"; break;
			
 
				+		case starpu_omp_sched_dynamic:   sched_name = "dynamic"; break;
			
 
				+		case starpu_omp_sched_guided:    sched_name = "guided"; break;
			
 
				+		case starpu_omp_sched_auto:      sched_name = "auto"; break;
			
 
				+		case starpu_omp_sched_runtime:   sched_name = "runtime"; break;
			
 
				+		default: _STARPU_ERROR("invalid omp schedule value");
			
 
				+	}
			
 
				+	return sched_name;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[])
			
 
				+{
			
 
				+	const int nb_cpus = starpu_cpu_worker_get_count();
			
 
				+
			
 
				+	check_omp_func(starpu_omp_get_num_threads, 1);
			
 
				+	check_omp_func(starpu_omp_get_thread_num, 0);
			
 
				+	/* since OMP_NUM_THREADS is cleared, starpu_omp_get_max_threads() should return nb_cpus */
			
 
				+	check_omp_func(starpu_omp_get_max_threads, nb_cpus);
			
 
				+	check_omp_func(starpu_omp_get_num_procs, nb_cpus);
			
 
				+	check_omp_func(starpu_omp_in_parallel, 0);
			
 
				+	check_omp_func(starpu_omp_get_dynamic, 0);
			
 
				+	check_omp_func(starpu_omp_get_nested, 0);
			
 
				+	check_omp_func(starpu_omp_get_cancellation, 0);
			
 
				+	{
			
 
				+		const enum starpu_omp_sched_value target_kind = starpu_omp_sched_static;
			
 
				+		const int target_modifier = 0;
			
 
				+		enum starpu_omp_sched_value kind;
			
 
				+		int modifier;
			
 
				+		const char *sched_name;
			
 
				+		const char *target_sched_name;
			
 
				+		starpu_omp_get_schedule(&kind, &modifier);
			
 
				+		sched_name = get_sched_name(kind);
			
 
				+		target_sched_name = get_sched_name(target_kind);
			
 
				+		printf("starpu_omp_get_schedule: %s,%d (should be %s,%d)\n", sched_name, modifier, target_sched_name, target_modifier);
			
 
				+		STARPU_ASSERT(kind == target_kind && modifier == target_modifier);
			
 
				+	}
			
 
				+	check_omp_func(starpu_omp_get_thread_limit, nb_cpus);
			
 
				+	check_omp_func(starpu_omp_get_max_active_levels, 1);
			
 
				+	check_omp_func(starpu_omp_get_level, 0);
			
 
				+	{
			
 
				+		const int tv = 0;
			
 
				+		const int v = starpu_omp_get_ancestor_thread_num(0);
			
 
				+		printf("starpu_omp_get_ancestor_thread_num(0): %d (should be %d)\n", v, tv);
			
 
				+		STARPU_ASSERT(v == tv);
			
 
				+	}
			
 
				+	{
			
 
				+		const int tv = 1;
			
 
				+		const int v = starpu_omp_get_team_size(0);
			
 
				+		printf("starpu_omp_get_team_size(0): %d (should be %d)\n", v, tv);
			
 
				+		STARPU_ASSERT(v == tv);
			
 
				+	}
			
 
				+	check_omp_func(starpu_omp_get_active_level, 0);
			
 
				+	check_omp_func(starpu_omp_in_final, 0);
			
 
				+	check_omp_func(starpu_omp_get_proc_bind, starpu_omp_proc_bind_undefined);
			
 
				+	check_omp_func(starpu_omp_get_default_device, 0);
			
 
				+	/* TODO: support more than one device */
			
 
				+	check_omp_func(starpu_omp_get_num_devices, 1);
			
 
				+	check_omp_func(starpu_omp_get_num_teams, 1);
			
 
				+	check_omp_func(starpu_omp_get_team_num, 0);
			
 
				+	check_omp_func(starpu_omp_is_initial_device, 1);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/environment.c
+++ b/tests/openmp/environment.c
@@ -0,0 +1,48 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	setenv("OMP_DYNAMIC","false", 1);
			
 
				+	setenv("OMP_NESTED","false", 1);
			
 
				+	setenv("OMP_SCHEDULE","auto", 1);
			
 
				+	setenv("OMP_STACKSIZE","2M", 1);
			
 
				+	setenv("OMP_WAIT_POLICY","passive", 1);
			
 
				+	setenv("OMP_THREAD_LIMIT","0", 1);
			
 
				+	setenv("OMP_MAX_ACTIVE_LEVELS","4", 1);
			
 
				+	setenv("OMP_CANCELLATION","false", 1);
			
 
				+	setenv("OMP_DEFAULT_DEVICE","0", 1);
			
 
				+	setenv("OMP_PROC_BIND","spread, spread, close", 1);
			
 
				+	setenv("OMP_NUM_THREADS","4, 16, 2", 1);
			
 
				+	setenv("OMP_PLACES","{1,2,3,4},{5,6,7,8}", 1);
			
 
				+	setenv("OMP_DISPLAY_ENV","verbose", 1);
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+	starpu_omp_shutdown();
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/init_exit_01.c
+++ b/tests/openmp/init_exit_01.c
@@ -0,0 +1,34 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+	starpu_omp_shutdown();
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/init_exit_02.c
+++ b/tests/openmp/init_exit_02.c
@@ -0,0 +1,44 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_01.c
+++ b/tests/openmp/parallel_01.c
@@ -0,0 +1,62 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_02.c
+++ b/tests/openmp/parallel_02.c
@@ -0,0 +1,79 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_2_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] parallel region 2: task thread = %d\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_1_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] parallel region 1: task thread = %d\n", (void *)tid, worker_id);
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_2_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_1_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_03.c
+++ b/tests/openmp/parallel_03.c
@@ -0,0 +1,63 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_barrier_01.c
+++ b/tests/openmp/parallel_barrier_01.c
@@ -0,0 +1,71 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- barrier 1\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	printf("[tid %p] task thread = %d -- barrier 2\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	printf("[tid %p] task thread = %d -- barrier 3\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	printf("[tid %p] task thread = %d -- barrier 4\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_critical_01.c
+++ b/tests/openmp/parallel_critical_01.c
@@ -0,0 +1,83 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void critical_g(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical(critical_g, NULL, NULL);
			
 
				+	starpu_omp_critical(critical_g, NULL, NULL);
			
 
				+	starpu_omp_critical(critical_g, NULL, NULL);
			
 
				+	starpu_omp_critical(critical_g, NULL, NULL);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_critical_inline_01.c
+++ b/tests/openmp/parallel_critical_inline_01.c
@@ -0,0 +1,86 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin(NULL);
			
 
				+	printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end(NULL);
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin(NULL);
			
 
				+	printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end(NULL);
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin(NULL);
			
 
				+	printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end(NULL);
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin(NULL);
			
 
				+	printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end(NULL);
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_critical_named_01.c
+++ b/tests/openmp/parallel_critical_named_01.c
@@ -0,0 +1,93 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void critical_g(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void critical_h(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical(critical_g, NULL, "g");
			
 
				+	starpu_omp_critical(critical_h, NULL, "h");
			
 
				+	starpu_omp_critical(critical_g, NULL, "g");
			
 
				+	starpu_omp_critical(critical_h, NULL, "h");
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_critical_named_inline_01.c
+++ b/tests/openmp/parallel_critical_named_inline_01.c
@@ -0,0 +1,86 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin("g");
			
 
				+	printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end("g");
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin("h");
			
 
				+	printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end("h");
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin("g");
			
 
				+	printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end("g");
			
 
				+
			
 
				+	starpu_omp_critical_inline_begin("h");
			
 
				+	printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_critical_inline_end("h");
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_for_01.c
+++ b/tests/openmp/parallel_for_01.c
@@ -0,0 +1,189 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+#define NB_ITERS 256
			
 
				+#define CHUNK 16
			
 
				+unsigned long long array[NB_ITERS];
			
 
				+
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
			
 
				+{
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
			
 
				+	for (; nb_i > 0; i++, nb_i--)
			
 
				+	{
			
 
				+		array[i] = 1;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void parallel_region_1_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_2_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_3_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_4_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_5_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_6_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0);
			
 
				+}
			
 
				+
			
 
				+static void clear_array(void)
			
 
				+{
			
 
				+	memset(array, 0, NB_ITERS*sizeof(unsigned long long));
			
 
				+}
			
 
				+
			
 
				+static void check_array(void)
			
 
				+{
			
 
				+	unsigned long long i;
			
 
				+	unsigned long long s = 0;
			
 
				+	for (i = 0; i < NB_ITERS; i++)
			
 
				+	{
			
 
				+		s += array[i];
			
 
				+	}
			
 
				+	if (s != NB_ITERS)
			
 
				+	{
			
 
				+		printf("missing iterations\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_1_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_2_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_3_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_4_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_5_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_6_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_for_02.c
+++ b/tests/openmp/parallel_for_02.c
@@ -0,0 +1,90 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+#define NB_ITERS 4321
			
 
				+#define CHUNK 42
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
			
 
				+{
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
			
 
				+	for (; nb_i > 0; i++, nb_i--)
			
 
				+	{
			
 
				+		printf("[tid %p] task thread = %d, for [%s] iteration %llu\n", (void *)tid, worker_id, (const char *)arg, i);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 1);
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 1);
			
 
				+	
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1);
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 1);
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 1);
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 1);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_for_ordered_01.c
+++ b/tests/openmp/parallel_for_ordered_01.c
@@ -0,0 +1,207 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+#define NB_ITERS 256
			
 
				+#define CHUNK 16
			
 
				+unsigned long long array[NB_ITERS];
			
 
				+
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+struct s_ordered_arg
			
 
				+{
			
 
				+	const char *msg;
			
 
				+	unsigned long long i;
			
 
				+};
			
 
				+
			
 
				+void ordered_f(void *_arg)
			
 
				+{
			
 
				+	struct s_ordered_arg *arg = _arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, arg->msg, arg->i);
			
 
				+}
			
 
				+
			
 
				+void for_g(unsigned long long i, unsigned long long nb_i, void *arg)
			
 
				+{
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i);
			
 
				+	for (; nb_i > 0; i++, nb_i--)
			
 
				+	{
			
 
				+		struct s_ordered_arg ordered_arg = { arg, i };
			
 
				+		array[i] = 1;
			
 
				+		starpu_omp_ordered(ordered_f, &ordered_arg);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void parallel_region_1_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 1, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_2_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 1, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_3_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 1, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_4_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 1, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_5_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_6_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0);
			
 
				+}
			
 
				+
			
 
				+static void clear_array(void)
			
 
				+{
			
 
				+	memset(array, 0, NB_ITERS*sizeof(unsigned long long));
			
 
				+}
			
 
				+
			
 
				+static void check_array(void)
			
 
				+{
			
 
				+	unsigned long long i;
			
 
				+	unsigned long long s = 0;
			
 
				+	for (i = 0; i < NB_ITERS; i++)
			
 
				+	{
			
 
				+		s += array[i];
			
 
				+	}
			
 
				+	if (s != NB_ITERS)
			
 
				+	{
			
 
				+		printf("missing iterations\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_1_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_2_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_3_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_4_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_5_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+
			
 
				+	clear_array();
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_6_f;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	check_array();
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_master_01.c
+++ b/tests/openmp/parallel_master_01.c
@@ -0,0 +1,83 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void master_g(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_master(master_g, NULL);
			
 
				+	starpu_omp_master(master_g, NULL);
			
 
				+	starpu_omp_master(master_g, NULL);
			
 
				+	starpu_omp_master(master_g, NULL);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_master_inline_01.c
+++ b/tests/openmp/parallel_master_inline_01.c
@@ -0,0 +1,77 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_master_inline())
			
 
				+		printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_master_inline())
			
 
				+		printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_master_inline())
			
 
				+		printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_master_inline())
			
 
				+		printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_nested_lock_01.c
+++ b/tests/openmp/parallel_nested_lock_01.c
@@ -0,0 +1,117 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+starpu_omp_nest_lock_t omp_nest_lock;
			
 
				+
			
 
				+void locked_func_n2(void)
			
 
				+{
			
 
				+	const int worker_id = starpu_worker_get_id();
			
 
				+	const pthread_t tid = pthread_self();
			
 
				+	printf("[tid %p] task thread = %d -- locked function n2\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+void locked_func_n1(void)
			
 
				+{
			
 
				+	const int worker_id = starpu_worker_get_id();
			
 
				+	const pthread_t tid = pthread_self();
			
 
				+	printf("[tid %p] task thread = %d -- locked function n1 -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_set_nest_lock(&omp_nest_lock);
			
 
				+	locked_func_n2();
			
 
				+	starpu_omp_unset_nest_lock(&omp_nest_lock);
			
 
				+	printf("[tid %p] task thread = %d -- locked function n1 <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void master_g1(void *arg)
			
 
				+{
			
 
				+	starpu_omp_init_nest_lock(&omp_nest_lock);
			
 
				+}
			
 
				+
			
 
				+void master_g2(void *arg)
			
 
				+{
			
 
				+	starpu_omp_destroy_nest_lock(&omp_nest_lock);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	const int worker_id = starpu_worker_get_id();
			
 
				+	const pthread_t tid = pthread_self();
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_master(master_g1, NULL);
			
 
				+	starpu_omp_barrier();
			
 
				+
			
 
				+	starpu_omp_set_nest_lock(&omp_nest_lock);
			
 
				+	locked_func_n1();
			
 
				+	starpu_omp_unset_nest_lock(&omp_nest_lock);
			
 
				+
			
 
				+	starpu_omp_set_nest_lock(&omp_nest_lock);
			
 
				+	locked_func_n1();
			
 
				+	starpu_omp_unset_nest_lock(&omp_nest_lock);
			
 
				+
			
 
				+	starpu_omp_set_nest_lock(&omp_nest_lock);
			
 
				+	locked_func_n1();
			
 
				+	starpu_omp_unset_nest_lock(&omp_nest_lock);
			
 
				+
			
 
				+	starpu_omp_set_nest_lock(&omp_nest_lock);
			
 
				+	locked_func_n1();
			
 
				+	starpu_omp_unset_nest_lock(&omp_nest_lock);
			
 
				+
			
 
				+	starpu_omp_barrier();
			
 
				+	starpu_omp_master(master_g2, NULL);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_sections_01.c
+++ b/tests/openmp/parallel_sections_01.c
@@ -0,0 +1,106 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void f(void *arg)
			
 
				+{
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, section [%s]\n", (void *)tid, worker_id, (const char *)arg);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	void (*section_f[4])(void *);
			
 
				+	void *section_args[4];
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	section_f[0] = f;
			
 
				+	section_f[1] = f;
			
 
				+	section_f[2] = f;
			
 
				+	section_f[3] = f;
			
 
				+
			
 
				+	section_args[0] = (void *)"A";
			
 
				+	section_args[1] = (void *)"B";
			
 
				+	section_args[2] = (void *)"C";
			
 
				+	section_args[3] = (void *)"D";
			
 
				+
			
 
				+	starpu_omp_sections(4, section_f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"E";
			
 
				+	section_args[1] = (void *)"F";
			
 
				+	section_args[2] = (void *)"G";
			
 
				+	section_args[3] = (void *)"H";
			
 
				+
			
 
				+	starpu_omp_sections(4, section_f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"I";
			
 
				+	section_args[1] = (void *)"J";
			
 
				+	section_args[2] = (void *)"K";
			
 
				+	section_args[3] = (void *)"L";
			
 
				+
			
 
				+	starpu_omp_sections(4, section_f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"M";
			
 
				+	section_args[1] = (void *)"N";
			
 
				+	section_args[2] = (void *)"O";
			
 
				+	section_args[3] = (void *)"P";
			
 
				+
			
 
				+	starpu_omp_sections(4, section_f, section_args, 0);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_sections_combined_01.c
+++ b/tests/openmp/parallel_sections_combined_01.c
@@ -0,0 +1,100 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void f(unsigned long long section_num, void *arg)
			
 
				+{
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d, section [%llu: %s]\n", (void *)tid, worker_id, section_num, (const char *)arg);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	void *section_args[4];
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	section_args[0] = (void *)"A";
			
 
				+	section_args[1] = (void *)"B";
			
 
				+	section_args[2] = (void *)"C";
			
 
				+	section_args[3] = (void *)"D";
			
 
				+
			
 
				+	starpu_omp_sections_combined(4, f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"E";
			
 
				+	section_args[1] = (void *)"F";
			
 
				+	section_args[2] = (void *)"G";
			
 
				+	section_args[3] = (void *)"H";
			
 
				+
			
 
				+	starpu_omp_sections_combined(4, f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"I";
			
 
				+	section_args[1] = (void *)"J";
			
 
				+	section_args[2] = (void *)"K";
			
 
				+	section_args[3] = (void *)"L";
			
 
				+
			
 
				+	starpu_omp_sections_combined(4, f, section_args, 0);
			
 
				+
			
 
				+	section_args[0] = (void *)"M";
			
 
				+	section_args[1] = (void *)"N";
			
 
				+	section_args[2] = (void *)"O";
			
 
				+	section_args[3] = (void *)"P";
			
 
				+
			
 
				+	starpu_omp_sections_combined(4, f, section_args, 0);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_simple_lock_01.c
+++ b/tests/openmp/parallel_simple_lock_01.c
@@ -0,0 +1,107 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+starpu_omp_lock_t omp_lock;
			
 
				+
			
 
				+void locked_func(void)
			
 
				+{
			
 
				+	const int worker_id = starpu_worker_get_id();
			
 
				+	const pthread_t tid = pthread_self();
			
 
				+	printf("[tid %p] task thread = %d -- locked function\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void master_g1(void *arg)
			
 
				+{
			
 
				+	starpu_omp_init_lock(&omp_lock);
			
 
				+}
			
 
				+
			
 
				+void master_g2(void *arg)
			
 
				+{
			
 
				+	starpu_omp_destroy_lock(&omp_lock);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	const int worker_id = starpu_worker_get_id();
			
 
				+	const pthread_t tid = pthread_self();
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_master(master_g1, NULL);
			
 
				+	starpu_omp_barrier();
			
 
				+
			
 
				+	starpu_omp_set_lock(&omp_lock);
			
 
				+	locked_func();
			
 
				+	starpu_omp_unset_lock(&omp_lock);
			
 
				+
			
 
				+	starpu_omp_set_lock(&omp_lock);
			
 
				+	locked_func();
			
 
				+	starpu_omp_unset_lock(&omp_lock);
			
 
				+
			
 
				+	starpu_omp_set_lock(&omp_lock);
			
 
				+	locked_func();
			
 
				+	starpu_omp_unset_lock(&omp_lock);
			
 
				+
			
 
				+	starpu_omp_set_lock(&omp_lock);
			
 
				+	locked_func();
			
 
				+	starpu_omp_unset_lock(&omp_lock);
			
 
				+
			
 
				+	starpu_omp_barrier();
			
 
				+	starpu_omp_master(master_g2, NULL);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	printf("<main>\n");
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_single_copyprivate_01.c
+++ b/tests/openmp/parallel_single_copyprivate_01.c
@@ -0,0 +1,91 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void single_g(void *arg, void *_data, unsigned long long data_size)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int *data = _data;
			
 
				+	STARPU_ASSERT(data_size >= sizeof(*data));
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	*data = worker_id;
			
 
				+	printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	int single_worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id));
			
 
				+	printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id);
			
 
				+	starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id));
			
 
				+	printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id);
			
 
				+	starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id));
			
 
				+	printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id);
			
 
				+	starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id));
			
 
				+	printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_single_copyprivate_inline_01.c
+++ b/tests/openmp/parallel_single_copyprivate_inline_01.c
@@ -0,0 +1,88 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	int single_worker_id;
			
 
				+	int *single_data;
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	int i;
			
 
				+
			
 
				+	for (i=0; i<4; i++)
			
 
				+	{
			
 
				+		if ((single_data = starpu_omp_single_copyprivate_inline_begin(&single_worker_id)) == NULL)
			
 
				+		{
			
 
				+			printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id);
			
 
				+			single_worker_id = worker_id;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			memcpy(&single_worker_id, single_data, sizeof(single_worker_id));
			
 
				+		}
			
 
				+		starpu_omp_single_copyprivate_inline_end();
			
 
				+		printf("[tid %p] task thread = %d -- single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id);
			
 
				+	}
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_single_inline_01.c
+++ b/tests/openmp/parallel_single_inline_01.c
@@ -0,0 +1,94 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	
			
 
				+	/* nowait = 0 */
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_barrier();
			
 
				+
			
 
				+	/* nowait = 1 */
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+	if (starpu_omp_single_inline())
			
 
				+		printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_single_nowait_01.c
+++ b/tests/openmp/parallel_single_nowait_01.c
@@ -0,0 +1,83 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void single_g(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_single(single_g, NULL, 1);
			
 
				+	starpu_omp_single(single_g, NULL, 1);
			
 
				+	starpu_omp_single(single_g, NULL, 1);
			
 
				+	starpu_omp_single(single_g, NULL, 1);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/parallel_single_wait_01.c
+++ b/tests/openmp/parallel_single_wait_01.c
@@ -0,0 +1,83 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void single_g(void *arg)
			
 
				+{
			
 
				+	(void) arg;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_single(single_g, NULL, 0);
			
 
				+	starpu_omp_single(single_g, NULL, 0);
			
 
				+	starpu_omp_single(single_g, NULL, 0);
			
 
				+	starpu_omp_single(single_g, NULL, 0);
			
 
				+	printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	printf("<main>\n");
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/task_01.c
+++ b/tests/openmp/task_01.c
@@ -0,0 +1,87 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id);
			
 
				+	
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/task_02.c
+++ b/tests/openmp/task_02.c
@@ -0,0 +1,196 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+#define	NX	64
			
 
				+int global_vector[NX];
			
 
				+
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void task_region_h(void *buffers[], void *args)
			
 
				+{
			
 
				+	struct starpu_vector_interface *_vector = buffers[0];
			
 
				+	int nx = STARPU_VECTOR_GET_NX(_vector);
			
 
				+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
			
 
				+	int f = (int)(intptr_t)args;
			
 
				+	int i;
			
 
				+
			
 
				+	printf("depth 2 task, entry: vector ptr = %p\n", v);
			
 
				+
			
 
				+	for (i = 0; i < nx; i++)
			
 
				+	{
			
 
				+                v[i] += f;
			
 
				+	}
			
 
				+
			
 
				+	printf("depth 2 task ending\n");
			
 
				+}
			
 
				+
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	struct starpu_vector_interface *_vector = buffers[0];
			
 
				+
			
 
				+	int nx = STARPU_VECTOR_GET_NX(_vector);
			
 
				+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
			
 
				+	int f = (int)(intptr_t)args;
			
 
				+	
			
 
				+	printf("depth 1 task, entry: vector ptr = %p\n", v);
			
 
				+
			
 
				+	{
			
 
				+		starpu_data_handle_t task_vector_handle;
			
 
				+		int i;
			
 
				+
			
 
				+		for (i = 0; i < nx; i++)
			
 
				+		{
			
 
				+			v[i] += f;
			
 
				+		}
			
 
				+
			
 
				+		starpu_vector_data_register(&task_vector_handle, STARPU_MAIN_RAM, (uintptr_t)v, NX, sizeof(v[0]));
			
 
				+		printf("depth 1 task, block 1: task_vector_handle = %p\n", task_vector_handle);
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		starpu_data_handle_t task_vector_handle;
			
 
				+		struct starpu_omp_task_region_attr attr;
			
 
				+		int i;
			
 
				+
			
 
				+		task_vector_handle = starpu_data_lookup(v);
			
 
				+		printf("depth 1 task, block 2: task_vector_handle = %p\n", task_vector_handle);
			
 
				+
			
 
				+		memset(&attr, 0, sizeof(attr));
			
 
				+		attr.cl.cpu_funcs[0]  = task_region_h;
			
 
				+		attr.cl.where         = STARPU_CPU;
			
 
				+		attr.cl.nbuffers      = 1;
			
 
				+		attr.cl.modes[0]      = STARPU_RW;
			
 
				+		attr.handles          = &task_vector_handle;
			
 
				+		attr.cl_arg_size      = sizeof(void *);
			
 
				+		attr.cl_arg_free      = 0;
			
 
				+		attr.if_clause        = 1;
			
 
				+		attr.final_clause     = 0;
			
 
				+		attr.untied_clause    = 1;
			
 
				+		attr.mergeable_clause = 0;
			
 
				+
			
 
				+		i = 0;
			
 
				+
			
 
				+		attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+		starpu_omp_task_region(&attr);
			
 
				+		attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+		starpu_omp_task_region(&attr);
			
 
				+	}
			
 
				+
			
 
				+	starpu_omp_taskwait();
			
 
				+}
			
 
				+
			
 
				+void master_g1(void *arg)
			
 
				+{
			
 
				+	starpu_data_handle_t region_vector_handle;
			
 
				+	int i;
			
 
				+
			
 
				+	printf("master_g1: vector ptr = %p\n", global_vector);
			
 
				+	for (i = 0; i < NX; i++)
			
 
				+	{
			
 
				+		global_vector[i] = 1;
			
 
				+	}
			
 
				+
			
 
				+	starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector, NX, sizeof(global_vector[0]));
			
 
				+	printf("master_g1: region_vector_handle = %p\n", region_vector_handle);
			
 
				+}
			
 
				+
			
 
				+void master_g2(void *arg)
			
 
				+{
			
 
				+	starpu_data_handle_t region_vector_handle;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int i;
			
 
				+
			
 
				+	region_vector_handle = starpu_data_lookup(global_vector);
			
 
				+	printf("master_g2: region_vector_handle = %p\n", region_vector_handle);
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl.nbuffers      = 1;
			
 
				+	attr.cl.modes[0]      = STARPU_RW;
			
 
				+	attr.handles          = &region_vector_handle;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	i = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	starpu_omp_master(master_g1, NULL);
			
 
				+	starpu_omp_barrier();
			
 
				+	{
			
 
				+		starpu_data_handle_t region_vector_handle;
			
 
				+		region_vector_handle = starpu_data_lookup(global_vector);
			
 
				+		printf("parallel_region block 1: region_vector_handle = %p\n", region_vector_handle);
			
 
				+	}
			
 
				+	starpu_omp_barrier();
			
 
				+	starpu_omp_master(master_g2, NULL);
			
 
				+	starpu_omp_barrier();
			
 
				+	{
			
 
				+		starpu_data_handle_t region_vector_handle;
			
 
				+		region_vector_handle = starpu_data_lookup(global_vector);
			
 
				+		printf("parallel_region block 2: region_vector_handle = %p\n", region_vector_handle);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/taskgroup_01.c
+++ b/tests/openmp/taskgroup_01.c
@@ -0,0 +1,119 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	int i = (int)(intptr_t) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i);
			
 
				+}
			
 
				+
			
 
				+void taskgroup_f(void *arg)
			
 
				+{
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int *p_i = (int *)arg;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)(*p_i)++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)(*p_i)++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int i = 0;
			
 
				+
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id);
			
 
				+	
			
 
				+	starpu_omp_taskgroup(taskgroup_f, (void *)&i);
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	starpu_omp_taskgroup(taskgroup_f, (void *)&i);
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/taskgroup_02.c
+++ b/tests/openmp/taskgroup_02.c
@@ -0,0 +1,123 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	int i = (int)(intptr_t) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i);
			
 
				+}
			
 
				+
			
 
				+void taskgroup_f(void *arg)
			
 
				+{
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int *p_i = (int *)arg;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)(*p_i)++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)(*p_i)++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int i = 0;
			
 
				+
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id);
			
 
				+	
			
 
				+	starpu_omp_taskgroup_inline_begin();
			
 
				+	taskgroup_f((void *)&i);
			
 
				+	starpu_omp_taskgroup_inline_end();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	starpu_omp_taskgroup_inline_begin();
			
 
				+	taskgroup_f((void *)&i);
			
 
				+	starpu_omp_taskgroup_inline_end();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
--- a/tests/openmp/taskwait_01.c
+++ b/tests/openmp/taskwait_01.c
@@ -0,0 +1,105 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#if !defined(STARPU_OPENMP)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+__attribute__((constructor))
			
 
				+static void omp_constructor(void)
			
 
				+{
			
 
				+	int ret = starpu_omp_init();
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
			
 
				+}
			
 
				+
			
 
				+__attribute__((destructor))
			
 
				+static void omp_destructor(void)
			
 
				+{
			
 
				+	starpu_omp_shutdown();
			
 
				+}
			
 
				+
			
 
				+void task_region_g(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	int i = (int)(intptr_t) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i);
			
 
				+}
			
 
				+
			
 
				+void parallel_region_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	struct starpu_omp_task_region_attr attr;
			
 
				+	int i = 0;
			
 
				+
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id);
			
 
				+	
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0]  = task_region_g;
			
 
				+	attr.cl.where         = STARPU_CPU;
			
 
				+	attr.cl_arg_size      = sizeof(void *);
			
 
				+	attr.cl_arg_free      = 0;
			
 
				+	attr.if_clause        = 1;
			
 
				+	attr.final_clause     = 0;
			
 
				+	attr.untied_clause    = 1;
			
 
				+	attr.mergeable_clause = 0;
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_taskwait();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	starpu_omp_taskwait();
			
 
				+	printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id);
			
 
				+
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+	attr.cl_arg = (void *)(intptr_t)i++;
			
 
				+	starpu_omp_task_region(&attr);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main (int argc, char *argv[]) {
			
 
				+	struct starpu_omp_parallel_region_attr attr;
			
 
				+	memset(&attr, 0, sizeof(attr));
			
 
				+	attr.cl.cpu_funcs[0] = parallel_region_f;
			
 
				+	attr.cl.where        = STARPU_CPU;
			
 
				+	attr.if_clause       = 1;
			
 
				+	starpu_omp_parallel_region(&attr);
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif