@@ -1,10 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) Inria
- * Copyright (C) CNRS
  * Copyright (C) 2015-2016 Université de Bordeaux
- * Copyright (C) 2015,2017 Inria
- * Copyright (C) 2015-2017 CNRS
+ * Copyright (C) 2015,2017 Inria
+ * Copyright (C) 2015-2018 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,226 +15,231 @@
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
-
-#include <starpu.h>
-#include <omp.h>
-#include <pthread.h>
-
-#ifdef STARPU_QUICK_CHECK
-#define NTASKS 64
-#define SIZE 40
-#define LOOPS 4
-#else
-#define NTASKS 100
-#define SIZE 400
-#define LOOPS 10
-#endif
-
-#define N_NESTED_CTXS 2
-
-struct context
-{
-	int ncpus;
-	int *cpus;
-	unsigned id;
-};
-
-/* Helper for the task that will initiate everything */
-void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
-{
-	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
-	int sched_ctx = *(int *)sched_ctx_;
-	int *cpuids = NULL;
-	int ncpuids = 0;
-	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
-
-#pragma omp parallel num_threads(ncpuids)
-	{
-		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
-	}
-
-	omp_set_num_threads(ncpuids);
-	free(cpuids);
-	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
-	return;
-}
-
-void noop(void * buffers[], void * cl_arg)
-{
-	(void)buffers;
-	(void)cl_arg;
-}
-
-static struct starpu_codelet init_parallel_worker_cl=
-{
-	.cpu_funcs = {noop},
-	.nbuffers = 0,
-	.name = "init_parallel_worker"
-};
-
-/* function called to initialize the parallel "workers" */
-void parallel_task_init_one_context(unsigned * context_id)
-{
-	struct starpu_task * t;
-	int ret;
-
-	t = starpu_task_build(&init_parallel_worker_cl,
-			      STARPU_SCHED_CTX, *context_id,
-			      0);
-	t->destroy = 1;
-	t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all;
-	if (t->prologue_callback_pop_arg_free)
-		free(t->prologue_callback_pop_arg);
-	t->prologue_callback_pop_arg=context_id;
-	t->prologue_callback_pop_arg_free=0;
-
-	ret = starpu_task_submit(t);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-struct context main_context;
-struct context *contexts;
-void parallel_task_init()
-{
-	/* Context creation */
-	main_context.ncpus = starpu_cpu_worker_get_count();
-	main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int));
-	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
-
-	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
-
-	main_context.id = starpu_sched_ctx_create(main_context.cpus,
-						  main_context.ncpus,"main_ctx",
-						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
-						  0);
-
-	/* Initialize nested contexts */
-	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
-	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
-	int i;
-	for(i = 0; i < N_NESTED_CTXS; i++)
-	{
-		contexts[i].ncpus = cpus_per_context;
-		if (i == N_NESTED_CTXS-1)
-			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
-		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
-	}
-
-	for(i = 0; i < N_NESTED_CTXS; i++)
-		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
-							 contexts[i].ncpus,"nested_ctx",
-							 STARPU_SCHED_CTX_NESTED,main_context.id,
-							 0);
-
-	for (i = 0; i < N_NESTED_CTXS; i++)
-	{
-		parallel_task_init_one_context(&contexts[i].id);
-	}
-
-	starpu_task_wait_for_all();
-	starpu_sched_ctx_set_context(&main_context.id);
-}
-
-void parallel_task_deinit()
-{
-	int i;
-	for (i=0; i<N_NESTED_CTXS;i++)
-		starpu_sched_ctx_delete(contexts[i].id);
-	free(contexts);
-	free(main_context.cpus);
-}
-
-/* Codelet SUM */
-static void sum_cpu(void * descr[], void *cl_arg)
-{
-	(void)cl_arg;
-	double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
-	double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
-	double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
-	int size = STARPU_VECTOR_GET_NX(descr[0]);
-
-	int i, k;
-	for (k=0;k<LOOPS;k++)
-	{
-#pragma omp parallel for
-		for (i=0; i<size; i++)
-		{
-			v_dst[i]+=v_src0[i]+v_src1[i];
-		}
-	}
-}
-
-static struct starpu_codelet sum_cl =
-{
-	.cpu_funcs = {sum_cpu, NULL},
-	.nbuffers = 3,
-	.modes={STARPU_RW,STARPU_R, STARPU_R}
-};
-
-int main(void)
-{
-	int ntasks = NTASKS;
-	int ret, j, k;
-	unsigned ncpus = 0;
-
-	ret = starpu_init(NULL);
-	if (ret == -ENODEV)
-		return 77;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
-	{
-		starpu_shutdown();
-		return 77;
-	}
-
-	parallel_task_init();
-
-	/* Data preparation */
-	double array1[SIZE];
-	double array2[SIZE];
-
-	memset(array1, 0, sizeof(double));
-	int i;
-	for (i=0;i<SIZE;i++)
-	{
-		array2[i]=i*2;
-	}
-
-	starpu_data_handle_t handle1;
-	starpu_data_handle_t handle2;
-
-	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
-	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
-
-	for (i = 0; i < ntasks; i++)
-	{
-		struct starpu_task * t;
-		t=starpu_task_build(&sum_cl,
-				    STARPU_RW,handle1,
-				    STARPU_R,handle2,
-				    STARPU_R,handle1,
-				    STARPU_SCHED_CTX, main_context.id,
-				    0);
-		t->destroy = 1;
-		t->possibly_parallel = 1;
-
-		ret=starpu_task_submit(t);
-		if (ret == -ENODEV)
-			goto out;
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-
-
-
-out:
-	/* wait for all tasks at the end*/
-	starpu_task_wait_for_all();
-
-	starpu_data_unregister(handle1);
-	starpu_data_unregister(handle2);
-	parallel_task_deinit();
-
-	starpu_shutdown();
-	return 0;
-}
+
+#include <starpu.h>
+#include <omp.h>
+#include <pthread.h>
+
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 64
+#define SIZE 40
+#define LOOPS 4
+#else
+#define NTASKS 100
+#define SIZE 400
+#define LOOPS 10
+#endif
+
+#define N_NESTED_CTXS 2
+
+struct context
+{
+	int ncpus;
+	int *cpus;
+	unsigned id;
+};
+
+/* Helper for the task that will initiate everything */
+void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
+{
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
+	int sched_ctx = *(int *)sched_ctx_;
+	int *cpuids = NULL;
+	int ncpuids = 0;
+	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+
+#pragma omp parallel num_threads(ncpuids)
+	{
+		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+	}
+
+	omp_set_num_threads(ncpuids);
+	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
+	return;
+}
+
+void noop(void * buffers[], void * cl_arg)
+{
+	(void)buffers;
+	(void)cl_arg;
+}
+
+static struct starpu_codelet init_parallel_worker_cl=
+{
+	.cpu_funcs = {noop},
+	.nbuffers = 0,
+	.name = "init_parallel_worker"
+};
+
+/* function called to initialize the parallel "workers" */
+void parallel_task_init_one_context(unsigned * context_id)
+{
+	struct starpu_task * t;
+	int ret;
+
+	t = starpu_task_build(&init_parallel_worker_cl,
+			      STARPU_SCHED_CTX, *context_id,
+			      0);
+	t->destroy = 1;
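+	/* The pop prologue runs on the worker that picks this task up, so the OpenMP team is created and bound inside the target context. */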
+	t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all;
+	if (t->prologue_callback_pop_arg_free)
+		free(t->prologue_callback_pop_arg);
+	t->prologue_callback_pop_arg=context_id;
+	t->prologue_callback_pop_arg_free=0;
+
+	ret = starpu_task_submit(t);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+struct context main_context;
+struct context *contexts;
+void parallel_task_init()
+{
+	/* Context creation */
+	main_context.ncpus = starpu_cpu_worker_get_count();
+	main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int));
+	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
+
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
+
+	main_context.id = starpu_sched_ctx_create(main_context.cpus,
+						  main_context.ncpus,"main_ctx",
+						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
+						  0);
+
+	/* Initialize nested contexts */
+	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
+	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
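+	/* Split the main context's CPUs evenly; the last nested context also receives the remainder. */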
+	int i;
+	for(i = 0; i < N_NESTED_CTXS; i++)
+	{
+		contexts[i].ncpus = cpus_per_context;
+		if (i == N_NESTED_CTXS-1)
+			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
+		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
+	}
+
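+	/* Each nested context is created on its own slice of the main context's CPUs. */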
+	for(i = 0; i < N_NESTED_CTXS; i++)
+		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
+							 contexts[i].ncpus,"nested_ctx",
+							 STARPU_SCHED_CTX_NESTED,main_context.id,
+							 0);
+
+	for (i = 0; i < N_NESTED_CTXS; i++)
+	{
+		parallel_task_init_one_context(&contexts[i].id);
+	}
+
+	starpu_task_wait_for_all();
+	starpu_sched_ctx_set_context(&main_context.id);
+}
+
+void parallel_task_deinit()
+{
+	int i;
+	for (i=0; i<N_NESTED_CTXS;i++)
+		starpu_sched_ctx_delete(contexts[i].id);
+	free(contexts);
+	free(main_context.cpus);
+}
+
+/* Codelet SUM */
+static void sum_cpu(void * descr[], void *cl_arg)
+{
+	(void)cl_arg;
+	double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
+	double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
+	double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
+	int size = STARPU_VECTOR_GET_NX(descr[0]);
+
+	int i, k;
+	for (k=0;k<LOOPS;k++)
+	{
+#pragma omp parallel for
+		for (i=0; i<size; i++)
+		{
+			v_dst[i]+=v_src0[i]+v_src1[i];
+		}
+	}
+}
+
+static struct starpu_codelet sum_cl =
+{
+	.cpu_funcs = {sum_cpu, NULL},
+	.nbuffers = 3,
+	.modes={STARPU_RW,STARPU_R, STARPU_R}
+};
+
+int main(void)
+{
+	int ntasks = NTASKS;
+	int ret, j, k;
+	unsigned ncpus = 0;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
+	{
+		starpu_shutdown();
+		return 77;
+	}
+
+	parallel_task_init();
+
+	/* Data preparation */
+	double array1[SIZE];
+	double array2[SIZE];
+
+	memset(array1, 0, sizeof(array1)); /* clear the whole output vector, not just its first element */
+	int i;
+	for (i=0;i<SIZE;i++)
+	{
+		array2[i]=i*2;
+	}
+
+	starpu_data_handle_t handle1;
+	starpu_data_handle_t handle2;
+
+	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
+	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task * t;
+		t=starpu_task_build(&sum_cl,
+				    STARPU_RW,handle1,
+				    STARPU_R,handle2,
+				    STARPU_R,handle1,
+				    STARPU_SCHED_CTX, main_context.id,
+				    0);
+		t->destroy = 1;
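+		/* Mark the task as possibly parallel so the scheduler may run it on one of the nested contexts. */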
+		t->possibly_parallel = 1;
+
+		ret=starpu_task_submit(t);
+		if (ret == -ENODEV)
+			goto out;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+
+
+out:
+	/* wait for all tasks at the end*/
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	parallel_task_deinit();
+
+	starpu_shutdown();
+	return 0;
+}
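
For reference: a sketch of compiling and running this example by hand, assuming a StarPU installation that ships a starpu-1.2 pkg-config module (the actual module name depends on the installed StarPU series) and an OpenMP-capable compiler. The source file name is not recorded in this patch, so "parallel_example.c" below is a placeholder:

    gcc parallel_example.c -o parallel_example -fopenmp $(pkg-config --cflags --libs starpu-1.2)
    ./parallel_example

The program exits with status 77, the conventional "test skipped" code, when starpu_init() finds no device or fewer than N_NESTED_CTXS CPU workers are available.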