Browse Source

Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu

Olivier Aumage 7 years ago
parent
commit
4f81cd3a92
100 changed files with 1242 additions and 782 deletions
  1. 22 0
      configure.ac
  2. 3 2
      doc/doxygen/chapters/310_data_management.doxy
  3. 2 1
      doc/doxygen/chapters/510_configure_options.doxy
  4. 15 15
      doc/doxygen/chapters/api/insert_task.doxy
  5. 2 2
      examples/common/blas.c
  6. 2 2
      examples/common/blas.h
  7. 5 1
      examples/common/blas_model.h
  8. 3 2
      examples/filters/fmultiple_submit.c
  9. 3 2
      examples/filters/fmultiple_submit_implicit.c
  10. 3 2
      examples/filters/fmultiple_submit_readonly.c
  11. 6 1
      examples/heat/dw_factolu.c
  12. 2 2
      examples/lu/blas_complex.c
  13. 2 2
      examples/lu/blas_complex.h
  14. 5 4
      examples/lu/lu_example.c
  15. 12 17
      examples/lu/xlu.c
  16. 4 4
      examples/lu/xlu.h
  17. 12 14
      examples/lu/xlu_implicit.c
  18. 20 22
      examples/lu/xlu_implicit_pivot.c
  19. 13 1
      examples/lu/xlu_kernels.c
  20. 19 21
      examples/lu/xlu_pivot.c
  21. 1 1
      examples/reductions/dot_product.c
  22. 225 227
      examples/sched_ctx/parallel_tasks_reuse_handle.c
  23. 2 1
      include/starpu_config.h.in
  24. 6 5
      include/starpu_task_util.h
  25. 2 1
      include/starpu_util.h
  26. 3 2
      mpi/examples/mpi_lu/plu_example.c
  27. 3 2
      mpi/examples/mpi_lu/plu_implicit_example.c
  28. 3 2
      mpi/examples/mpi_lu/plu_outofcore_example.c
  29. 3 2
      mpi/examples/mpi_lu/pxlu.c
  30. 2 2
      mpi/examples/mpi_lu/pxlu.h
  31. 3 3
      mpi/examples/mpi_lu/pxlu_implicit.c
  32. 9 1
      mpi/examples/mpi_lu/pxlu_kernels.c
  33. 3 1
      mpi/src/Makefile.am
  34. 2 2
      mpi/src/mpi/starpu_mpi_comm.c
  35. 30 143
      mpi/src/mpi/starpu_mpi_mpi.c
  36. 35 133
      mpi/src/nmad/starpu_mpi_nmad.c
  37. 58 16
      mpi/src/starpu_mpi.c
  38. 3 1
      mpi/src/starpu_mpi_cache.c
  39. 269 0
      mpi/src/starpu_mpi_coop_sends.c
  40. 1 0
      mpi/src/starpu_mpi_init.c
  41. 17 1
      mpi/src/starpu_mpi_private.c
  42. 58 4
      mpi/src/starpu_mpi_private.h
  43. 161 0
      mpi/src/starpu_mpi_req.c
  44. 1 0
      mpi/tests/Makefile.am
  45. 26 6
      mpi/tests/broadcast.c
  46. 1 2
      mpi/tests/user_defined_datatype_value.h
  47. 3 2
      socl/src/cl_buildprogram.c
  48. 3 2
      socl/src/cl_createbuffer.c
  49. 3 2
      socl/src/cl_createcommandqueue.c
  50. 3 2
      socl/src/cl_createcontext.c
  51. 3 2
      socl/src/cl_createcontextfromtype.c
  52. 3 2
      socl/src/cl_createimage2d.c
  53. 3 2
      socl/src/cl_createimage3d.c
  54. 3 2
      socl/src/cl_createkernel.c
  55. 3 2
      socl/src/cl_createkernelsinprogram.c
  56. 3 2
      socl/src/cl_createprogramwithbinary.c
  57. 3 2
      socl/src/cl_createprogramwithsource.c
  58. 3 2
      socl/src/cl_createsampler.c
  59. 3 2
      socl/src/cl_enqueuebarrier.c
  60. 3 2
      socl/src/cl_enqueuebarrierwithwaitlist.c
  61. 3 2
      socl/src/cl_enqueuecopybuffer.c
  62. 3 2
      socl/src/cl_enqueuecopybuffertoimage.c
  63. 3 2
      socl/src/cl_enqueuecopyimage.c
  64. 3 2
      socl/src/cl_enqueuecopyimagetobuffer.c
  65. 3 2
      socl/src/cl_enqueuemapbuffer.c
  66. 3 2
      socl/src/cl_enqueuemapimage.c
  67. 3 2
      socl/src/cl_enqueuemarker.c
  68. 3 2
      socl/src/cl_enqueuemarkerwithwaitlist.c
  69. 3 2
      socl/src/cl_enqueuenativekernel.c
  70. 3 2
      socl/src/cl_enqueuendrangekernel.c
  71. 3 2
      socl/src/cl_enqueuereadbuffer.c
  72. 3 2
      socl/src/cl_enqueuereadimage.c
  73. 3 2
      socl/src/cl_enqueuetask.c
  74. 3 2
      socl/src/cl_enqueueunmapmemobject.c
  75. 3 2
      socl/src/cl_enqueuewaitforevents.c
  76. 3 2
      socl/src/cl_enqueuewritebuffer.c
  77. 3 2
      socl/src/cl_enqueuewriteimage.c
  78. 3 2
      socl/src/cl_finish.c
  79. 3 2
      socl/src/cl_flush.c
  80. 3 2
      socl/src/cl_getcommandqueueinfo.c
  81. 4 3
      socl/src/cl_getcontextinfo.c
  82. 3 2
      socl/src/cl_getdeviceids.c
  83. 3 2
      socl/src/cl_getdeviceinfo.c
  84. 3 2
      socl/src/cl_geteventinfo.c
  85. 3 2
      socl/src/cl_geteventprofilinginfo.c
  86. 3 2
      socl/src/cl_getextensionfunctionaddress.c
  87. 3 2
      socl/src/cl_getimageinfo.c
  88. 3 2
      socl/src/cl_getkernelinfo.c
  89. 3 2
      socl/src/cl_getkernelworkgroupinfo.c
  90. 3 2
      socl/src/cl_getmemobjectinfo.c
  91. 3 2
      socl/src/cl_getplatformids.c
  92. 3 2
      socl/src/cl_getplatforminfo.c
  93. 3 2
      socl/src/cl_getprogrambuildinfo.c
  94. 3 2
      socl/src/cl_getprograminfo.c
  95. 3 2
      socl/src/cl_getsamplerinfo.c
  96. 3 2
      socl/src/cl_getsupportedimageformats.c
  97. 3 2
      socl/src/cl_icdgetplatformidskhr.c
  98. 3 2
      socl/src/cl_releasecommandqueue.c
  99. 3 2
      socl/src/cl_releasecontext.c
  100. 0 0
      socl/src/cl_releaseevent.c

+ 22 - 0
configure.ac

@@ -2937,6 +2937,8 @@ AC_ARG_ENABLE(blas-lib,
         blas_lib=atlas
         blas_lib=atlas
      elif test "x$enableval" = "xgoto" ; then
      elif test "x$enableval" = "xgoto" ; then
         blas_lib=goto
         blas_lib=goto
+     elif test "x$enableval" = "xopenblas" ; then
+        blas_lib=openblas
      elif test "x$enableval" = "xnone" ; then
      elif test "x$enableval" = "xnone" ; then
         blas_lib=none
         blas_lib=none
      elif test "x$enableval" = "xmkl" ; then
      elif test "x$enableval" = "xmkl" ; then
@@ -2998,6 +3000,26 @@ if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then
     fi
     fi
 fi
 fi
 
 
+if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then
+    PKG_CHECK_MODULES([OPENBLAS],  [openblas],  [
+      PKG_CHECK_MODULES([BLAS_OPENBLAS],  [blas-openblas],  [
+        AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.])
+        AC_SUBST([STARPU_OPENBLAS], [1])
+        CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} "
+        LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} "
+        blas_lib=openblas
+      ], [
+	if test x$blas_lib = xopenblas; then
+	  AC_MSG_ERROR([cannot find blas-openblas lib])
+	fi
+      ])
+    ], [
+      if test x$blas_lib = xopenblas; then
+        AC_MSG_ERROR([cannot find openblas lib])
+      fi
+    ])
+fi
+
 if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then
 if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then
     # Should we use MKL ?
     # Should we use MKL ?
     if test -n "$MKLROOT"
     if test -n "$MKLROOT"

+ 3 - 2
doc/doxygen/chapters/310_data_management.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -871,6 +871,7 @@ struct starpu_codelet cl =
 \endcode
 \endcode
 
 
 the first data of the task will be kept in the main memory, while the second
 the first data of the task will be kept in the main memory, while the second
-data will be copied to the CUDA GPU as usual.
+data will be copied to the CUDA GPU as usual. A working example is available in
+<c>tests/datawizard/specific_node.c</c>.
 
 
 */
 */

+ 2 - 1
doc/doxygen/chapters/510_configure_options.doxy

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2013,2015-2017                      Inria
  * Copyright (C) 2011-2013,2015-2017                      Inria
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2013-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -540,6 +540,7 @@ Specify the blas library to be used by some of the examples. Librairies availabl
 - none [default] : no BLAS library is used
 - none [default] : no BLAS library is used
 - atlas: use ATLAS library
 - atlas: use ATLAS library
 - goto: use GotoBLAS library
 - goto: use GotoBLAS library
+- openblas: use OpenBLAS library
 - mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)
 - mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)
 </dd>
 </dd>
 
 

+ 15 - 15
doc/doxygen/chapters/api/insert_task.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2009-2011,2014-2016,2018                 Université de Bordeaux
  * Copyright (C) 2009-2011,2014-2016,2018                 Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  *
  *
@@ -169,18 +169,17 @@ room again with this function, store yet more handles, etc.
 
 
 \fn void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle)
 \fn void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle)
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
-This stores data handle \p handle into task \p task with mode \p arg_type,
+Store data handle \p handle into task \p task with mode \p arg_type,
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 
 
 \fn void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles)
 \fn void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles)
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
-This stores \p nb_handles data handles \p handles into task \p task, updating \p
+Store \p nb_handles data handles \p handles into task \p task, updating \p
 *allocated_buffers and \p *current_buffer accordingly.
 *allocated_buffers and \p *current_buffer accordingly.
 
 
-
 \fn void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs);
 \fn void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs);
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
-This stores \p nb_descrs data handles described by \p descrs into task \p task,
+Store \p nb_descrs data handles described by \p descrs into task \p task,
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 
 
 \fn void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
 \fn void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
@@ -193,20 +192,21 @@ Instead of calling starpu_codelet_pack_args(), one can also call
 starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each
 starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each
 data, then starpu_codelet_pack_arg_fini().
 data, then starpu_codelet_pack_arg_fini().
 
 
-\fn void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg *state)
+\fn void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state)
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
-Initiaze struct starpu_codelet_pack_arg before calling starpu_codelet_pack_arg and
-starpu_codelet_pack_arg_fini. This will simply initialize the content of the structure.
+Initialize struct starpu_codelet_pack_arg_data before calling starpu_codelet_pack_arg() and
+starpu_codelet_pack_arg_fini(). This will simply initialize the content of the structure.
 
 
-\fn void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg *state, void *ptr, size_t ptr_size)
-Pack one argument into struct starpu_codelet_pack_arg state. That structure
-has to be initialized before with starpu_codelet_pack_arg_init, and after all
-starpu_codelet_pack_arg calls performed, starpu_codelet_pack_arg_fini has to be
-used to get the cl_arg and cl_arg_size to be put in the task.
+\fn void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, void *ptr, size_t ptr_size)
+\ingroup API_Insert_Task
+Pack one argument into struct starpu_codelet_pack_arg_data \p state. That structure
+has to be initialized beforehand with starpu_codelet_pack_arg_init(), and after all
+starpu_codelet_pack_arg() calls have been performed, starpu_codelet_pack_arg_fini() has to be
+used to get the \p cl_arg and \p cl_arg_size to be put in the task.
 
 
-\fn void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg *state, void **cl_arg, size_t *cl_arg_size)
+\fn void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size)
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
-Finish packing data, after calling starpu_codelet_pack_arg_init once and starpu_codelet_pack_arg several times.
+Finish packing data, after calling starpu_codelet_pack_arg_init() once and starpu_codelet_pack_arg() several times.
 
 
 \fn void starpu_codelet_unpack_args(void *cl_arg, ...)
 \fn void starpu_codelet_unpack_args(void *cl_arg, ...)
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task

+ 2 - 2
examples/common/blas.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2011,2014-2015                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015, 2018                Université de Bordeaux
  * Copyright (C) 2010,2015,2017                           CNRS
  * Copyright (C) 2010,2015,2017                           CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -239,7 +239,7 @@ void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int i
 	cblas_dswap(n, x, incx, y, incy);
 	cblas_dswap(n, x, incx, y, incy);
 }
 }
 
 
-#elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
+#elif defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
 
 
 inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
 inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
 			float alpha, const float *A, int lda, const float *B, int ldb, 
 			float alpha, const float *A, int lda, const float *B, int ldb, 

+ 2 - 2
examples/common/blas.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2011,2014, 2018                     Université de Bordeaux
  * Copyright (C) 2010,2015,2017                           CNRS
  * Copyright (C) 2010,2015,2017                           CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -88,7 +88,7 @@ void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda);
 void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda);
 void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda);
 #endif
 #endif
 
 
-#if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
+#if defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
 
 
 extern void sgemm_ (const char *transa, const char *transb, const int *m,
 extern void sgemm_ (const char *transa, const char *transb, const int *m,
                    const int *n, const int *k, const float *alpha, 
                    const int *n, const int *k, const float *alpha, 

+ 5 - 1
examples/common/blas_model.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2008-2012,2014                           Université de Bordeaux
+ * Copyright (C) 2008-2012,2014, 2018                     Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,6 +29,8 @@ static struct starpu_perfmodel starpu_sgemm_model =
 	.symbol = "sgemm_atlas"
 	.symbol = "sgemm_atlas"
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = "sgemm_goto"
 	.symbol = "sgemm_goto"
+#elif defined(STARPU_OPENBLAS)
+	.symbol = "sgemm_openblas"
 #else
 #else
 	.symbol = "sgemm"
 	.symbol = "sgemm"
 #endif
 #endif
@@ -47,6 +49,8 @@ static struct starpu_perfmodel starpu_dgemm_model =
 	.symbol = "dgemm_atlas"
 	.symbol = "dgemm_atlas"
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = "dgemm_goto"
 	.symbol = "dgemm_goto"
+#elif defined(STARPU_OPENBLAS)
+	.symbol = "dgemm_openblas"
 #else
 #else
 	.symbol = "dgemm"
 	.symbol = "dgemm"
 #endif
 #endif

+ 3 - 2
examples/filters/fmultiple_submit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -117,7 +117,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 	}
 	}

+ 3 - 2
examples/filters/fmultiple_submit_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -164,7 +164,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;

+ 3 - 2
examples/filters/fmultiple_submit_readonly.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -165,7 +165,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;

+ 6 - 1
examples/heat/dw_factolu.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
- * Copyright (C) 2008-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2008-2015,2017-2018                      Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
@@ -766,6 +766,11 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 	char * symbol_12 = "lu_model_12_goto";
 	char * symbol_12 = "lu_model_12_goto";
 	char * symbol_21 = "lu_model_21_goto";
 	char * symbol_21 = "lu_model_21_goto";
 	char * symbol_22 = "lu_model_22_goto";
 	char * symbol_22 = "lu_model_22_goto";
+#elif defined(STARPU_OPENBLAS)
+	char * symbol_11 = "lu_model_11_openblas";
+	char * symbol_12 = "lu_model_12_openblas";
+	char * symbol_21 = "lu_model_21_openblas";
+	char * symbol_22 = "lu_model_22_openblas";
 #else
 #else
 	char * symbol_11 = "lu_model_11";
 	char * symbol_11 = "lu_model_11";
 	char * symbol_12 = "lu_model_12";
 	char * symbol_12 = "lu_model_12";

+ 2 - 2
examples/lu/blas_complex.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2010,2012,2015,2017                      CNRS
  * Copyright (C) 2010,2012,2015,2017                      CNRS
- * Copyright (C) 2009-2010,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2010,2014, 2018                     Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +33,7 @@
 #error not implemented
 #error not implemented
 #elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #error not implemented
 #error not implemented
-#elif defined(STARPU_MKL)
+#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL)
 
 
 inline void CGEMM(char *transa, char *transb, int M, int N, int K, 
 inline void CGEMM(char *transa, char *transb, int M, int N, int K, 
 			complex float alpha, complex float *A, int lda, complex float *B, int ldb, 
 			complex float alpha, complex float *A, int lda, complex float *B, int ldb, 

+ 2 - 2
examples/lu/blas_complex.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2010,2012,2015,2017                      CNRS
  * Copyright (C) 2010,2012,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2011,2014, 2018                     Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -82,7 +82,7 @@ void ZSWAP(const int n, complex double *x, const int incx, complex double *y, co
 
 
 #if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #error not implemented
 #error not implemented
-#elif defined(STARPU_MKL)
+#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL)
 
 
 extern void cgemm_ (const char *transa, const char *transb, const int *m,
 extern void cgemm_ (const char *transa, const char *transb, const int *m,
                    const int *n, const int *k, const complex float *alpha, 
                    const int *n, const int *k, const complex float *alpha, 

+ 5 - 4
examples/lu/lu_example.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2009-2017                                Université de Bordeaux
  * Copyright (C) 2009-2017                                Université de Bordeaux
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,6 +34,7 @@ static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
 static unsigned no_stride = 0;
 static unsigned profile = 0;
 static unsigned profile = 0;
+static unsigned no_prio=0;
 unsigned bound = 0;
 unsigned bound = 0;
 unsigned bounddeps = 0;
 unsigned bounddeps = 0;
 unsigned boundprio = 0;
 unsigned boundprio = 0;
@@ -367,7 +368,7 @@ int main(int argc, char **argv)
 			A_blocks = malloc(nblocks*nblocks*sizeof(TYPE *));
 			A_blocks = malloc(nblocks*nblocks*sizeof(TYPE *));
 			copy_matrix_into_blocks();
 			copy_matrix_into_blocks();
 
 
-			ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks);
+			ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks, no_prio);
 
 
 			copy_blocks_into_matrix();
 			copy_blocks_into_matrix();
 			free(A_blocks);
 			free(A_blocks);
@@ -379,7 +380,7 @@ int main(int argc, char **argv)
 
 
 			start = starpu_timing_now();
 			start = starpu_timing_now();
 
 
-			ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks);
+			ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks, no_prio);
 
 
 			end = starpu_timing_now();
 			end = starpu_timing_now();
 
 
@@ -394,7 +395,7 @@ int main(int argc, char **argv)
 	else
 	else
 #endif
 #endif
 	{
 	{
-		ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks);
+		ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks, no_prio);
 	}
 	}
 
 
 	if (profile)
 	if (profile)

+ 12 - 17
examples/lu/xlu.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,11 +21,6 @@
 #include "xlu.h"
 #include "xlu.h"
 #include "xlu_kernels.h"
 #include "xlu_kernels.h"
 
 
-static unsigned no_prio = 0;
-
-
-
-
 /*
 /*
  *	Construct the DAG
  *	Construct the DAG
  */
  */
@@ -41,7 +36,7 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 	return task;
 }
 }
 
 
-static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k)
+static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k, unsigned no_prio)
 {
 {
 /*	printf("task 11 k = %d TAG = %llx\n", k, (TAG11(k))); */
 /*	printf("task 11 k = %d TAG = %llx\n", k, (TAG11(k))); */
 
 
@@ -65,7 +60,7 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 	return task;
 	return task;
 }
 }
 
 
-static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -99,7 +94,7 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	return ret;
 	return ret;
 }
 }
 
 
-static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = create_task(TAG21(k, i));
 	struct starpu_task *task = create_task(TAG21(k, i));
@@ -130,7 +125,7 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	return ret;
 	return ret;
 }
 }
 
 
-static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -169,7 +164,7 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
  *	code to bootstrap the factorization
  *	code to bootstrap the factorization
  */
  */
 
 
-static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	double start;
 	double start;
@@ -186,7 +181,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
 		starpu_iteration_push(k);
 		starpu_iteration_push(k);
-		struct starpu_task *task = create_task_11(dataA, k);
+		struct starpu_task *task = create_task_11(dataA, k, no_prio);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
 		if (k == 0)
 		if (k == 0)
@@ -202,9 +197,9 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		{
 		{
-			ret = create_task_12(dataA, k, i);
+			ret = create_task_12(dataA, k, i, no_prio);
 			if (ret == -ENODEV) return ret;
 			if (ret == -ENODEV) return ret;
-			ret = create_task_21(dataA, k, i);
+			ret = create_task_21(dataA, k, i, no_prio);
 			if (ret == -ENODEV) return ret;
 			if (ret == -ENODEV) return ret;
 		}
 		}
 
 
@@ -212,7 +207,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		{
 		{
 			for (j = k+1; j<nblocks; j++)
 			for (j = k+1; j<nblocks; j++)
 			{
 			{
-			     ret = create_task_22(dataA, k, i, j);
+			     ret = create_task_22(dataA, k, i, j, no_prio);
 			     if (ret == -ENODEV) return ret;
 			     if (ret == -ENODEV) return ret;
 			}
 			}
 		}
 		}
@@ -253,7 +248,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	return 0;
 	return 0;
 }
 }
 
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	starpu_data_handle_t dataA;
 	starpu_data_handle_t dataA;
 
 
@@ -278,7 +273,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
 
-	int ret = dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks, no_prio);
 
 
 	/* gather all the data */
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

+ 4 - 4
examples/lu/xlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2011,2013-2014,2017                 Université de Bordeaux
  * Copyright (C) 2009-2011,2013-2014,2017                 Université de Bordeaux
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -121,8 +121,8 @@ struct piv_s
 	unsigned last; /* last element */
 	unsigned last; /* last element */
 };
 };
 
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks);
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
 
 
 #endif /* __XLU_H__ */
 #endif /* __XLU_H__ */

+ 12 - 14
examples/lu/xlu_implicit.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -22,9 +22,7 @@
 #include "xlu.h"
 #include "xlu.h"
 #include "xlu_kernels.h"
 #include "xlu_kernels.h"
 
 
-static unsigned no_prio = 0;
-
-static int create_task_11(starpu_data_handle_t dataA, unsigned k)
+static int create_task_11(starpu_data_handle_t dataA, unsigned k, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -44,7 +42,7 @@ static int create_task_11(starpu_data_handle_t dataA, unsigned k)
 	return ret;
 	return ret;
 }
 }
 
 
-static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -64,7 +62,7 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	return ret;
 	return ret;
 }
 }
 
 
-static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -85,7 +83,7 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	return ret;
 	return ret;
 }
 }
 
 
-static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -111,7 +109,7 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
  *	code to bootstrap the factorization
  *	code to bootstrap the factorization
  */
  */
 
 
-static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio)
 {
 {
 	double start;
 	double start;
 	double end;
 	double end;
@@ -130,14 +128,14 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 		starpu_iteration_push(k);
 		starpu_iteration_push(k);
 
 
-		ret = create_task_11(dataA, k);
+		ret = create_task_11(dataA, k, no_prio);
 		if (ret == -ENODEV) return ret;
 		if (ret == -ENODEV) return ret;
 
 
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		{
 		{
-		     ret = create_task_12(dataA, k, i);
+			ret = create_task_12(dataA, k, i, no_prio);
 		     if (ret == -ENODEV) return ret;
 		     if (ret == -ENODEV) return ret;
-		     ret = create_task_21(dataA, k, i);
+		     ret = create_task_21(dataA, k, i, no_prio);
 		     if (ret == -ENODEV) return ret;
 		     if (ret == -ENODEV) return ret;
 		}
 		}
 		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));
 		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));
@@ -145,7 +143,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		     for (j = k+1; j<nblocks; j++)
 		     for (j = k+1; j<nblocks; j++)
 		     {
 		     {
-			  ret = create_task_22(dataA, k, i, j);
+			     ret = create_task_22(dataA, k, i, j, no_prio);
 			  if (ret == -ENODEV) return ret;
 			  if (ret == -ENODEV) return ret;
 		     }
 		     }
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
@@ -184,7 +182,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	return 0;
 	return 0;
 }
 }
 
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	starpu_data_handle_t dataA;
 	starpu_data_handle_t dataA;
 
 
@@ -206,7 +204,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
 
-	int ret = dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks, no_prio);
 
 
 	/* gather all the data */
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

+ 20 - 22
examples/lu/xlu_implicit_pivot.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2015,2017                           Université de Bordeaux
  * Copyright (C) 2010-2015,2017                           Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,8 +23,6 @@
 #include "xlu.h"
 #include "xlu.h"
 #include "xlu_kernels.h"
 #include "xlu_kernels.h"
 
 
-static unsigned no_prio = 0;
-
 /*
 /*
  *	Construct the DAG
  *	Construct the DAG
  */
  */
@@ -32,7 +30,7 @@ static unsigned no_prio = 0;
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 			     struct piv_s *piv_description,
 			     struct piv_s *piv_description,
 			     unsigned k, unsigned i,
 			     unsigned k, unsigned i,
-			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -58,7 +56,7 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 
 
 static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 				unsigned k, struct piv_s *piv_description,
 				unsigned k, struct piv_s *piv_description,
-				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -83,7 +81,7 @@ static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 }
 }
 
 
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -105,7 +103,7 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 }
 
 
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -127,7 +125,7 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 }
 
 
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -157,7 +155,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 				  struct piv_s *piv_description,
 				  struct piv_s *piv_description,
 				  unsigned nblocks,
 				  unsigned nblocks,
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
-				  double *timing)
+				  double *timing, unsigned no_prio)
 {
 {
 	double start;
 	double start;
 	double end;
 	double end;
@@ -176,32 +174,32 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 
 		starpu_iteration_push(k);
 		starpu_iteration_push(k);
 
 
-		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio);
 		if (ret == -ENODEV) return ret;
 		if (ret == -ENODEV) return ret;
 
 
 		for (i = 0; i < nblocks; i++)
 		for (i = 0; i < nblocks; i++)
 		{
 		{
 			if (i != k)
 			if (i != k)
 			{
 			{
-			     ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
-			     if (ret == -ENODEV) return ret;
+				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio);
+				if (ret == -ENODEV) return ret;
 			}
 			}
 		}
 		}
 
 
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		{
 		{
-		     ret = create_task_12(dataAp, nblocks, k, i, get_block);
-		     if (ret == -ENODEV) return ret;
-		     ret = create_task_21(dataAp, nblocks, k, i, get_block);
-		     if (ret == -ENODEV) return ret;
+			ret = create_task_12(dataAp, nblocks, k, i, get_block, no_prio);
+			if (ret == -ENODEV) return ret;
+			ret = create_task_21(dataAp, nblocks, k, i, get_block, no_prio);
+			if (ret == -ENODEV) return ret;
 		}
 		}
 		starpu_data_wont_use(get_block(dataAp, nblocks, k, k));
 		starpu_data_wont_use(get_block(dataAp, nblocks, k, k));
 
 
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		     for (j = k+1; j<nblocks; j++)
 		     for (j = k+1; j<nblocks; j++)
 		     {
 		     {
-			  ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
-			  if (ret == -ENODEV) return ret;
+			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block, no_prio);
+			     if (ret == -ENODEV) return ret;
 		     }
 		     }
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		{
 		{
@@ -231,7 +229,7 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp, unsig
 }
 }
 
 
 
 
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
 		/* These won't work with pivoting: we pass a pointer in cl_args */
 		/* These won't work with pivoting: we pass a pointer in cl_args */
@@ -271,7 +269,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	}
 	}
 
 
 	double timing;
 	double timing;
-	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing, no_prio);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
@@ -307,7 +305,7 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 	return dataAp[i+j*nblocks];
 	return dataAp[i+j*nblocks];
 }
 }
 
 
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	(void)ld;
 	(void)ld;
 	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));
 	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));
@@ -337,7 +335,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	}
 	}
 
 
 	double timing;
 	double timing;
-	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing, no_prio);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 

+ 13 - 1
examples/lu/xlu_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
+ * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -107,6 +107,8 @@ static struct starpu_perfmodel STARPU_LU(model_22) =
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_22_goto)
 	.symbol = STARPU_LU_STR(lu_model_22_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_22_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_22)
 	.symbol = STARPU_LU_STR(lu_model_22)
 #endif
 #endif
@@ -228,6 +230,8 @@ static struct starpu_perfmodel STARPU_LU(model_12) =
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_12_goto)
 	.symbol = STARPU_LU_STR(lu_model_12_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_12_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_12)
 	.symbol = STARPU_LU_STR(lu_model_12)
 #endif
 #endif
@@ -315,6 +319,8 @@ static struct starpu_perfmodel STARPU_LU(model_21) =
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_21_goto)
 	.symbol = STARPU_LU_STR(lu_model_21_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_21_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_21)
 	.symbol = STARPU_LU_STR(lu_model_21)
 #endif
 #endif
@@ -433,6 +439,8 @@ static struct starpu_perfmodel STARPU_LU(model_11) =
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_11_goto)
 	.symbol = STARPU_LU_STR(lu_model_11_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_11_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_11)
 	.symbol = STARPU_LU_STR(lu_model_11)
 #endif
 #endif
@@ -602,6 +610,8 @@ static struct starpu_perfmodel STARPU_LU(model_11_pivot) =
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_goto)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_11_pivot_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_11_pivot)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot)
 #endif
 #endif
@@ -703,6 +713,8 @@ static struct starpu_perfmodel STARPU_LU(model_pivot) =
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_pivot_goto)
 	.symbol = STARPU_LU_STR(lu_model_pivot_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_pivot_openblas)
 #else
 #else
 	.symbol = STARPU_LU_STR(lu_model_pivot)
 	.symbol = STARPU_LU_STR(lu_model_pivot)
 #endif
 #endif

+ 19 - 21
examples/lu/xlu_pivot.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2015,2017                           Université de Bordeaux
  * Copyright (C) 2009-2015,2017                           Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2011,2013                                Inria
  * Copyright (C) 2011,2013                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,8 +21,6 @@
 #include "xlu.h"
 #include "xlu.h"
 #include "xlu_kernels.h"
 #include "xlu_kernels.h"
 
 
-static unsigned no_prio = 0;
-
 /*
 /*
  *	Construct the DAG
  *	Construct the DAG
  */
  */
@@ -39,9 +37,9 @@ static struct starpu_task *create_task(starpu_tag_t id)
 }
 }
 
 
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					struct piv_s *piv_description,
-					unsigned k, unsigned i,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			     struct piv_s *piv_description,
+			     unsigned k, unsigned i,
+			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -92,8 +90,8 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 }
 }
 
 
 static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					unsigned k, struct piv_s *piv_description,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+						unsigned k, struct piv_s *piv_description,
+						starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	struct starpu_task *task = create_task(TAG11(k));
 	struct starpu_task *task = create_task(TAG11(k));
 
 
@@ -118,7 +116,7 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, un
 }
 }
 
 
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -158,7 +156,7 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 }
 
 
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -186,7 +184,7 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 }
 
 
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -231,7 +229,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 				  struct piv_s *piv_description,
 				  struct piv_s *piv_description,
 				  unsigned nblocks,
 				  unsigned nblocks,
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
-				  double *timing)
+				  double *timing, unsigned no_prio)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -249,7 +247,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
 		starpu_iteration_push(k);
 		starpu_iteration_push(k);
-		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
 		if (k == 0)
 		if (k == 0)
@@ -267,16 +265,16 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		{
 		{
 			if (i != k)
 			if (i != k)
 			{
 			{
-				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
+				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio);
 				if (ret == -ENODEV) return ret;
 				if (ret == -ENODEV) return ret;
 			}
 			}
 		}
 		}
 
 
 		for (i = k+1; i<nblocks; i++)
 		for (i = k+1; i<nblocks; i++)
 		{
 		{
-			ret = create_task_12(dataAp, nblocks, k, i, get_block);
+			ret = create_task_12(dataAp, nblocks, k, i, get_block, no_prio);
 			if (ret == -ENODEV) return ret;
 			if (ret == -ENODEV) return ret;
-			ret = create_task_21(dataAp, nblocks, k, i, get_block);
+			ret = create_task_21(dataAp, nblocks, k, i, get_block, no_prio);
 			if (ret == -ENODEV) return ret;
 			if (ret == -ENODEV) return ret;
 		}
 		}
 
 
@@ -284,7 +282,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		{
 		{
 			for (j = k+1; j<nblocks; j++)
 			for (j = k+1; j<nblocks; j++)
 			{
 			{
-			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
+			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block, no_prio);
 			     if (ret == -ENODEV) return ret;
 			     if (ret == -ENODEV) return ret;
 			}
 			}
 		}
 		}
@@ -332,7 +330,7 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp, unsig
 }
 }
 
 
 
 
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	starpu_data_handle_t dataA;
 	starpu_data_handle_t dataA;
 
 
@@ -380,7 +378,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 #endif
 #endif
 
 
 	double timing=0.0;
 	double timing=0.0;
-	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing, no_prio);
 
 
 	unsigned n = starpu_matrix_get_nx(dataA);
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	double flop = (2.0f*n*n*n)/3.0f;
@@ -413,7 +411,7 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 	return dataAp[i+j*nblocks];
 	return dataAp[i+j*nblocks];
 }
 }
 
 
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 {
 	(void)ld;
 	(void)ld;
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
@@ -450,7 +448,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	}
 	}
 
 
 	double timing=0.0;
 	double timing=0.0;
-	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing, no_prio);
 
 
 	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
 	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;
 	double flop = (2.0f*n*n*n)/3.0f;

+ 1 - 1
examples/reductions/dot_product.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2013,2015                           Inria
  * Copyright (C) 2012-2013,2015                           Inria
- * Copyright (C) 2010-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2010-2015,2017-2018                      Université de Bordeaux
  * Copyright (C) 2011-2013,2015-2017                      CNRS
  * Copyright (C) 2011-2013,2015-2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify

+ 225 - 227
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -1,10 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C)                                          Inria
- * Copyright (C)                                          CNRS
  * Copyright (C) 2015-2016                                Université de Bordeaux
  * Copyright (C) 2015-2016                                Université de Bordeaux
- * Copyright (C) 2015,2017                                Inria
- * Copyright (C) 2015-2017                                CNRS
+ * Copyright (C) 2015,2017                                Inria
+ * Copyright (C) 2015-2018                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,226 +15,226 @@
  *
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
-
-#include <starpu.h>
-#include <omp.h>
-#include <pthread.h>
-
-#ifdef STARPU_QUICK_CHECK
-#define NTASKS 64
-#define SIZE   40
-#define LOOPS  4
-#else
-#define NTASKS 100
-#define SIZE   400
-#define LOOPS  10
-#endif
-
-#define N_NESTED_CTXS 2
-
-struct context
-{
-	int ncpus;
-	int *cpus;
-	unsigned id;
-};
-
-/* Helper for the task that will initiate everything */
-void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
-{
-	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
-	int sched_ctx = *(int *)sched_ctx_;
-	int *cpuids = NULL;
-	int ncpuids = 0;
-	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
-
-#pragma omp parallel num_threads(ncpuids)
-	{
-		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
-	}
-
-	omp_set_num_threads(ncpuids);
-	free(cpuids);
-	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
-	return;
-}
-
-void noop(void * buffers[], void * cl_arg)
-{
-	(void)buffers;
-	(void)cl_arg;
-}
-
-static struct starpu_codelet init_parallel_worker_cl=
-{
-	.cpu_funcs = {noop},
-	.nbuffers = 0,
-	.name = "init_parallel_worker"
-};
-
-/* function called to initialize the parallel "workers" */
-void parallel_task_init_one_context(unsigned * context_id)
-{
-	struct starpu_task * t;
-	int ret;
-
-	t = starpu_task_build(&init_parallel_worker_cl,
-			      STARPU_SCHED_CTX, *context_id,
-			      0);
-	t->destroy = 1;
-	t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all;
-	if (t->prologue_callback_pop_arg_free)
-		free(t->prologue_callback_pop_arg);
-	t->prologue_callback_pop_arg=context_id;
-	t->prologue_callback_pop_arg_free=0;
-
-	ret = starpu_task_submit(t);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-struct context main_context;
-struct context *contexts;
-void parallel_task_init()
-{
-	/* Context creation */
-	main_context.ncpus = starpu_cpu_worker_get_count();
-	main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int));
-	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
-
-	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
-
-	main_context.id = starpu_sched_ctx_create(main_context.cpus,
-						  main_context.ncpus,"main_ctx",
-						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
-						  0);
-
-	/* Initialize nested contexts */
-	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
-	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
-	int i;
-	for(i = 0; i < N_NESTED_CTXS; i++)
-	{
-		contexts[i].ncpus = cpus_per_context;
-		if (i == N_NESTED_CTXS-1)
-			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
-		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
-	}
-
-	for(i = 0; i < N_NESTED_CTXS; i++)
-		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
-							 contexts[i].ncpus,"nested_ctx",
-							 STARPU_SCHED_CTX_NESTED,main_context.id,
-							 0);
-
-	for (i = 0; i < N_NESTED_CTXS; i++)
-	{
-		parallel_task_init_one_context(&contexts[i].id);
-	}
-
-	starpu_task_wait_for_all();
-	starpu_sched_ctx_set_context(&main_context.id);
-}
-
-void parallel_task_deinit()
-{
-	int i;
-	for (i=0; i<N_NESTED_CTXS;i++)
-		starpu_sched_ctx_delete(contexts[i].id);
-	free(contexts);
-	free(main_context.cpus);
-}
-
-/* Codelet SUM */
-static void sum_cpu(void * descr[], void *cl_arg)
-{
-	(void)cl_arg;
-	double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
-	double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
-	double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
-	int size = STARPU_VECTOR_GET_NX(descr[0]);
-
-	int i, k;
-	for (k=0;k<LOOPS;k++)
-	{
-#pragma omp parallel for
-		for (i=0; i<size; i++)
-		{
-			v_dst[i]+=v_src0[i]+v_src1[i];
-		}
-	}
-}
-
-static struct starpu_codelet sum_cl =
-{
-	.cpu_funcs = {sum_cpu, NULL},
-	.nbuffers = 3,
-	.modes={STARPU_RW,STARPU_R, STARPU_R}
-};
-
-int main(void)
-{
-	int ntasks = NTASKS;
-	int ret, j, k;
-	unsigned ncpus = 0;
-
-	ret = starpu_init(NULL);
-	if (ret == -ENODEV)
-		return 77;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
-	{
-		starpu_shutdown();
-		return 77;
-	}
-
-	parallel_task_init();
-
-	/* Data preparation */
-	double array1[SIZE];
-	double array2[SIZE];
-
-	memset(array1, 0, sizeof(double));
-	int i;
-	for (i=0;i<SIZE;i++)
-	{
-		array2[i]=i*2;
-	}
-
-	starpu_data_handle_t handle1;
-	starpu_data_handle_t handle2;
-
-	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
-	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
-
-	for (i = 0; i < ntasks; i++)
-	{
-		struct starpu_task * t;
-		t=starpu_task_build(&sum_cl,
-				    STARPU_RW,handle1,
-				    STARPU_R,handle2,
-				    STARPU_R,handle1,
-				    STARPU_SCHED_CTX, main_context.id,
-				    0);
-		t->destroy = 1;
-		t->possibly_parallel = 1;
-
-		ret=starpu_task_submit(t);
-		if (ret == -ENODEV)
-			goto out;
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-
-
-
-out:
-	/* wait for all tasks at the end*/
-	starpu_task_wait_for_all();
-
-	starpu_data_unregister(handle1);
-	starpu_data_unregister(handle2);
-	parallel_task_deinit();
-
-	starpu_shutdown();
-	return 0;
-}
+
+#include <starpu.h>
+#include <omp.h>
+#include <pthread.h>
+
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 64
+#define SIZE   40
+#define LOOPS  4
+#else
+#define NTASKS 100
+#define SIZE   400
+#define LOOPS  10
+#endif
+
+#define N_NESTED_CTXS 2
+
+/* Description of one scheduling context used by this example. */
+struct context
+{
+	/* Number of worker ids reachable through cpus */
+	int ncpus;
+	/* Worker ids given to starpu_sched_ctx_create() for this context */
+	int *cpus;
+	/* Identifier returned by starpu_sched_ctx_create() */
+	unsigned id;
+};
+
+/* Helper for the task that will initiate everything.
+ *
+ * Installed as the prologue callback of the initialization task. It queries
+ * the CPU ids available to the scheduling context, opens an OpenMP parallel
+ * region with that many threads in which each thread binds itself to
+ * cpuids[its OpenMP thread number], and finally makes that thread count the
+ * OpenMP default.
+ *
+ * sched_ctx_ points to the scheduling context id, an unsigned (see
+ * parallel_task_init_one_context()). */
+void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
+{
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
+	/* The argument was registered as an unsigned *, read it back as such */
+	unsigned sched_ctx = *(unsigned *)sched_ctx_;
+	int *cpuids = NULL;
+	int ncpuids = 0;
+	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+
+#pragma omp parallel num_threads(ncpuids)
+	{
+		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+	}
+
+	omp_set_num_threads(ncpuids);
+	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
+}
+
+/* Kernel that intentionally performs no work; both arguments are ignored. */
+void noop(void * buffers[], void * cl_arg)
+{
+	(void)cl_arg;
+	(void)buffers;
+}
+
+/* Codelet of the initialization task: its only CPU implementation is noop and
+ * it declares no data buffer. */
+static struct starpu_codelet init_parallel_worker_cl=
+{
+	.cpu_funcs = {noop},
+	.nbuffers = 0,
+	.name = "init_parallel_worker"
+};
+
+/* Initialize the parallel "workers" of one context: build a no-op task for
+ * the scheduling context designated by context_id, attach
+ * parallel_task_prologue_init_once_and_for_all() as its prologue callback with
+ * context_id as argument, and submit it. */
+void parallel_task_init_one_context(unsigned * context_id)
+{
+	struct starpu_task *task = starpu_task_build(&init_parallel_worker_cl,
+						     STARPU_SCHED_CTX, *context_id,
+						     0);
+
+	/* Release a previously attached argument if the task is flagged to free it */
+	if (task->prologue_callback_pop_arg_free)
+		free(task->prologue_callback_pop_arg);
+
+	/* The free flag is cleared, so the task is not asked to free context_id */
+	task->prologue_callback_pop_arg_free = 0;
+	task->prologue_callback_pop_arg = context_id;
+	task->prologue_callback_pop_func = parallel_task_prologue_init_once_and_for_all;
+	task->destroy = 1;
+
+	int ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+/* Main context and array of the N_NESTED_CTXS nested contexts; both are filled
+ * by parallel_task_init() and released by parallel_task_deinit(). */
+struct context main_context;
+struct context *contexts;
+
+/* Create the main scheduling context over all CPU workers, split these workers
+ * between N_NESTED_CTXS nested contexts (the last one also receives the
+ * remainder of the division), run the initialization task in each nested
+ * context, wait for these tasks, and finally pass the main context id to
+ * starpu_sched_ctx_set_context(). */
+void parallel_task_init(void)
+{
+	/* Context creation */
+	main_context.ncpus = starpu_cpu_worker_get_count();
+	main_context.cpus = malloc(main_context.ncpus*sizeof(*main_context.cpus));
+	/* Nothing can be set up without the worker id array */
+	STARPU_ASSERT(main_context.cpus);
+	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
+
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
+
+	main_context.id = starpu_sched_ctx_create(main_context.cpus,
+						  main_context.ncpus,"main_ctx",
+						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
+						  0);
+
+	/* Initialize nested contexts */
+	contexts = malloc(N_NESTED_CTXS*sizeof(*contexts));
+	STARPU_ASSERT(contexts);
+	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
+	int i;
+	for(i = 0; i < N_NESTED_CTXS; i++)
+	{
+		contexts[i].ncpus = cpus_per_context;
+		/* The last nested context also takes the leftover workers */
+		if (i == N_NESTED_CTXS-1)
+			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
+		/* Nested contexts point into the main array, they own no memory */
+		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
+	}
+
+	for(i = 0; i < N_NESTED_CTXS; i++)
+		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
+							 contexts[i].ncpus,"nested_ctx",
+							 STARPU_SCHED_CTX_NESTED,main_context.id,
+							 0);
+
+	for (i = 0; i < N_NESTED_CTXS; i++)
+	{
+		parallel_task_init_one_context(&contexts[i].id);
+	}
+
+	/* The initialization tasks must have run before real work is submitted */
+	starpu_task_wait_for_all();
+	starpu_sched_ctx_set_context(&main_context.id);
+}
+
+/* Delete every nested scheduling context, then free the two arrays allocated
+ * by parallel_task_init(). */
+void parallel_task_deinit()
+{
+	int ctx = 0;
+
+	while (ctx < N_NESTED_CTXS)
+	{
+		starpu_sched_ctx_delete(contexts[ctx].id);
+		ctx++;
+	}
+
+	free(contexts);
+	free(main_context.cpus);
+}
+
+/* Codelet SUM: repeats LOOPS times, for every element of the first vector,
+ * dst[i] += src0[i] + src1[i]. The element loop is an OpenMP parallel for. */
+static void sum_cpu(void * descr[], void *cl_arg)
+{
+	double *dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
+	double *src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
+	double *src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
+	int nx = STARPU_VECTOR_GET_NX(descr[0]);
+	int pass, elt;
+
+	(void)cl_arg;
+
+	for (pass = 0; pass < LOOPS; pass++)
+	{
+#pragma omp parallel for
+		for (elt = 0; elt < nx; elt++)
+		{
+			dst[elt] += src0[elt] + src1[elt];
+		}
+	}
+}
+
+/* Codelet running sum_cpu on three buffers: the first one is accessed in
+ * read-write mode, the other two in read-only mode. */
+static struct starpu_codelet sum_cl =
+{
+	.cpu_funcs = {sum_cpu, NULL},
+	.nbuffers = 3,
+	.modes={STARPU_RW,STARPU_R, STARPU_R}
+};
+
+/* Submit NTASKS possibly-parallel sum tasks to the main context, all of them
+ * working on the same two vectors, then tear everything down.
+ *
+ * Returns 77 when starpu_init() reports -ENODEV or when fewer than
+ * N_NESTED_CTXS CPU workers are available, 0 otherwise. */
+int main(void)
+{
+	int ntasks = NTASKS;
+	int ret;
+	int i;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Each nested context needs at least one CPU worker */
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
+	{
+		starpu_shutdown();
+		return 77;
+	}
+
+	parallel_task_init();
+
+	/* Data preparation */
+	double array1[SIZE];
+	double array2[SIZE];
+
+	/* Clear the whole array: sum_cpu reads and accumulates into every
+	 * element, not only the first one */
+	memset(array1, 0, sizeof(array1));
+	for (i=0;i<SIZE;i++)
+	{
+		array2[i]=i*2;
+	}
+
+	starpu_data_handle_t handle1;
+	starpu_data_handle_t handle2;
+
+	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
+	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task * t;
+		/* handle1 is deliberately passed twice, as RW and as R */
+		t=starpu_task_build(&sum_cl,
+				    STARPU_RW,handle1,
+				    STARPU_R,handle2,
+				    STARPU_R,handle1,
+				    STARPU_SCHED_CTX, main_context.id,
+				    0);
+		t->destroy = 1;
+		t->possibly_parallel = 1;
+
+		ret=starpu_task_submit(t);
+		/* Stop submitting on -ENODEV but still clean up below */
+		if (ret == -ENODEV)
+			goto out;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+out:
+	/* wait for all tasks at the end*/
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	parallel_task_deinit();
+
+	starpu_shutdown();
+	return 0;
+}

+ 2 - 1
include/starpu_config.h.in

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2012,2014,2016-2017                 Inria
  * Copyright (C) 2011-2012,2014,2016-2017                 Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
+ * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -61,6 +61,7 @@
 
 
 #undef STARPU_ATLAS
 #undef STARPU_ATLAS
 #undef STARPU_GOTO
 #undef STARPU_GOTO
+#undef STARPU_OPENBLAS
 #undef STARPU_MKL
 #undef STARPU_MKL
 #undef STARPU_SYSTEM_BLAS
 #undef STARPU_SYSTEM_BLAS
 
 

+ 6 - 5
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2013-2014                                Inria
  * Copyright (C) 2013-2014                                Inria
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2010-2015, 2018                          Université de Bordeaux
  * Copyright (C) 2010-2015, 2018                          Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -77,16 +77,17 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 
 
 void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
 void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
 
 
-struct starpu_codelet_pack_arg {
+struct starpu_codelet_pack_arg_data
+{
 	char *arg_buffer;
 	char *arg_buffer;
 	size_t arg_buffer_size;
 	size_t arg_buffer_size;
 	size_t current_offset;
 	size_t current_offset;
 	int nargs;
 	int nargs;
 };
 };
 
 
-void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg *state);
-void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg *state, const void *ptr, size_t ptr_size);
-void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg *state, void **cl_arg, size_t *cl_arg_size);
+void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state);
+void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size);
+void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size);
 
 
 void starpu_codelet_unpack_args(void *cl_arg, ...);
 void starpu_codelet_unpack_args(void *cl_arg, ...);
 void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...);
 void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...);

+ 2 - 1
include/starpu_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2012,2017                           Inria
  * Copyright (C) 2011-2012,2017                           Inria
- * Copyright (C) 2008-2017                                Université de Bordeaux
+ * Copyright (C) 2008-2018                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -323,6 +323,7 @@ STARPU_ATOMIC_SOMETHINGL(or, old | value)
 #define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (starpu_cmpxchg((ptr), (old), (value)))
 #define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (starpu_cmpxchg((ptr), (old), (value)))
 #endif
 #endif
 
 
+/* Returns the previous value */
 #ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
 #ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
 #define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set ((ptr), (value)))
 #define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set ((ptr), (value)))
 #define STARPU_RELEASE(ptr) (__sync_lock_release ((ptr)))
 #define STARPU_RELEASE(ptr) (__sync_lock_release ((ptr)))

+ 3 - 2
mpi/examples/mpi_lu/plu_example.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -37,6 +37,7 @@ static unsigned check = 0;
 static int p = 1;
 static int p = 1;
 static int q = 1;
 static int q = 1;
 static unsigned display = 0;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 
 
 #ifdef STARPU_HAVE_LIBNUMA
 #ifdef STARPU_HAVE_LIBNUMA
 static unsigned numa = 0;
 static unsigned numa = 0;
@@ -509,7 +510,7 @@ int main(int argc, char **argv)
 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 
 	/*
 	/*
 	 * 	Report performance
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/plu_implicit_example.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -37,6 +37,7 @@ static unsigned check = 0;
 static int p = 1;
 static int p = 1;
 static int q = 1;
 static int q = 1;
 static unsigned display = 0;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 
 
 #ifdef STARPU_HAVE_LIBNUMA
 #ifdef STARPU_HAVE_LIBNUMA
 static unsigned numa = 0;
 static unsigned numa = 0;
@@ -301,7 +302,7 @@ int main(int argc, char **argv)
 		free(y);
 		free(y);
 	}
 	}
 
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 
 	/*
 	/*
 	 * 	Report performance
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012-2014                                Inria
  * Copyright (C) 2012-2014                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -40,6 +40,7 @@ static unsigned check = 0;
 static int p = 1;
 static int p = 1;
 static int q = 1;
 static int q = 1;
 static unsigned display = 0;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 static char *path = "./starpu-ooc-files";
 static char *path = "./starpu-ooc-files";
 
 
 #ifdef STARPU_HAVE_LIBNUMA
 #ifdef STARPU_HAVE_LIBNUMA
@@ -329,7 +330,7 @@ int main(int argc, char **argv)
 		free(y);
 		free(y);
 	}
 	}
 
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 
 	/*
 	/*
 	 * 	Report performance
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/pxlu.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012,2017                                Inria
  * Copyright (C) 2012,2017                                Inria
  * Copyright (C) 2010-2011,2014,2017                      Université de Bordeaux
  * Copyright (C) 2010-2011,2014,2017                      Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -866,7 +866,7 @@ static void wait_termination(void)
  *	code to bootstrap the factorization
  *	code to bootstrap the factorization
  */
  */
 
 
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
+double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio)
 {
 {
 	double start;
 	double start;
 	double end;
 	double end;
@@ -874,6 +874,7 @@ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
 	nblocks = _nblocks;
 	nblocks = _nblocks;
 	rank = _rank;
 	rank = _rank;
 	world_size = _world_size;
 	world_size = _world_size;
+	no_prio = _no_prio;
 
 
 	/* create all the DAG nodes */
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 	unsigned i,j,k;

+ 2 - 2
mpi/examples/mpi_lu/pxlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2011,2014                           Université de Bordeaux
  * Copyright (C) 2010-2011,2014                           Université de Bordeaux
- * Copyright (C) 2010-2012,2014-2015,2017                 CNRS
+ * Copyright (C) 2010-2012,2014-2015,2017,2018            CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -38,7 +38,7 @@ struct debug_info
 	unsigned k;
 	unsigned k;
 };
 };
 
 
-double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
+double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size, unsigned no_prio);
 
 
 TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
 TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
 void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);
 void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);

+ 3 - 3
mpi/examples/mpi_lu/pxlu_implicit.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -26,7 +26,6 @@
 //#define DEBUG	1
 //#define DEBUG	1
 
 
 static unsigned no_prio = 0;
 static unsigned no_prio = 0;
-
 static unsigned nblocks = 0;
 static unsigned nblocks = 0;
 static int rank = -1;
 static int rank = -1;
 static int world_size = -1;
 static int world_size = -1;
@@ -120,7 +119,7 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
  *	code to bootstrap the factorization 
  *	code to bootstrap the factorization 
  */
  */
 
 
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
+double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio)
 {
 {
 	double start;
 	double start;
 	double end;
 	double end;
@@ -128,6 +127,7 @@ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
 	nblocks = _nblocks;
 	nblocks = _nblocks;
 	rank = _rank;
 	rank = _rank;
 	world_size = _world_size;
 	world_size = _world_size;
+	no_prio = _no_prio;
 
 
 	/* create all the DAG nodes */
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 	unsigned i,j,k;

+ 9 - 1
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2010-2015                                Université de Bordeaux
+ * Copyright (C) 2010-2015, 2018                          Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -106,6 +106,8 @@ static struct starpu_perfmodel STARPU_PLU(model_22) =
 	.symbol = STARPU_PLU_STR(lu_model_22_atlas)
 	.symbol = STARPU_PLU_STR(lu_model_22_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_22_goto)
 	.symbol = STARPU_PLU_STR(lu_model_22_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_22_openblas)
 #else
 #else
 	.symbol = STARPU_PLU_STR(lu_model_22)
 	.symbol = STARPU_PLU_STR(lu_model_22)
 #endif
 #endif
@@ -218,6 +220,8 @@ static struct starpu_perfmodel STARPU_PLU(model_12) =
 	.symbol = STARPU_PLU_STR(lu_model_12_atlas)
 	.symbol = STARPU_PLU_STR(lu_model_12_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_12_goto)
 	.symbol = STARPU_PLU_STR(lu_model_12_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_12_openblas)
 #else
 #else
 	.symbol = STARPU_PLU_STR(lu_model_12)
 	.symbol = STARPU_PLU_STR(lu_model_12)
 #endif
 #endif
@@ -331,6 +335,8 @@ static struct starpu_perfmodel STARPU_PLU(model_21) =
 	.symbol = STARPU_PLU_STR(lu_model_21_atlas)
 	.symbol = STARPU_PLU_STR(lu_model_21_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_21_goto)
 	.symbol = STARPU_PLU_STR(lu_model_21_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_21_openblas)
 #else
 #else
 	.symbol = STARPU_PLU_STR(lu_model_21)
 	.symbol = STARPU_PLU_STR(lu_model_21)
 #endif
 #endif
@@ -441,6 +447,8 @@ static struct starpu_perfmodel STARPU_PLU(model_11) =
 	.symbol = STARPU_PLU_STR(lu_model_11_atlas)
 	.symbol = STARPU_PLU_STR(lu_model_11_atlas)
 #elif defined(STARPU_GOTO)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_11_goto)
 	.symbol = STARPU_PLU_STR(lu_model_11_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_11_openblas)
 #else
 #else
 	.symbol = STARPU_PLU_STR(lu_model_11)
 	.symbol = STARPU_PLU_STR(lu_model_11)
 #endif
 #endif

+ 3 - 1
mpi/src/Makefile.am

@@ -2,7 +2,7 @@
 #
 #
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2010-2017                                CNRS
 # Copyright (C) 2010-2017                                CNRS
-# Copyright (C) 2009-2014                                Université de Bordeaux
+# Copyright (C) 2009-2014, 2018                          Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -78,6 +78,8 @@ noinst_HEADERS =					\
 
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi.c					\
 	starpu_mpi.c					\
+	starpu_mpi_req.c				\
+	starpu_mpi_coop_sends.c				\
 	starpu_mpi_helper.c				\
 	starpu_mpi_helper.c				\
 	starpu_mpi_datatype.c				\
 	starpu_mpi_datatype.c				\
 	starpu_mpi_task_insert.c			\
 	starpu_mpi_task_insert.c			\

+ 2 - 2
mpi/src/mpi/starpu_mpi_comm.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2017                                     Guillaume Beauchamp
  * Copyright (C) 2017                                     Guillaume Beauchamp
- * Copyright (C) 2011-2017                                CNRS
+ * Copyright (C) 2011-2018                                CNRS
  * Copyright (C) 2014,2017                                Inria
  * Copyright (C) 2014,2017                                Inria
  * Copyright (C) 2011-2017                                Université de Bordeaux
  * Copyright (C) 2011-2017                                Université de Bordeaux
  *
  *
@@ -81,7 +81,7 @@ void _starpu_mpi_comm_shutdown()
 	}
 	}
 	free(_starpu_mpi_comms);
 	free(_starpu_mpi_comms);
 
 
-	struct _starpu_mpi_comm_hashtable *entry, *tmp;
+	struct _starpu_mpi_comm_hashtable *entry=NULL, *tmp=NULL;
 	HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp)
 	HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp)
 	{
 	{
 		HASH_DEL(_starpu_mpi_comms_cache, entry);
 		HASH_DEL(_starpu_mpi_comms_cache, entry);

+ 30 - 143
mpi/src/mpi/starpu_mpi_mpi.c

@@ -50,11 +50,7 @@ static unsigned nready_process;
 /* Number of send requests to submit to MPI at the same time */
 /* Number of send requests to submit to MPI at the same time */
 static unsigned ndetached_send;
 static unsigned ndetached_send;
 
 
-static int mpi_thread_cpuid = -1;
-static int use_prio = 1;
-
 static void _starpu_mpi_add_sync_point_in_fxt(void);
 static void _starpu_mpi_add_sync_point_in_fxt(void);
-static void _starpu_mpi_submit_ready_request(void *arg);
 static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req);
 #ifdef STARPU_MPI_VERBOSE
 #ifdef STARPU_MPI_VERBOSE
@@ -87,8 +83,6 @@ static int wait_counter;
 static starpu_pthread_cond_t wait_counter_cond;
 static starpu_pthread_cond_t wait_counter_cond;
 static starpu_pthread_mutex_t wait_counter_mutex;
 static starpu_pthread_mutex_t wait_counter_mutex;
 #endif
 #endif
-int _starpu_mpi_fake_world_size = -1;
-int _starpu_mpi_fake_world_rank = -1;
 
 
 /* Count requests posted by the application and not yet submitted to MPI */
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -110,81 +104,6 @@ extern void smpi_process_set_user_data(void *);
 #endif
 #endif
 #endif
 #endif
 
 
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
-{
-	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
-
-	/* Initialize the request structure */
-	(*req)->data_handle = NULL;
-	(*req)->prio = 0;
-
-	(*req)->datatype = 0;
-	(*req)->datatype_name = NULL;
-	(*req)->ptr = NULL;
-	(*req)->count = -1;
-	(*req)->registered_datatype = -1;
-
-	(*req)->node_tag.rank = -1;
-	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.comm = 0;
-
-	(*req)->func = NULL;
-
-	(*req)->status = NULL;
-	(*req)->data_request = 0;
-	(*req)->flag = NULL;
-
-	(*req)->ret = -1;
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
-
-	(*req)->request_type = UNKNOWN_REQ;
-
-	(*req)->submitted = 0;
-	(*req)->completed = 0;
-	(*req)->posted = 0;
-
-	(*req)->other_request = NULL;
-
-	(*req)->sync = 0;
-	(*req)->detached = -1;
-	(*req)->callback = NULL;
-	(*req)->callback_arg = NULL;
-
-	(*req)->size_req = 0;
-	(*req)->internal_req = NULL;
-	(*req)->is_internal_req = 0;
-	(*req)->to_destroy = 1;
-	(*req)->early_data_handle = NULL;
-	(*req)->envelope = NULL;
-	(*req)->sequential_consistency = 1;
-	(*req)->pre_sync_jobid = -1;
-	(*req)->post_sync_jobid = -1;
-
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_init(&((*req)->queue));
-	starpu_pthread_queue_register(&wait, &((*req)->queue));
-	(*req)->done = 0;
-#endif
-}
-
-void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
-{
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->req_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->posted_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->posted_cond);
-	free(req->datatype_name);
-	req->datatype_name = NULL;
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_unregister(&wait, &req->queue);
-	starpu_pthread_queue_destroy(&req->queue);
-#endif
-	free(req);
-}
-
  /********************************************************/
  /********************************************************/
  /*                                                      */
  /*                                                      */
  /*  Send/Receive functionalities                        */
  /*  Send/Receive functionalities                        */
@@ -205,7 +124,28 @@ void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req)
 	_starpu_mpi_submit_ready_request(req);
 	_starpu_mpi_submit_ready_request(req);
 }
 }
 
 
-static void _starpu_mpi_submit_ready_request(void *arg)
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	/* TODO: turn them into redirects & forwards */
+}
+
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data)
+{
+	unsigned i, n = coop_sends->n;
+
+	/* Note: coop_sends might disappear very very soon after last request is submitted */
+	for (i = 0; i < n; i++)
+	{
+		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
+		{
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
+		}
+		/* TODO: handle redirect requests */
+	}
+}
+
+void _starpu_mpi_submit_ready_request(void *arg)
 {
 {
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
 	struct _starpu_mpi_req *req = arg;
 	struct _starpu_mpi_req *req = arg;
@@ -346,58 +286,10 @@ static void nop_acquire_cb(void *arg)
 	starpu_data_release(arg);
 	starpu_data_release(arg);
 }
 }
 
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count)
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED)
 {
 {
-	struct _starpu_mpi_req *req;
-
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		/* Don't actually do the communication */
-		starpu_data_acquire_on_node_cb_sequential_consistency(data_handle, STARPU_MAIN_RAM, mode, nop_acquire_cb, data_handle, sequential_consistency);
-		return NULL;
-	}
-
-	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
-
-	_starpu_mpi_comm_register(comm);
-
-	/* Initialize the request structure */
-	_starpu_mpi_request_init(&req);
-	req->request_type = request_type;
-	/* prio_list is sorted by increasing values */
-	if (use_prio)
-		req->prio = prio;
-	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
-	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
-	req->detached = detached;
-	req->sync = sync;
-	req->callback = callback;
-	req->callback_arg = arg;
-	req->func = func;
-	req->sequential_consistency = sequential_consistency;
-	req->is_internal_req = is_internal_req;
-	/* For internal requests, we wait for both the request completion and the matching application request completion */
-	req->to_destroy = !is_internal_req;
-	req->count = count;
-
-	/* Asynchronously request StarPU to fetch the data in main memory: when
-	 * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and
-	 * the request is actually submitted */
-	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_submit_ready_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
-
-	_STARPU_MPI_LOG_OUT();
-	return req;
- }
+}
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag)
 int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag)
@@ -935,8 +827,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 		_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.rank, req->node_tag.data_tag);
 		_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.rank, req->node_tag.data_tag);
 	}
 	}
 
 
-	if (req->data_handle)
-		starpu_data_release(req->data_handle);
+	_starpu_mpi_release_req_data(req);
 
 
 	if (req->envelope)
 	if (req->envelope)
 	{
 	{
@@ -1224,16 +1115,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	starpu_pthread_setname("MPI");
 	starpu_pthread_setname("MPI");
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
-	if (mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid >= 0)
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 	_starpu_mpi_do_initialize(argc_argv);
 	_starpu_mpi_do_initialize(argc_argv);
-	if (mpi_thread_cpuid >= 0)
+	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 #endif
 #endif
 
 
-	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
-	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+	_starpu_mpi_env_init();
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1578,11 +1468,8 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
         STARPU_PTHREAD_MUTEX_INIT(&mutex_posted_requests, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&mutex_posted_requests, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&mutex_ready_requests, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&mutex_ready_requests, NULL);
 
 
-        _starpu_mpi_comm_debug = starpu_getenv("STARPU_MPI_COMM") != NULL;
 	nready_process = starpu_get_env_number_default("STARPU_MPI_NREADY_PROCESS", 10);
 	nready_process = starpu_get_env_number_default("STARPU_MPI_NREADY_PROCESS", 10);
 	ndetached_send = starpu_get_env_number_default("STARPU_MPI_NDETACHED_SEND", 10);
 	ndetached_send = starpu_get_env_number_default("STARPU_MPI_NDETACHED_SEND", 10);
-	mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
-	use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL);

+ 35 - 133
mpi/src/nmad/starpu_mpi_nmad.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Guillaume Beauchamp
  * Copyright (C) 2017                                     Guillaume Beauchamp
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                      CNRS
  * Copyright (C) 2009-2014,2017-2018                      Université de Bordeaux
  * Copyright (C) 2009-2014,2017-2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -39,20 +39,15 @@
 #include <nm_sendrecv_interface.h>
 #include <nm_sendrecv_interface.h>
 #include <nm_mpi_nmad.h>
 #include <nm_mpi_nmad.h>
 
 
+
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event);
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event);
 #ifdef STARPU_VERBOSE
 #ifdef STARPU_VERBOSE
 static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
 static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
 #endif
 #endif
-static void _starpu_mpi_handle_new_request(void *arg);
 
 
 static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_add_sync_point_in_fxt(void);
 static void _starpu_mpi_add_sync_point_in_fxt(void);
 
 
-static int mpi_thread_cpuid = -1;
-static int use_prio = 1;
-int _starpu_mpi_fake_world_size = -1;
-int _starpu_mpi_fake_world_rank = -1;
-
 /* Condition to wake up waiting for all current MPI requests to finish */
 /* Condition to wake up waiting for all current MPI requests to finish */
 static starpu_pthread_t progress_thread;
 static starpu_pthread_t progress_thread;
 static starpu_pthread_cond_t progress_cond;
 static starpu_pthread_cond_t progress_cond;
@@ -72,74 +67,6 @@ static callback_lfstack_t callback_stack = NULL;
 
 
 static starpu_sem_t callback_sem;
 static starpu_sem_t callback_sem;
 
 
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
-{
-	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
-
-	/* Initialize the request structure */
-	(*req)->data_handle = NULL;
-	(*req)->prio = 0;
-	(*req)->completed = 0;
-
-	(*req)->datatype = 0;
-	(*req)->datatype_name = NULL;
-	(*req)->ptr = NULL;
-	(*req)->count = -1;
-	(*req)->registered_datatype = -1;
-
-	(*req)->node_tag.rank = -1;
-	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.comm = 0;
-
-	(*req)->func = NULL;
-
-	(*req)->status = NULL;
-	//	(*req)->data_request = 0;
-	(*req)->flag = NULL;
-
-	(*req)->ret = -1;
-	piom_cond_init(&((*req)->req_cond), 0);
-	//STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
-	//STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
-	//	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
-	//STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
-
-	(*req)->request_type = UNKNOWN_REQ;
-
-	(*req)->submitted = 0;
-	(*req)->completed = 0;
-	(*req)->posted = 0;
-
-	//(*req)->other_request = NULL;
-
-	(*req)->sync = 0;
-	(*req)->detached = -1;
-	(*req)->callback = NULL;
-	(*req)->callback_arg = NULL;
-
-	//	(*req)->size_req = 0;
-	//(*req)->internal_req = NULL;
-	//(*req)->is_internal_req = 0;
-	//(*req)->to_destroy = 1;
-	//(*req)->early_data_handle = NULL;
-	//(*req)->envelope = NULL;
-	(*req)->sequential_consistency = 1;
-	(*req)->pre_sync_jobid = -1;
-	(*req)->post_sync_jobid = -1;
-
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_init(&((*req)->queue));
-	starpu_pthread_queue_register(&wait, &((*req)->queue));
-	(*req)->done = 0;
-#endif
-}
-
-void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
-{
-	piom_cond_destroy(&(req->req_cond));
-	free(req);
-}
-
 /********************************************************/
 /********************************************************/
 /*                                                      */
 /*                                                      */
 /*  Send/Receive functionalities                        */
 /*  Send/Receive functionalities                        */
@@ -151,53 +78,9 @@ static void nop_acquire_cb(void *arg)
 	starpu_data_release(arg);
 	starpu_data_release(arg);
 }
 }
 
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count)
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED)
 {
 {
-
-	struct _starpu_mpi_req *req;
-
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		/* Don't actually do the communication */
-		starpu_data_acquire_on_node_cb_sequential_consistency(data_handle, STARPU_MAIN_RAM, mode, nop_acquire_cb, data_handle, sequential_consistency);
-		return NULL;
-	}
-
-	_STARPU_MPI_LOG_IN();
 	STARPU_ATOMIC_ADD( &pending_request, 1);
 	STARPU_ATOMIC_ADD( &pending_request, 1);
-
-	/* Initialize the request structure */
-	_starpu_mpi_request_init(&req);
-	req->request_type = request_type;
-	/* prio_list is sorted by increasing values */
-	if (use_prio)
-		req->prio = prio;
-	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
-	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
-	req->detached = detached;
-	req->sync = sync;
-	req->callback = callback;
-	req->callback_arg = arg;
-	req->func = func;
-	req->sequential_consistency = sequential_consistency;
-	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
-
-	/* Asynchronously request StarPU to fetch the data in main memory: when
-	 * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
-	 * the request is actually submitted */
-	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_handle_new_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
-
-	_STARPU_MPI_LOG_OUT();
-	return req;
 }
 }
 
 
 /********************************************************/
 /********************************************************/
@@ -505,7 +388,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 		        nm_mpi_nmad_data_release(req->datatype);
 		        nm_mpi_nmad_data_release(req->datatype);
 			_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
 			_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
 		}
 		}
-		starpu_data_release(req->data_handle);
+		_starpu_mpi_release_req_data(req);
 	}
 	}
 
 
 	/* Execute the specified callback, if any */
 	/* Execute the specified callback, if any */
@@ -560,13 +443,34 @@ static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req)
 	nm_sr_request_monitor(req->session, &(req->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
 	nm_sr_request_monitor(req->session, &(req->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
 }
 }
 
 
-static void _starpu_mpi_handle_new_request(void *arg)
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	/* TODO: turn them into redirects & forwards */
+}
+
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data)
+{
+	unsigned i, n = coop_sends->n;
+
+	/* Note: coop_sends might disappear very very soon after last request is submitted */
+	for (i = 0; i < n; i++)
+	{
+		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
+		{
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
+		}
+		/* TODO: handle redirect requests */
+	}
+}
+
+void _starpu_mpi_submit_ready_request(void *arg)
 {
 {
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
 	struct _starpu_mpi_req *req = arg;
 	struct _starpu_mpi_req *req = arg;
 	STARPU_ASSERT_MSG(req, "Invalid request");
 	STARPU_ASSERT_MSG(req, "Invalid request");
 
 
-	/* submit the request to MPI */
+	/* submit the request to MPI directly from submitter */
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
 	req->func(req);
@@ -581,16 +485,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	starpu_pthread_setname("MPI");
 	starpu_pthread_setname("MPI");
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
-	if (mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid >= 0)
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 	_starpu_mpi_do_initialize(argc_argv);
 	_starpu_mpi_do_initialize(argc_argv);
-	if (mpi_thread_cpuid >= 0)
+	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 #endif
 #endif
 
 
-	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
-	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+	_starpu_mpi_env_init();
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -636,7 +539,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		int err=0;
 		int err=0;
 
 
 		if(running || pending_request>0)
 		if(running || pending_request>0)
-		{/* shall we block ? */
+		{
+			/* shall we block ? */
 			err = starpu_sem_wait(&callback_sem);
 			err = starpu_sem_wait(&callback_sem);
 			//running pending_request can change while waiting
 			//running pending_request can change while waiting
 		}
 		}
@@ -740,8 +644,6 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
 
 
 	starpu_sem_init(&callback_sem, 0, 0);
 	starpu_sem_init(&callback_sem, 0, 0);
 	running = 0;
 	running = 0;
-	mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
-	use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
 
 
 	STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv);
 	STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv);
 
 
@@ -753,7 +655,7 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
         return 0;
         return 0;
 }
 }
 
 
-void _starpu_mpi_progress_shutdown(void *value)
+void _starpu_mpi_progress_shutdown(void **value)
 {
 {
 	/* kill the progression thread */
 	/* kill the progression thread */
         STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
         STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);

+ 58 - 16
mpi/src/starpu_mpi.c

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2013,2016-2017                      Inria
  * Copyright (C) 2012-2013,2016-2017                      Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2010-2018                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -41,18 +41,46 @@
 #include <mpi/starpu_mpi_tag.h>
 #include <mpi/starpu_mpi_tag.h>
 #endif
 #endif
 
 
+static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
+{
+	/* Asynchronously request StarPU to fetch the data in main memory: when
+	 * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and
+	 * the request is actually submitted */
+	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_submit_ready_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 							int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 							int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 							int sequential_consistency)
 							int sequential_consistency)
 {
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Don't actually do the communication */
+		return NULL;
+	}
+
 #ifdef STARPU_MPI_PEDANTIC_ISEND
 #ifdef STARPU_MPI_PEDANTIC_ISEND
-					      STARPU_RW,
+	enum starpu_data_access_mode mode = STARPU_RW;
 #else
 #else
-					      STARPU_R,
+	enum starpu_data_access_mode mode = STARPU_R;
 #endif
 #endif
+
+	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(
+	                                      data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
 					      sequential_consistency, 0, 0);
 					      sequential_consistency, 0, 0);
+	_starpu_mpi_req_willpost(req);
+
+	if (_starpu_mpi_use_coop_sends && detached == 1 && sync == 0 && callback == NULL)
+	{
+		/* It's a send & forget send, we can perhaps optimize its distribution over several nodes */
+		_starpu_mpi_coop_send(data_handle, req, mode, sequential_consistency);
+		return req;
+	}
+
+	/* Post normally */
+	_starpu_mpi_isend_irecv_common(req, mode, sequential_consistency);
+	return req;
 }
 }
 
 
 int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
@@ -147,7 +175,16 @@ int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starp
 
 
 struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
 struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
 {
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W, sequential_consistency, is_internal_req, count);
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Don't actually do the communication */
+		return NULL;
+	}
+
+	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, sequential_consistency, is_internal_req, count);
+	_starpu_mpi_req_willpost(req);
+	_starpu_mpi_isend_irecv_common(req, STARPU_W, sequential_consistency);
+	return req;
 }
 }
 
 
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
@@ -221,14 +258,15 @@ void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
 #endif
 #endif
 	_starpu_mpi_cache_data_clear(data_handle);
 	_starpu_mpi_cache_data_clear(data_handle);
 	free(data_handle->mpi_data);
 	free(data_handle->mpi_data);
+	data_handle->mpi_data = NULL;
 }
 }
 
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
+struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle)
 {
 {
-	struct _starpu_mpi_data *mpi_data;
-	if (data_handle->mpi_data)
+	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
+	if (mpi_data)
 	{
 	{
-		mpi_data = data_handle->mpi_data;
+		STARPU_ASSERT(mpi_data->magic == 42);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -237,16 +275,23 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 		mpi_data->node_tag.data_tag = -1;
 		mpi_data->node_tag.data_tag = -1;
 		mpi_data->node_tag.rank = -1;
 		mpi_data->node_tag.rank = -1;
 		mpi_data->node_tag.comm = MPI_COMM_WORLD;
 		mpi_data->node_tag.comm = MPI_COMM_WORLD;
+		_starpu_spin_init(&mpi_data->coop_lock);
 		data_handle->mpi_data = mpi_data;
 		data_handle->mpi_data = mpi_data;
-#if defined(STARPU_USE_MPI_MPI)
-		_starpu_mpi_tag_data_register(data_handle, data_tag);
-#endif
 		_starpu_mpi_cache_data_init(data_handle);
 		_starpu_mpi_cache_data_init(data_handle);
 		_starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear);
 		_starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear);
 	}
 	}
+	return mpi_data;
+}
+
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
+{
+	struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle);
 
 
 	if (data_tag != -1)
 	if (data_tag != -1)
 	{
 	{
+#if defined(STARPU_USE_MPI_MPI)
+		_starpu_mpi_tag_data_register(data_handle, data_tag);
+#endif
 		mpi_data->node_tag.data_tag = data_tag;
 		mpi_data->node_tag.data_tag = data_tag;
 	}
 	}
 	if (rank != -1)
 	if (rank != -1)
@@ -371,9 +416,6 @@ void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle
 {
 {
 	int size, i;
 	int size, i;
 	starpu_mpi_comm_size(comm, &size);
 	starpu_mpi_comm_size(comm, &size);
-#ifdef STARPU_DEVEL
-#warning TODO: use binary communication tree to optimize broadcast
-#endif
 	for (i = 0; i < size; i++)
 	for (i = 0; i < size; i++)
 		starpu_mpi_get_data_on_node_detached(comm, data_handle, i, NULL, NULL);
 		starpu_mpi_get_data_on_node_detached(comm, data_handle, i, NULL, NULL);
 }
 }

+ 3 - 1
mpi/src/starpu_mpi_cache.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2017                                CNRS
  * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2011-2017                                Université de Bordeaux
+ * Copyright (C) 2011-2018                                Université de Bordeaux
  * Copyright (C) 2014                                     Inria
  * Copyright (C) 2014                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -371,6 +371,8 @@ static void _starpu_mpi_cache_flush_and_invalidate_nolock(MPI_Comm comm, starpu_
 
 
 void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 {
 {
+	_starpu_mpi_data_flush(data_handle);
+
 	if (_starpu_cache_enabled == 0)
 	if (_starpu_cache_enabled == 0)
 		return;
 		return;
 
 

+ 269 - 0
mpi/src/starpu_mpi_coop_sends.c

@@ -0,0 +1,269 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2013,2016-2017                      Inria
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2010-2018                                CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_private.h>
+#include <datawizard/coherency.h>
+
+/*
+ * One node sends the same data to several nodes. Gather them into a
+ * "coop_sends", which then has a global view of all the required sends, and can
+ * establish a diffusion tree by telling receiving nodes to retransmit what they
+ * received (forwards) to others, and to others that they will receive from the
+ * former (redirects).
+ */
+
+/* This is called after a request is finished processing, to release the data.
+ *
+ * Does nothing when the request has no data handle. A request which is queued
+ * in a cooperative send bag removes itself from the bag while holding
+ * mpi_data->coop_lock; only the request which leaves the bag empty frees the
+ * bag and releases the data handle. A request which is not queued in any bag
+ * releases the data handle directly. */
+void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req)
+{
+	if (!req->data_handle)
+		return;
+
+	if (_starpu_mpi_req_multilist_queued_coop_sends(req))
+	{
+		struct _starpu_mpi_coop_sends *coop_sends = req->coop_sends_head;
+		struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data;
+		int last;
+		_starpu_spin_lock(&mpi_data->coop_lock);
+		/* Part of a cooperative send, dequeue ourselves from the others.
+		 * Emptiness is tested under the same lock as the removal, so
+		 * that only one request can see itself as the last one. */
+		_starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req);
+		last = _starpu_mpi_req_multilist_empty_coop_sends(&coop_sends->reqs);
+		_starpu_spin_unlock(&mpi_data->coop_lock);
+		if (last)
+		{
+			/* We were last: free the sorted array and the bag
+			 * itself, then release the data */
+			free(coop_sends->reqs_array);
+			free(coop_sends);
+			starpu_data_release(req->data_handle);
+		}
+	}
+	else
+	{
+		/* Trivial request, not part of any cooperative send bag */
+		starpu_data_release(req->data_handle);
+	}
+}
+
+/* Comparison function for getting qsort to put requests with high priority first.
+ *
+ * a and b point to elements of an array of struct _starpu_mpi_req pointers.
+ * Returns a negative value when *a has the higher priority, a positive value
+ * when *b has the higher priority, and 0 when both priorities are equal. */
+static int _starpu_mpi_reqs_prio_compare(const void *a, const void *b)
+{
+	const struct _starpu_mpi_req * const *ra = a;
+	const struct _starpu_mpi_req * const *rb = b;
+	/* Compare rather than subtract: the difference between two
+	 * priorities may not be representable, and overflowing it would
+	 * yield a bogus ordering */
+	return ((*rb)->prio > (*ra)->prio) - ((*rb)->prio < (*ra)->prio);
+}
+
+/* Sort the requests by priority and build a diffusion tree. Actually does something only once per coop_sends bag.
+ *
+ * A bag with a single request is left untouched. Otherwise the work is done
+ * while holding coop_sends->lock, and only when coop_sends->reqs_array has not
+ * been set yet, so that later calls on the same bag find the array and do
+ * nothing. */
+static void _starpu_mpi_coop_sends_optimize(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	if (coop_sends->n == 1)
+		/* Trivial case, don't optimize: no array and no tree are
+		 * built for a lone request */
+		return;
+
+	_starpu_spin_lock(&coop_sends->lock);
+	if (!coop_sends->reqs_array)
+	{
+		unsigned n = coop_sends->n, i;
+		struct _starpu_mpi_req *cur;
+		struct _starpu_mpi_req **reqs;
+
+		_STARPU_MPI_DEBUG(0, "handling cooperative sends %p for %u neighbours\n", coop_sends, n);
+
+		/* Store them in an array of n entries, in list order.
+		 * NOTE(review): this uses _STARPU_CALLOC while the other
+		 * allocations of the MPI layer visible here use
+		 * _STARPU_MPI_CALLOC -- confirm this is intended. */
+		_STARPU_CALLOC(reqs, n, sizeof(*reqs));
+		for (cur  = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs), i = 0;
+		     cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs);
+		     cur  = _starpu_mpi_req_multilist_next_coop_sends(cur), i++)
+			reqs[i] = cur;
+		coop_sends->reqs_array = reqs;
+
+		/* Sort them, highest priority first */
+		qsort(reqs, n, sizeof(*reqs), _starpu_mpi_reqs_prio_compare);
+
+		/* And build the diffusion tree, still holding coop_sends->lock */
+		_starpu_mpi_coop_sends_build_tree(coop_sends);
+	}
+	_starpu_spin_unlock(&coop_sends->lock);
+}
+
+/* Callback run once the data of a cooperative send bag has been acquired.
+ * arg is the struct _starpu_mpi_coop_sends bag whose data is now available. */
+static void _starpu_mpi_coop_sends_data_ready(void *arg)
+{
+	_STARPU_MPI_LOG_IN();
+	struct _starpu_mpi_coop_sends *bag = arg;
+	struct _starpu_mpi_data *data = bag->mpi_data;
+
+	/* Detach the bag from the data so that no further request joins it.
+	 * The pointer is peeked at first, and checked again once the lock is held. */
+	if (data->coop_sends == bag)
+	{
+		_starpu_spin_lock(&data->coop_lock);
+		if (data->coop_sends == bag)
+			data->coop_sends = NULL;
+		_starpu_spin_unlock(&data->coop_lock);
+	}
+
+	/* Sort the requests and build the diffusion tree, if not done yet */
+	_starpu_mpi_coop_sends_optimize(bag);
+
+	if (bag->n != 1)
+	{
+		/* The data is always submitted here; the redirects are
+		 * submitted too unless somebody already took care of them */
+		int redirects_pending = STARPU_TEST_AND_SET(&bag->redirects_sent, 1) == 0;
+		_starpu_mpi_submit_coop_sends(bag, redirects_pending, 1);
+	}
+	else
+	{
+		/* Lone request, submit it as is */
+		_starpu_mpi_submit_ready_request(_starpu_mpi_req_multilist_begin_coop_sends(&bag->reqs));
+	}
+	_STARPU_MPI_LOG_OUT();
+}
+
+/* Called when no new member should be added to a cooperative send any more,
+ * either because the algorithm tells us there will not be any other member,
+ * or because the value of the data has changed.
+ * A NULL coop_sends is accepted and ignored. */
+static void _starpu_mpi_coop_send_flush(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	if (coop_sends == NULL)
+		return;
+
+	/* Sort the requests and build the diffusion tree, if not done yet */
+	_starpu_mpi_coop_sends_optimize(coop_sends);
+
+	/* A lone request will simply send the data, nothing to do for it.
+	 * Otherwise submit the redirects (not the data), unless somebody
+	 * already did. */
+	if (coop_sends->n != 1 && STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0)
+		_starpu_mpi_submit_coop_sends(coop_sends, 1, 0);
+}
+
+/* Detach the current cooperative send bag, if any, from the data and flush it.
+ * This is called when a write to the data was just submitted: future sends
+ * then cannot cooperate with past sends since they do not carry the same value.
+ * Does nothing when the handle has no MPI data attached. */
+void _starpu_mpi_data_flush(starpu_data_handle_t data_handle)
+{
+	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
+	struct _starpu_mpi_coop_sends *detached;
+
+	if (mpi_data == NULL)
+		return;
+
+	/* Take the current bag out of the data while holding the lock */
+	_starpu_spin_lock(&mpi_data->coop_lock);
+	detached = mpi_data->coop_sends;
+	if (detached != NULL)
+		mpi_data->coop_sends = NULL;
+	_starpu_spin_unlock(&mpi_data->coop_lock);
+
+	if (detached == NULL)
+		return;
+
+	/* And flush it out of the critical section */
+	_STARPU_MPI_DEBUG(0, "%p: data written to, flush cooperative sends %p\n", data_handle, detached);
+	_starpu_mpi_coop_send_flush(detached);
+}
+
+/* Tell whether req may join the cooperative send bag coop_sends.
+ * req is compared against the first request queued in the bag; returns
+ * non-zero when they are compatible, 0 otherwise. */
+static int _starpu_mpi_coop_send_compatible(struct _starpu_mpi_req *req, struct _starpu_mpi_coop_sends *coop_sends)
+{
+	struct _starpu_mpi_req *head = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs);
+
+	/* We can cope with the tag being different, but not the communicator */
+	if (!(head->node_tag.comm == req->node_tag.comm))
+		return 0;
+
+	/* Both requests must also agree on sequential consistency */
+	return head->sequential_consistency == req->sequential_consistency;
+}
+
+/* Submit the send request req for data_handle as part of a cooperative send.
+ *
+ * req is queued in the bag currently attached to the data when it is
+ * compatible with it (see _starpu_mpi_coop_send_compatible). Otherwise a new
+ * bag containing only req is created and attached to the data, replacing the
+ * incompatible one if any; the replaced bag is then flushed since it will not
+ * get new members any more.
+ *
+ * The request which ends up first in its bag acquires the data with access
+ * mode mode and the given sequential_consistency for the whole bag;
+ * _starpu_mpi_coop_sends_data_ready is called with the bag once the
+ * acquisition completes. */
+void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
+{
+	struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle);
+	struct _starpu_mpi_coop_sends *coop_sends = NULL, *tofree = NULL, *evicted = NULL;
+	int done = 0, queue, first = 1;
+
+	/* Try to add ourself to something existing, otherwise create one.  */
+	while (!done)
+	{
+		_starpu_spin_lock(&mpi_data->coop_lock);
+		if (mpi_data->coop_sends)
+		{
+			/* Already something, check we are coherent with it */
+			queue = _starpu_mpi_coop_send_compatible(req, mpi_data->coop_sends);
+			if (queue)
+			{
+				/* Yes, queue ourself there */
+				if (coop_sends)
+				{
+					/* Remove ourself from what we created for ourself first */
+					_starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req);
+					tofree = coop_sends;
+				}
+				coop_sends = mpi_data->coop_sends;
+				_STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				_starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req);
+				coop_sends->n++;
+				req->coop_sends_head = coop_sends;
+				first = 0;
+				done = 1;
+			}
+			else if (coop_sends)
+			{
+				/* Nope, incompatible, put ours instead, and
+				 * remember the previous one to flush it once
+				 * out of the critical section */
+				_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				evicted = mpi_data->coop_sends;
+				mpi_data->coop_sends = coop_sends;
+				first = 1;
+				done = 1;
+			}
+			/* Otherwise it is incompatible but we do not have
+			 * anything to put instead yet: create ours below and
+			 * try again, rather than attaching a NULL bag and
+			 * acquiring the data for it */
+		}
+		else if (coop_sends)
+		{
+			/* Nobody else and we have allocated one, we're first! */
+			_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+			mpi_data->coop_sends = coop_sends;
+			first = 1;
+			done = 1;
+		}
+		_starpu_spin_unlock(&mpi_data->coop_lock);
+
+		if (!done && !coop_sends)
+		{
+			/* Didn't find something to join, create one out of critical section */
+			_STARPU_MPI_CALLOC(coop_sends, 1, sizeof(*coop_sends));
+			coop_sends->redirects_sent = 0;
+			coop_sends->n = 1;
+			_starpu_mpi_req_multilist_head_init_coop_sends(&coop_sends->reqs);
+			_starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req);
+			_starpu_spin_init(&coop_sends->lock);
+			req->coop_sends_head = coop_sends;
+			coop_sends->mpi_data = mpi_data;
+		}
+		/* We at worst do two iterations */
+		STARPU_ASSERT(done || coop_sends);
+	}
+
+	/* The bag we replaced, if any, will not get new members any more:
+	 * flush it, out of the critical section like _starpu_mpi_data_flush
+	 * does. This is a no-op when nothing was replaced. */
+	_starpu_mpi_coop_send_flush(evicted);
+
+	/* In case we created one for nothing after all */
+	free(tofree);
+
+	if (first)
+	{
+		/* We were first, we are responsible for acquiring the data for everybody */
+		starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_coop_sends_data_ready, coop_sends, sequential_consistency, &req->pre_sync_jobid, NULL);
+	}
+}
+

+ 1 - 0
mpi/src/starpu_mpi_init.c

@@ -109,6 +109,7 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm
 	argc_argv->argc = argc;
 	argc_argv->argc = argc;
 	argc_argv->argv = argv;
 	argc_argv->argv = argv;
 	argc_argv->comm = comm;
 	argc_argv->comm = comm;
+	_starpu_implicit_data_deps_write_hook(_starpu_mpi_data_flush);
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	/* Call MPI_Init_thread as early as possible, to initialize simgrid
 	/* Call MPI_Init_thread as early as possible, to initialize simgrid

+ 17 - 1
mpi/src/starpu_mpi_private.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
- * Copyright (C) 2010,2012,2014-2016                      Université de Bordeaux
+ * Copyright (C) 2010,2012,2014-2016,2018                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,6 +23,12 @@ int _starpu_debug_level_max=0;
 int _starpu_mpi_tag = 42;
 int _starpu_mpi_tag = 42;
 int _starpu_mpi_comm_debug;
 int _starpu_mpi_comm_debug;
 
 
+int _starpu_mpi_thread_cpuid = -1;
+int _starpu_mpi_use_prio = 1;
+int _starpu_mpi_fake_world_size = -1;
+int _starpu_mpi_fake_world_rank = -1;
+int _starpu_mpi_use_coop_sends = 1;
+
 void _starpu_mpi_set_debug_level_min(int level)
 void _starpu_mpi_set_debug_level_min(int level)
 {
 {
 	_starpu_debug_level_min = level;
 	_starpu_debug_level_min = level;
@@ -50,3 +56,13 @@ char *_starpu_mpi_get_mpi_error_code(int code)
 	MPI_Error_string(code, str, &len);
 	MPI_Error_string(code, str, &len);
 	return str;
 	return str;
 }
 }
+
+/* Set the StarPU-MPI global settings from the environment */
+void _starpu_mpi_env_init(void)
+{
+	/* Communication debugging is enabled as soon as STARPU_MPI_COMM is defined, whatever its value */
+	const char *comm_debug = starpu_getenv("STARPU_MPI_COMM");
+	_starpu_mpi_comm_debug = (comm_debug != NULL);
+
+	/* Faked world size and rank */
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+
+	/* Settings which are given an explicit default value here */
+	_starpu_mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
+	_starpu_mpi_use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
+	_starpu_mpi_use_coop_sends = starpu_get_env_number_default("STARPU_MPI_COOP_SENDS", 1);
+}

+ 58 - 4
mpi/src/starpu_mpi_private.h

@@ -26,6 +26,7 @@
 #include <starpu_mpi_fxt.h>
 #include <starpu_mpi_fxt.h>
 #include <common/list.h>
 #include <common/list.h>
 #include <common/prio_list.h>
 #include <common/prio_list.h>
+#include <common/starpu_spinlock.h>
 #include <core/simgrid.h>
 #include <core/simgrid.h>
 #if defined(STARPU_USE_MPI_NMAD)
 #if defined(STARPU_USE_MPI_NMAD)
 #include <pioman.h>
 #include <pioman.h>
@@ -66,6 +67,10 @@ void _starpu_mpi_set_debug_level_max(int level);
 #endif
 #endif
 extern int _starpu_mpi_fake_world_size;
 extern int _starpu_mpi_fake_world_size;
 extern int _starpu_mpi_fake_world_rank;
 extern int _starpu_mpi_fake_world_rank;
+extern int _starpu_mpi_use_prio;
+extern int _starpu_mpi_thread_cpuid;
+extern int _starpu_mpi_use_coop_sends;
+void _starpu_mpi_env_init(void);
 
 
 #ifdef STARPU_NO_ASSERT
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)
@@ -194,14 +199,36 @@ struct _starpu_mpi_node_tag
 	starpu_mpi_tag_t data_tag;
 	starpu_mpi_tag_t data_tag;
 };
 };
 
 
+MULTILIST_CREATE_TYPE(_starpu_mpi_req, coop_sends)
+/* One bag of cooperative sends: the send requests gathered for the same data */
+struct _starpu_mpi_coop_sends
+{
+	/* List of send requests; modified while holding mpi_data->coop_lock */
+	struct _starpu_mpi_req_multilist_coop_sends reqs;
+	struct _starpu_mpi_data *mpi_data; /* MPI data this bag was created for */
+
+	/* The lock below is held while building reqs_array, the array of
+	 * send requests sorted by priority. reqs_array remains NULL until
+	 * the bag gets optimized. */
+	struct _starpu_spinlock lock;
+	struct _starpu_mpi_req **reqs_array;
+	unsigned n; /* Number of requests in the bag: 1 at creation, incremented for each request joining */
+	unsigned redirects_sent; /* Set with a test-and-set by whoever takes care of submitting the redirects */
+};
+
+/* Initialized in starpu_mpi_data_register_comm */
 struct _starpu_mpi_data
 struct _starpu_mpi_data
 {
 {
 	int magic;
 	int magic;
 	struct _starpu_mpi_node_tag node_tag;
 	struct _starpu_mpi_node_tag node_tag;
 	int *cache_sent;
 	int *cache_sent;
 	int cache_received;
 	int cache_received;
+
+	/* Rendez-vous data for opportunistic cooperative sends */
+	struct _starpu_spinlock coop_lock; /* Needed to synchronize between submit thread and workers */
+	struct _starpu_mpi_coop_sends *coop_sends; /* Current cooperative send bag */
 };
 };
 
 
+struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle);
+
 struct _starpu_mpi_req;
 struct _starpu_mpi_req;
 LIST_TYPE(_starpu_mpi_req,
 LIST_TYPE(_starpu_mpi_req,
 	/* description of the data at StarPU level */
 	/* description of the data at StarPU level */
@@ -232,6 +259,8 @@ LIST_TYPE(_starpu_mpi_req,
 #elif defined(STARPU_USE_MPI_MPI)
 #elif defined(STARPU_USE_MPI_MPI)
 	MPI_Request data_request;
 	MPI_Request data_request;
 #endif
 #endif
+	struct _starpu_mpi_req_multilist_coop_sends coop_sends;
+	struct _starpu_mpi_coop_sends *coop_sends_head;
 
 
 	int *flag;
 	int *flag;
 	unsigned sync;
 	unsigned sync;
@@ -290,17 +319,41 @@ LIST_TYPE(_starpu_mpi_req,
 );
 );
 PRIO_LIST_TYPE(_starpu_mpi_req, prio)
 PRIO_LIST_TYPE(_starpu_mpi_req, prio)
 
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
+MULTILIST_CREATE_INLINES(struct _starpu_mpi_req, _starpu_mpi_req, coop_sends)
+
+/* To be called before actually queueing a request, so the communication layer knows it has something to look at */
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req);
+/* To be called to actually submit the request */
+void _starpu_mpi_submit_ready_request(void *arg);
+/* To be called when request is completed */
+void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req);
+
+/* Build a communication tree. Called before _starpu_mpi_coop_send is ever called. coop_sends->lock is held. */
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends);
+/* Try to merge this send request with other send requests */
+void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency);
+
+/* Actually submit the coop_sends bag to MPI.
+ * At least one of submit_redirects or submit_data is true.
+ * _starpu_mpi_submit_coop_sends may be called either
+ * - just once with both parameters being true,
+ * - or once with submit_redirects being true (data is not available yet, but we
+ * can send the redirects), and a second time with submit_data being true. Or
+ * the converse, possibly on different threads, etc.
+ */
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data);
+
+void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req);
+void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
+struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle,
 						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
 						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
 						       int sequential_consistency,
 						       int sequential_consistency,
 						       int is_internal_req,
 						       int is_internal_req,
 						       starpu_ssize_t count);
 						       starpu_ssize_t count);
 
 
-void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req);
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
+
 void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req);
 void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req);
 void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req);
 void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req);
 void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req);
 void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req);
@@ -325,6 +378,7 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv);
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 void _starpu_mpi_wait_for_initialization();
 void _starpu_mpi_wait_for_initialization();
 #endif
 #endif
+void _starpu_mpi_data_flush(starpu_data_handle_t data_handle);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }

+ 161 - 0
mpi/src/starpu_mpi_req.c

@@ -0,0 +1,161 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2013,2016-2017                      Inria
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2017                                     Guillaume Beauchamp
+ * Copyright (C) 2010-2018                                CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_mpi_private.h>
+#if defined(STARPU_USE_MPI_MPI)
+#include <mpi/starpu_mpi_comm.h>
+#endif
+#if defined(STARPU_USE_MPI_NMAD)
+#include <pioman.h>
+#include <nm_mpi_nmad.h>
+#endif
+
+void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
+{
+	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
+
+	/* Initialize the request structure.
+	 *
+	 * The request was just allocated above and stored in *req. Every
+	 * field is given its default value here; the synchronization objects
+	 * initialized below depend on the communication backend
+	 * (STARPU_USE_MPI_NMAD or STARPU_USE_MPI_MPI), and are released by
+	 * _starpu_mpi_request_destroy. */
+	(*req)->data_handle = NULL;
+	(*req)->prio = 0;
+
+	(*req)->datatype = 0;
+	(*req)->datatype_name = NULL;
+	(*req)->ptr = NULL;
+	(*req)->count = -1;
+	(*req)->registered_datatype = -1;
+
+	(*req)->node_tag.rank = -1;
+	(*req)->node_tag.data_tag = -1;
+	(*req)->node_tag.comm = 0;
+
+	(*req)->func = NULL;
+
+	(*req)->status = NULL;
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->data_request = 0;
+#endif
+	(*req)->flag = NULL;
+	_starpu_mpi_req_multilist_init_coop_sends(*req); /* not queued in any cooperative send bag yet */
+
+	(*req)->ret = -1;
+#ifdef STARPU_USE_MPI_NMAD
+	piom_cond_init(&((*req)->req_cond), 0);
+#elif defined(STARPU_USE_MPI_MPI)
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
+#endif
+
+	(*req)->request_type = UNKNOWN_REQ;
+
+	(*req)->submitted = 0;
+	(*req)->completed = 0;
+	(*req)->posted = 0;
+
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->other_request = NULL;
+#endif
+
+	(*req)->sync = 0;
+	(*req)->detached = -1;
+	(*req)->callback = NULL;
+	(*req)->callback_arg = NULL;
+
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->size_req = 0;
+	(*req)->internal_req = NULL;
+	(*req)->is_internal_req = 0;
+	(*req)->to_destroy = 1;
+	(*req)->early_data_handle = NULL;
+	(*req)->envelope = NULL;
+#endif
+	(*req)->sequential_consistency = 1;
+	(*req)->pre_sync_jobid = -1;
+	(*req)->post_sync_jobid = -1;
+
+#ifdef STARPU_SIMGRID
+	starpu_pthread_queue_init(&((*req)->queue));
+	starpu_pthread_queue_register(&wait, &((*req)->queue));
+	(*req)->done = 0;
+#endif
+}
+
+struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handle,
+						 int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
+						 unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
+						 enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
+						 int sequential_consistency,
+						 int is_internal_req,
+						 starpu_ssize_t count)
+{
+	struct _starpu_mpi_req *req;
+
+#ifdef STARPU_USE_MPI_MPI
+	_starpu_mpi_comm_register(comm);
+#endif
+
+	/* Initialize the request structure: allocate it with default values,
+	 * then record the parameters given by the caller. The newly
+	 * allocated request is returned. */
+	_starpu_mpi_request_init(&req);
+	req->request_type = request_type;
+	/* prio_list is sorted by increasing values.
+	 * The priority is recorded only when _starpu_mpi_use_prio is set;
+	 * otherwise the request keeps the value set by
+	 * _starpu_mpi_request_init. */
+	if (_starpu_mpi_use_prio)
+		req->prio = prio;
+	req->data_handle = data_handle;
+	req->node_tag.rank = srcdst;
+	req->node_tag.data_tag = data_tag;
+	req->node_tag.comm = comm;
+	req->detached = detached;
+	req->sync = sync;
+	req->callback = callback;
+	req->callback_arg = arg;
+	req->func = func;
+	req->sequential_consistency = sequential_consistency;
+#ifdef STARPU_USE_MPI_NMAD
+	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
+#elif defined(STARPU_USE_MPI_MPI)
+	req->is_internal_req = is_internal_req;
+	/* For internal requests, we wait for both the request completion and the matching application request completion.
+	 * Note that is_internal_req and count are only used in this
+	 * STARPU_USE_MPI_MPI branch. */
+	req->to_destroy = !is_internal_req;
+	req->count = count;
+#endif
+
+	return req;
+}
+
+/* Release the resources attached to a request, and the request itself */
+void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
+{
+#if defined(STARPU_USE_MPI_NMAD)
+	/* Synchronization object set up by _starpu_mpi_request_init */
+	piom_cond_destroy(&req->req_cond);
+#elif defined(STARPU_USE_MPI_MPI)
+	/* Synchronization objects set up by _starpu_mpi_request_init */
+	STARPU_PTHREAD_MUTEX_DESTROY(&(req->req_mutex));
+	STARPU_PTHREAD_COND_DESTROY(&(req->req_cond));
+	STARPU_PTHREAD_MUTEX_DESTROY(&(req->posted_mutex));
+	STARPU_PTHREAD_COND_DESTROY(&(req->posted_cond));
+
+	/* Name of the datatype, if any */
+	free(req->datatype_name);
+	req->datatype_name = NULL;
+#endif
+#if defined(STARPU_SIMGRID)
+	/* Undo the queue registration done by _starpu_mpi_request_init */
+	starpu_pthread_queue_unregister(&wait, &(req->queue));
+	starpu_pthread_queue_destroy(&(req->queue));
+#endif
+	free(req);
+}
+

+ 1 - 0
mpi/tests/Makefile.am

@@ -194,6 +194,7 @@ noinst_PROGRAMS =				\
 	block_interface				\
 	block_interface				\
 	block_interface_pinned			\
 	block_interface_pinned			\
 	attr					\
 	attr					\
+	broadcast				\
 	cache					\
 	cache					\
 	cache_disable				\
 	cache_disable				\
 	callback				\
 	callback				\

+ 26 - 6
mpi/tests/broadcast.c

@@ -18,11 +18,13 @@
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
 #include "helper.h"
 #include "helper.h"
 
 
-void wait_CPU(void *descr[], void *_args)
+void wait_CPU(void *descr[], void *args)
 {
 {
-	(void)_args;
 	int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]);
 	int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]);
-	*var = 42;
+	int val;
+
+	starpu_codelet_unpack_args(args, &val);
+	*var = val;
 	starpu_sleep(1);
 	starpu_sleep(1);
 }
 }
 
 
@@ -57,9 +59,20 @@ int main(int argc, char **argv)
 
 
 	if (rank == 0)
 	if (rank == 0)
 	{
 	{
-		starpu_task_insert(&cl, STARPU_W, handle, 0);
+		int val, n;
+
+		val = 42;
+		starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0);
+
+		for(n = 1 ; n < size ; n++)
+		{
+			FPRINTF_MPI(stderr, "sending data to %d\n", n);
+			starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL);
+		}
+
+		val = 43;
+		starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0);
 
 
-		int n;
 		for(n = 1 ; n < size ; n++)
 		for(n = 1 ; n < size ; n++)
 		{
 		{
 			FPRINTF_MPI(stderr, "sending data to %d\n", n);
 			FPRINTF_MPI(stderr, "sending data to %d\n", n);
@@ -69,11 +82,18 @@ int main(int argc, char **argv)
 	else
 	else
 	{
 	{
 		starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status);
 		starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status);
+		starpu_data_acquire(handle, STARPU_R);
+		STARPU_ASSERT(var == 42);
+		starpu_data_release(handle);
+
+		starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status);
+		starpu_data_acquire(handle, STARPU_R);
+		STARPU_ASSERT(var == 43);
+		starpu_data_release(handle);
 		FPRINTF_MPI(stderr, "received data\n");
 		FPRINTF_MPI(stderr, "received data\n");
 	}
 	}
 
 
 	starpu_data_unregister(handle);
 	starpu_data_unregister(handle);
-	STARPU_ASSERT(var == 42);
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();

+ 1 - 2
mpi/tests/user_defined_datatype_value.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2013-2015,2017                           CNRS
+ * Copyright (C) 2013-2015,2017,2018                      CNRS
  * Copyright (C) 2014                                     Université de Bordeaux
  * Copyright (C) 2014                                     Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -129,7 +129,6 @@ static int copy_any_to_any(void *src_interface, unsigned src_node,
 {
 {
 	struct starpu_value_interface *src_value = src_interface;
 	struct starpu_value_interface *src_value = src_interface;
 	struct starpu_value_interface *dst_value = dst_interface;
 	struct starpu_value_interface *dst_value = dst_interface;
-	int ret = 0;
 
 
 	return starpu_interface_copy((uintptr_t) src_value->value, 0, src_node,
 	return starpu_interface_copy((uintptr_t) src_value->value, 0, src_node,
 				     (uintptr_t) dst_value->value, 0, dst_node,
 				     (uintptr_t) dst_value->value, 0, dst_node,

+ 3 - 2
socl/src/cl_buildprogram.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -76,13 +76,14 @@ static void soclBuildProgram_task(void *data)
 	DEBUG_MSG("[Worker %d] Done building.\n", wid);
 	DEBUG_MSG("[Worker %d] Done building.\n", wid);
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclBuildProgram(cl_program         program,
 soclBuildProgram(cl_program         program,
 		 cl_uint              num_devices,
 		 cl_uint              num_devices,
 		 const cl_device_id * device_list,
 		 const cl_device_id * device_list,
 		 const char *         options, 
 		 const char *         options, 
 		 void (*pfn_notify)(cl_program program, void * user_data),
 		 void (*pfn_notify)(cl_program program, void * user_data),
-		 void *               user_data) CL_API_SUFFIX__VERSION_1_0
+		 void *               user_data)
 {
 {
 	struct bp_data *data;
 	struct bp_data *data;
 
 

+ 3 - 2
socl/src/cl_createbuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -49,12 +49,13 @@ static void release_callback_memobject(void * e)
  * should avoid it.
  * should avoid it.
  *
  *
  */
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateBuffer(cl_context   context,
 soclCreateBuffer(cl_context   context,
 		 cl_mem_flags flags,
 		 cl_mem_flags flags,
 		 size_t       size,
 		 size_t       size,
 		 void *       host_ptr,
 		 void *       host_ptr,
-		 cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		 cl_int *     errcode_ret)
 {
 {
 	cl_mem mem;
 	cl_mem mem;
 
 

+ 3 - 2
socl/src/cl_createcommandqueue.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2014,2017                           CNRS
  * Copyright (C) 2012,2014,2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,11 +37,12 @@ static void release_callback_command_queue(void * e)
 	STARPU_PTHREAD_MUTEX_DESTROY(&cq->mutex);
 	STARPU_PTHREAD_MUTEX_DESTROY(&cq->mutex);
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_command_queue CL_API_CALL
 CL_API_ENTRY cl_command_queue CL_API_CALL
 soclCreateCommandQueue(cl_context                   context,
 soclCreateCommandQueue(cl_context                   context,
 		       cl_device_id                   device,
 		       cl_device_id                   device,
 		       cl_command_queue_properties    properties,
 		       cl_command_queue_properties    properties,
-		       cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		       cl_int *                       errcode_ret)
 {
 {
 	cl_command_queue cq;
 	cl_command_queue cq;
 
 

+ 3 - 2
socl/src/cl_createcontext.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012-2013,2017                           CNRS
  * Copyright (C) 2012-2013,2017                           CNRS
- * Copyright (C) 2010-2013                                Université de Bordeaux
+ * Copyright (C) 2010-2013, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,13 +35,14 @@ static void release_callback_context(void * e)
 static char * defaultScheduler = "dmda";
 static char * defaultScheduler = "dmda";
 static char * defaultName = "default";
 static char * defaultName = "default";
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_context CL_API_CALL
 CL_API_ENTRY cl_context CL_API_CALL
 soclCreateContext(const cl_context_properties * properties,
 soclCreateContext(const cl_context_properties * properties,
 		  cl_uint                       num_devices,
 		  cl_uint                       num_devices,
 		  const cl_device_id *          devices,
 		  const cl_device_id *          devices,
 		  void (*pfn_notify)(const char *, const void *, size_t, void *),
 		  void (*pfn_notify)(const char *, const void *, size_t, void *),
 		  void *                        user_data,
 		  void *                        user_data,
-		  cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                      errcode_ret)
 {
 {
 	if (pfn_notify == NULL && user_data != NULL)
 	if (pfn_notify == NULL && user_data != NULL)
 	{
 	{

+ 3 - 2
socl/src/cl_createcontextfromtype.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012,2016                           Université de Bordeaux
+ * Copyright (C) 2010-2012,2016, 2018                           Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  * Copyright (C) 2012                                     Vincent Danjean
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,12 +20,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "init.h"
 #include "init.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_context CL_API_CALL
 CL_API_ENTRY cl_context CL_API_CALL
 soclCreateContextFromType(const cl_context_properties * properties,
 soclCreateContextFromType(const cl_context_properties * properties,
 			  cl_device_type                device_type,
 			  cl_device_type                device_type,
 			  void (*pfn_notify)(const char *, const void *, size_t, void *),
 			  void (*pfn_notify)(const char *, const void *, size_t, void *),
 			  void *                        user_data,
 			  void *                        user_data,
-			  cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			  cl_int *                      errcode_ret)
 {
 {
 	if (socl_init_starpu() < 0)
 	if (socl_init_starpu() < 0)
 		return NULL;
 		return NULL;

+ 3 - 2
socl/src/cl_createimage2d.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateImage2D(cl_context              UNUSED(context),
 soclCreateImage2D(cl_context              UNUSED(context),
 		  cl_mem_flags            UNUSED(flags),
 		  cl_mem_flags            UNUSED(flags),
@@ -26,7 +27,7 @@ soclCreateImage2D(cl_context              UNUSED(context),
 		  size_t                  UNUSED(image_height),
 		  size_t                  UNUSED(image_height),
 		  size_t                  UNUSED(image_row_pitch),
 		  size_t                  UNUSED(image_row_pitch),
 		  void *                  UNUSED(host_ptr),
 		  void *                  UNUSED(host_ptr),
-		  cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                errcode_ret)
 {
 {
 	if (errcode_ret != NULL)
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createimage3d.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateImage3D(cl_context              UNUSED(context),
 soclCreateImage3D(cl_context              UNUSED(context),
 		  cl_mem_flags            UNUSED(flags),
 		  cl_mem_flags            UNUSED(flags),
@@ -28,7 +29,7 @@ soclCreateImage3D(cl_context              UNUSED(context),
 		  size_t                  UNUSED(image_row_pitch),
 		  size_t                  UNUSED(image_row_pitch),
 		  size_t                  UNUSED(image_slice_pitch),
 		  size_t                  UNUSED(image_slice_pitch),
 		  void *                  UNUSED(host_ptr),
 		  void *                  UNUSED(host_ptr),
-		  cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                errcode_ret)
 {
 {
 	if (errcode_ret != NULL)
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createkernel.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2014,2016-2017                      CNRS
  * Copyright (C) 2012,2014,2016-2017                      CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -117,10 +117,11 @@ static void release_callback_kernel(void * e)
 	free(kernel->errcodes);
 	free(kernel->errcodes);
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_kernel CL_API_CALL
 CL_API_ENTRY cl_kernel CL_API_CALL
 soclCreateKernel(cl_program    program,
 soclCreateKernel(cl_program    program,
 		 const char *    kernel_name,
 		 const char *    kernel_name,
-		 cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		 cl_int *        errcode_ret)
 {
 {
 	cl_kernel k;
 	cl_kernel k;
 
 

+ 3 - 2
socl/src/cl_createkernelsinprogram.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,11 +18,12 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclCreateKernelsInProgram(cl_program   UNUSED(program),
 soclCreateKernelsInProgram(cl_program   UNUSED(program),
 			   cl_uint        UNUSED(num_kernels),
 			   cl_uint        UNUSED(num_kernels),
 			   cl_kernel *    UNUSED(kernels),
 			   cl_kernel *    UNUSED(kernels),
-			   cl_uint *      UNUSED(num_kernels_ret)) CL_API_SUFFIX__VERSION_1_0
+			   cl_uint *      UNUSED(num_kernels_ret))
 {
 {
 	//TODO
 	//TODO
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createprogramwithbinary.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_program CL_API_CALL
 CL_API_ENTRY cl_program CL_API_CALL
 soclCreateProgramWithBinary(cl_context                     UNUSED(context),
 soclCreateProgramWithBinary(cl_context                     UNUSED(context),
 			    cl_uint                        UNUSED(num_devices),
 			    cl_uint                        UNUSED(num_devices),
@@ -25,7 +26,7 @@ soclCreateProgramWithBinary(cl_context                     UNUSED(context),
 			    const size_t *                 UNUSED(lengths),
 			    const size_t *                 UNUSED(lengths),
 			    const unsigned char **         UNUSED(binaries),
 			    const unsigned char **         UNUSED(binaries),
 			    cl_int *                       UNUSED(binary_status),
 			    cl_int *                       UNUSED(binary_status),
-			    cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			    cl_int *                       errcode_ret)
 {
 {
 	//TODO
 	//TODO
 	if (errcode_ret != NULL)
 	if (errcode_ret != NULL)

+ 3 - 2
socl/src/cl_createprogramwithsource.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,12 +66,13 @@ static void release_callback_program(void * e)
 		free(program->options);
 		free(program->options);
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_program CL_API_CALL
 CL_API_ENTRY cl_program CL_API_CALL
 soclCreateProgramWithSource(cl_context      context,
 soclCreateProgramWithSource(cl_context      context,
 			    cl_uint           count,
 			    cl_uint           count,
 			    const char **     strings,
 			    const char **     strings,
 			    const size_t *    lengths,
 			    const size_t *    lengths,
-			    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			    cl_int *          errcode_ret)
 {
 {
 	cl_program p;
 	cl_program p;
 	struct cpws_data *data;
 	struct cpws_data *data;

+ 3 - 2
socl/src/cl_createsampler.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,12 +18,13 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_sampler CL_API_CALL
 CL_API_ENTRY cl_sampler CL_API_CALL
 soclCreateSampler(cl_context          UNUSED(context),
 soclCreateSampler(cl_context          UNUSED(context),
 		  cl_bool             UNUSED(normalized_coords), 
 		  cl_bool             UNUSED(normalized_coords), 
 		  cl_addressing_mode  UNUSED(addressing_mode), 
 		  cl_addressing_mode  UNUSED(addressing_mode), 
 		  cl_filter_mode      UNUSED(filter_mode),
 		  cl_filter_mode      UNUSED(filter_mode),
-		  cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *            errcode_ret)
 {
 {
 	if (errcode_ret != NULL)
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_enqueuebarrier.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
-soclEnqueueBarrier(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclEnqueueBarrier(cl_command_queue cq)
 {
 {
 	command_barrier cmd = command_barrier_create();
 	command_barrier cmd = command_barrier_create();
 
 

+ 3 - 2
socl/src/cl_enqueuebarrierwithwaitlist.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2017                                     CNRS
  * Copyright (C) 2017                                     CNRS
- * Copyright (C) 2010,2013                                Université de Bordeaux
+ * Copyright (C) 2010,2013, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,11 +17,12 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_2
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueBarrierWithWaitList(cl_command_queue  cq,
 soclEnqueueBarrierWithWaitList(cl_command_queue  cq,
 			       cl_uint num_events,
 			       cl_uint num_events,
 			       const cl_event * events,
 			       const cl_event * events,
-			       cl_event *          event) CL_API_SUFFIX__VERSION_1_2
+			       cl_event *          event)
 {
 {
 	command_barrier cmd = command_barrier_create();
 	command_barrier cmd = command_barrier_create();
 
 

+ 3 - 2
socl/src/cl_enqueuecopybuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014,2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014,2017-2018                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -99,6 +99,7 @@ cl_int command_copy_buffer_submit(command_copy_buffer cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyBuffer(cl_command_queue  cq,
 soclEnqueueCopyBuffer(cl_command_queue  cq,
 		      cl_mem              src_buffer,
 		      cl_mem              src_buffer,
@@ -108,7 +109,7 @@ soclEnqueueCopyBuffer(cl_command_queue  cq,
 		      size_t              cb,
 		      size_t              cb,
 		      cl_uint             num_events,
 		      cl_uint             num_events,
 		      const cl_event *    events,
 		      const cl_event *    events,
-		      cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          event)
 {
 {
 	command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb);
 	command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb);
 
 

+ 3 - 2
socl/src/cl_enqueuecopybuffertoimage.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
 soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
 			     cl_mem           UNUSED(src_buffer),
 			     cl_mem           UNUSED(src_buffer),
@@ -27,7 +28,7 @@ soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
 			     const size_t *   UNUSED(region),
 			     const size_t *   UNUSED(region),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
-			     cl_event *       UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			     cl_event *       UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_enqueuecopyimage.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyImage(cl_command_queue   UNUSED(command_queue),
 soclEnqueueCopyImage(cl_command_queue   UNUSED(command_queue),
 		     cl_mem               UNUSED(src_image),
 		     cl_mem               UNUSED(src_image),
@@ -27,7 +28,7 @@ soclEnqueueCopyImage(cl_command_queue   UNUSED(command_queue),
 		     const size_t *       UNUSED(region),
 		     const size_t *       UNUSED(region),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
-		     cl_event *           UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		     cl_event *           UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_enqueuecopyimagetobuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue),
 soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue),
 			     cl_mem           UNUSED(src_image),
 			     cl_mem           UNUSED(src_image),
@@ -27,7 +28,7 @@ soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue),
 			     size_t           UNUSED(dst_offset),
 			     size_t           UNUSED(dst_offset),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
-			     cl_event *       UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			     cl_event *       UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_enqueuemapbuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012-2013,2017                           CNRS
  * Copyright (C) 2012-2013,2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -45,6 +45,7 @@ cl_int command_map_buffer_submit(command_map_buffer cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
 CL_API_ENTRY void * CL_API_CALL
 soclEnqueueMapBuffer(cl_command_queue cq,
 soclEnqueueMapBuffer(cl_command_queue cq,
 		     cl_mem           buffer,
 		     cl_mem           buffer,
@@ -55,7 +56,7 @@ soclEnqueueMapBuffer(cl_command_queue cq,
 		     cl_uint          num_events,
 		     cl_uint          num_events,
 		     const cl_event * events,
 		     const cl_event * events,
 		     cl_event *       event,
 		     cl_event *       event,
-		     cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		     cl_int *         errcode_ret)
 {
 {
 	command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb);
 	command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb);
 
 

+ 3 - 2
socl/src/cl_enqueuemapimage.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
 CL_API_ENTRY void * CL_API_CALL
 soclEnqueueMapImage(cl_command_queue  UNUSED(command_queue),
 soclEnqueueMapImage(cl_command_queue  UNUSED(command_queue),
 		    cl_mem            UNUSED(image),
 		    cl_mem            UNUSED(image),
@@ -30,7 +31,7 @@ soclEnqueueMapImage(cl_command_queue  UNUSED(command_queue),
 		    cl_uint           UNUSED(num_events_in_wait_list),
 		    cl_uint           UNUSED(num_events_in_wait_list),
 		    const cl_event *  UNUSED(event_wait_list),
 		    const cl_event *  UNUSED(event_wait_list),
 		    cl_event *        UNUSED(event),
 		    cl_event *        UNUSED(event),
-		    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		    cl_int *          errcode_ret)
 {
 {
 	if (errcode_ret != NULL)
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_enqueuemarker.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,9 +18,10 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueMarker(cl_command_queue  cq,
 soclEnqueueMarker(cl_command_queue  cq,
-                cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+                cl_event *          event)
 {
 {
 	if (event == NULL)
 	if (event == NULL)
 		return CL_INVALID_VALUE;
 		return CL_INVALID_VALUE;

+ 3 - 2
socl/src/cl_enqueuemarkerwithwaitlist.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2017                                     CNRS
  * Copyright (C) 2017                                     CNRS
- * Copyright (C) 2010,2013                                Université de Bordeaux
+ * Copyright (C) 2010,2013, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,11 +17,12 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_2
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueMarkerWithWaitList(cl_command_queue  cq,
 soclEnqueueMarkerWithWaitList(cl_command_queue  cq,
 			      cl_uint num_events,
 			      cl_uint num_events,
 			      const cl_event * events,
 			      const cl_event * events,
-			      cl_event *          event) CL_API_SUFFIX__VERSION_1_2
+			      cl_event *          event)
 {
 {
 	if (events == NULL)
 	if (events == NULL)
 		return soclEnqueueBarrierWithWaitList(cq, num_events, events, event);
 		return soclEnqueueBarrierWithWaitList(cq, num_events, events, event);

+ 3 - 2
socl/src/cl_enqueuenativekernel.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueNativeKernel(cl_command_queue  UNUSED(command_queue),
 soclEnqueueNativeKernel(cl_command_queue  UNUSED(command_queue),
 			__attribute__((unused)) void (*user_func)(void *),
 			__attribute__((unused)) void (*user_func)(void *),
@@ -28,7 +29,7 @@ soclEnqueueNativeKernel(cl_command_queue  UNUSED(command_queue),
 			const void **     UNUSED(args_mem_loc),
 			const void **     UNUSED(args_mem_loc),
 			cl_uint           UNUSED(num_events_in_wait_list),
 			cl_uint           UNUSED(num_events_in_wait_list),
 			const cl_event *  UNUSED(event_wait_list),
 			const cl_event *  UNUSED(event_wait_list),
-			cl_event *        UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			cl_event *        UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_enqueuendrangekernel.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2014,2016-2017                      CNRS
  * Copyright (C) 2012,2014,2016-2017                      CNRS
- * Copyright (C) 2010-2011,2013,2016-2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013,2016-2018                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -159,6 +159,7 @@ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_1
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueNDRangeKernel(cl_command_queue cq,
 soclEnqueueNDRangeKernel(cl_command_queue cq,
 			 cl_kernel        kernel,
 			 cl_kernel        kernel,
@@ -168,7 +169,7 @@ soclEnqueueNDRangeKernel(cl_command_queue cq,
 			 const size_t *   local_work_size,
 			 const size_t *   local_work_size,
 			 cl_uint          num_events,
 			 cl_uint          num_events,
 			 const cl_event * events,
 			 const cl_event * events,
-			 cl_event *       event) CL_API_SUFFIX__VERSION_1_1
+			 cl_event *       event)
 {
 {
 	if (kernel->split_func != NULL && !STARPU_PTHREAD_MUTEX_TRYLOCK(&kernel->split_lock))
 	if (kernel->split_func != NULL && !STARPU_PTHREAD_MUTEX_TRYLOCK(&kernel->split_lock))
 	{
 	{

+ 3 - 2
socl/src/cl_enqueuereadbuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014, 2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -102,6 +102,7 @@ cl_int command_read_buffer_submit(command_read_buffer cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueReadBuffer(cl_command_queue  cq,
 soclEnqueueReadBuffer(cl_command_queue  cq,
 		      cl_mem              buffer,
 		      cl_mem              buffer,
@@ -111,7 +112,7 @@ soclEnqueueReadBuffer(cl_command_queue  cq,
 		      void *              ptr,
 		      void *              ptr,
 		      cl_uint             num_events,
 		      cl_uint             num_events,
 		      const cl_event *    events,
 		      const cl_event *    events,
-		      cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          event)
 {
 {
 	command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr);
 	command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr);
 
 

+ 3 - 2
socl/src/cl_enqueuereadimage.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueReadImage(cl_command_queue   UNUSED(command_queue),
 soclEnqueueReadImage(cl_command_queue   UNUSED(command_queue),
 		     cl_mem               UNUSED(image),
 		     cl_mem               UNUSED(image),
@@ -29,7 +30,7 @@ soclEnqueueReadImage(cl_command_queue   UNUSED(command_queue),
 		     void *               UNUSED(ptr),
 		     void *               UNUSED(ptr),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
-		     cl_event *           UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		     cl_event *           UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_enqueuetask.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,12 +18,13 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueTask(cl_command_queue cq,
 soclEnqueueTask(cl_command_queue cq,
 		cl_kernel         kernel,
 		cl_kernel         kernel,
 		cl_uint           num_events,
 		cl_uint           num_events,
 		const cl_event *  events,
 		const cl_event *  events,
-		cl_event *        event) CL_API_SUFFIX__VERSION_1_0
+		cl_event *        event)
 {
 {
 	command_ndrange_kernel cmd = command_task_create(kernel);
 	command_ndrange_kernel cmd = command_task_create(kernel);
 
 

+ 3 - 2
socl/src/cl_enqueueunmapmemobject.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,13 +32,14 @@ cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueUnmapMemObject(cl_command_queue cq,
 soclEnqueueUnmapMemObject(cl_command_queue cq,
 			  cl_mem            buffer,
 			  cl_mem            buffer,
 			  void *            ptr,
 			  void *            ptr,
 			  cl_uint           num_events,
 			  cl_uint           num_events,
 			  const cl_event *  events,
 			  const cl_event *  events,
-			  cl_event *        event) CL_API_SUFFIX__VERSION_1_0
+			  cl_event *        event)
 {
 {
 	command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr);
 	command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr);
 
 

+ 3 - 2
socl/src/cl_enqueuewaitforevents.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,10 +18,11 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWaitForEvents(cl_command_queue cq,
 soclEnqueueWaitForEvents(cl_command_queue cq,
 			 cl_uint          num_events,
 			 cl_uint          num_events,
-			 const cl_event * events) CL_API_SUFFIX__VERSION_1_0
+			 const cl_event * events)
 {
 {
 	command_marker cmd = command_marker_create();
 	command_marker cmd = command_marker_create();
 
 

+ 3 - 2
socl/src/cl_enqueuewritebuffer.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014, 2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -127,6 +127,7 @@ cl_int command_write_buffer_submit(command_write_buffer cmd)
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWriteBuffer(cl_command_queue cq,
 soclEnqueueWriteBuffer(cl_command_queue cq,
 		       cl_mem             buffer,
 		       cl_mem             buffer,
@@ -136,7 +137,7 @@ soclEnqueueWriteBuffer(cl_command_queue cq,
 		       const void *       ptr,
 		       const void *       ptr,
 		       cl_uint            num_events,
 		       cl_uint            num_events,
 		       const cl_event *   events,
 		       const cl_event *   events,
-		       cl_event *         event) CL_API_SUFFIX__VERSION_1_0
+		       cl_event *         event)
 {
 {
 	command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr);
 	command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr);
 
 

+ 3 - 2
socl/src/cl_enqueuewriteimage.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWriteImage(cl_command_queue  UNUSED(command_queue),
 soclEnqueueWriteImage(cl_command_queue  UNUSED(command_queue),
 		      cl_mem              UNUSED(image),
 		      cl_mem              UNUSED(image),
@@ -29,7 +30,7 @@ soclEnqueueWriteImage(cl_command_queue  UNUSED(command_queue),
 		      const void *        UNUSED(ptr),
 		      const void *        UNUSED(ptr),
 		      cl_uint             UNUSED(num_events_in_wait_list),
 		      cl_uint             UNUSED(num_events_in_wait_list),
 		      const cl_event *    UNUSED(event_wait_list),
 		      const cl_event *    UNUSED(event_wait_list),
-		      cl_event *          UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          UNUSED(event))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_finish.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
-soclFinish(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclFinish(cl_command_queue cq)
 {
 {
 	command_barrier cmd = command_barrier_create();
 	command_barrier cmd = command_barrier_create();
 
 

+ 3 - 2
socl/src/cl_flush.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
-soclFlush(cl_command_queue UNUSED(command_queue)) CL_API_SUFFIX__VERSION_1_0
+soclFlush(cl_command_queue UNUSED(command_queue))
 {
 {
 	return CL_SUCCESS;
 	return CL_SUCCESS;
 }
 }

+ 3 - 2
socl/src/cl_getcommandqueueinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetCommandQueueInfo(cl_command_queue    cq,
 soclGetCommandQueueInfo(cl_command_queue    cq,
 			cl_command_queue_info param_name,
 			cl_command_queue_info param_name,
 			size_t                param_value_size,
 			size_t                param_value_size,
 			void *                param_value,
 			void *                param_value,
-			size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			size_t *              param_value_size_ret)
 {
 {
 	if (cq == NULL)
 	if (cq == NULL)
 		return CL_INVALID_COMMAND_QUEUE;
 		return CL_INVALID_COMMAND_QUEUE;

+ 4 - 3
socl/src/cl_getcontextinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011,2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetContextInfo(cl_context       context,
 soclGetContextInfo(cl_context       context,
 		   cl_context_info    param_name,
 		   cl_context_info    param_name,
 		   size_t             param_value_size,
 		   size_t             param_value_size,
 		   void *             param_value,
 		   void *             param_value,
-		   size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           param_value_size_ret)
 {
 {
 	if (context == NULL)
 	if (context == NULL)
 		return CL_INVALID_CONTEXT;
 		return CL_INVALID_CONTEXT;
@@ -33,7 +34,7 @@ soclGetContextInfo(cl_context       context,
 	{
 	{
 		INFO_CASE(CL_CONTEXT_REFERENCE_COUNT, context->_entity.refs);
 		INFO_CASE(CL_CONTEXT_REFERENCE_COUNT, context->_entity.refs);
 		INFO_CASE_EX(CL_CONTEXT_DEVICES, context->devices, context->num_devices * sizeof(cl_device_id));
 		INFO_CASE_EX(CL_CONTEXT_DEVICES, context->devices, context->num_devices * sizeof(cl_device_id));
-		INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_device_id));
+		INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_context_properties));
 	default:
 	default:
 		return CL_INVALID_VALUE;
 		return CL_INVALID_VALUE;
 	}
 	}

+ 3 - 2
socl/src/cl_getdeviceids.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012,2016                           Université de Bordeaux
+ * Copyright (C) 2010-2012,2016, 2018                           Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  * Copyright (C) 2012                                     Vincent Danjean
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,12 +25,13 @@
  *
  *
  * \param[in] platform Must be StarPU platform ID or NULL
  * \param[in] platform Must be StarPU platform ID or NULL
  */
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetDeviceIDs(cl_platform_id   platform,
 soclGetDeviceIDs(cl_platform_id   platform,
 		 cl_device_type   device_type,
 		 cl_device_type   device_type,
 		 cl_uint          num_entries,
 		 cl_uint          num_entries,
 		 cl_device_id *   devices,
 		 cl_device_id *   devices,
-		 cl_uint *        num_devices) CL_API_SUFFIX__VERSION_1_0
+		 cl_uint *        num_devices)
 {
 {
 	if (socl_init_starpu() < 0)
 	if (socl_init_starpu() < 0)
 	{
 	{

+ 3 - 2
socl/src/cl_getdeviceinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetDeviceInfo(cl_device_id    device,
 soclGetDeviceInfo(cl_device_id    device,
 		  cl_device_info  param_name,
 		  cl_device_info  param_name,
 		  size_t          param_value_size,
 		  size_t          param_value_size,
 		  void *          param_value,
 		  void *          param_value,
-		  size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		  size_t *        param_value_size_ret)
 {
 {
 	//FIXME: we do not check if the device is valid
 	//FIXME: we do not check if the device is valid
 	/* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier)
 	/* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier)

+ 3 - 2
socl/src/cl_geteventinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetEventInfo(cl_event       event,
 soclGetEventInfo(cl_event       event,
 		 cl_event_info    param_name,
 		 cl_event_info    param_name,
 		 size_t           param_value_size,
 		 size_t           param_value_size,
 		 void *           param_value,
 		 void *           param_value,
-		 size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		 size_t *         param_value_size_ret)
 {
 {
 	if (event == NULL)
 	if (event == NULL)
 		return CL_INVALID_EVENT;
 		return CL_INVALID_EVENT;

+ 3 - 2
socl/src/cl_geteventprofilinginfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetEventProfilingInfo(cl_event          event,
 soclGetEventProfilingInfo(cl_event          event,
 			  cl_profiling_info   param_name,
 			  cl_profiling_info   param_name,
 			  size_t              param_value_size,
 			  size_t              param_value_size,
 			  void *              param_value,
 			  void *              param_value,
-			  size_t *            param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			  size_t *            param_value_size_ret)
 {
 {
 	switch (param_name)
 	switch (param_name)
 	{
 	{

+ 3 - 2
socl/src/cl_getextensionfunctionaddress.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  * Copyright (C) 2012                                     Vincent Danjean
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,8 +21,9 @@
 #include "socl.h"
 #include "socl.h"
 #include "init.h"
 #include "init.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
 CL_API_ENTRY void * CL_API_CALL
-soclGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0
+soclGetExtensionFunctionAddress(const char * func_name)
 {
 {
 	if (func_name != NULL && strcmp(func_name, "clShutdown") == 0)
 	if (func_name != NULL && strcmp(func_name, "clShutdown") == 0)
 	{
 	{

+ 3 - 2
socl/src/cl_getimageinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetImageInfo(cl_mem           UNUSED(image),
 soclGetImageInfo(cl_mem           UNUSED(image),
 		 cl_image_info    UNUSED(param_name),
 		 cl_image_info    UNUSED(param_name),
 		 size_t           UNUSED(param_value_size),
 		 size_t           UNUSED(param_value_size),
 		 void *           UNUSED(param_value),
 		 void *           UNUSED(param_value),
-		 size_t *         UNUSED(param_value_size_ret)) CL_API_SUFFIX__VERSION_1_0
+		 size_t *         UNUSED(param_value_size_ret))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_getkernelinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetKernelInfo(cl_kernel       kernel,
 soclGetKernelInfo(cl_kernel       kernel,
 		  cl_kernel_info  param_name,
 		  cl_kernel_info  param_name,
 		  size_t          param_value_size,
 		  size_t          param_value_size,
 		  void *          param_value,
 		  void *          param_value,
-		  size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		  size_t *        param_value_size_ret)
 {
 {
 	if (kernel == NULL)
 	if (kernel == NULL)
 		return CL_INVALID_KERNEL;
 		return CL_INVALID_KERNEL;

+ 3 - 2
socl/src/cl_getkernelworkgroupinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,14 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetKernelWorkGroupInfo(cl_kernel                kernel,
 soclGetKernelWorkGroupInfo(cl_kernel                kernel,
 			   cl_device_id               device,
 			   cl_device_id               device,
 			   cl_kernel_work_group_info  param_name,
 			   cl_kernel_work_group_info  param_name,
 			   size_t                     param_value_size,
 			   size_t                     param_value_size,
 			   void *                     param_value,
 			   void *                     param_value,
-			   size_t *                   param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			   size_t *                   param_value_size_ret)
 {
 {
 	int range = starpu_worker_get_range_by_id(device->worker_id);
 	int range = starpu_worker_get_range_by_id(device->worker_id);
 	cl_device_id dev;
 	cl_device_id dev;

+ 3 - 2
socl/src/cl_getmemobjectinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetMemObjectInfo(cl_mem           mem,
 soclGetMemObjectInfo(cl_mem           mem,
 		     cl_mem_info      param_name,
 		     cl_mem_info      param_name,
 		     size_t           param_value_size,
 		     size_t           param_value_size,
 		     void *           param_value,
 		     void *           param_value,
-		     size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		     size_t *         param_value_size_ret)
 {
 {
 	static cl_mem_object_type mot = CL_MEM_OBJECT_BUFFER;
 	static cl_mem_object_type mot = CL_MEM_OBJECT_BUFFER;
 
 

+ 3 - 2
socl/src/cl_getplatformids.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,10 +23,11 @@ extern int _starpu_init_failed;
 /**
 /**
  * \brief Get StarPU platform ID
  * \brief Get StarPU platform ID
  */
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetPlatformIDs(cl_uint          num_entries,
 soclGetPlatformIDs(cl_uint          num_entries,
 		   cl_platform_id * platforms,
 		   cl_platform_id * platforms,
-		   cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0
+		   cl_uint *        num_platforms)
 {
 {
 	if (_starpu_init_failed)
 	if (_starpu_init_failed)
 	{
 	{

+ 3 - 2
socl/src/cl_getplatforminfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  * Copyright (C) 2012                                     Vincent Danjean
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,12 +25,13 @@
  *
  *
  * \param[in] platform StarPU platform ID or NULL
  * \param[in] platform StarPU platform ID or NULL
  */
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetPlatformInfo(cl_platform_id   platform,
 soclGetPlatformInfo(cl_platform_id   platform,
 		    cl_platform_info param_name,
 		    cl_platform_info param_name,
 		    size_t           param_value_size,
 		    size_t           param_value_size,
 		    void *           param_value,
 		    void *           param_value,
-		    size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		    size_t *         param_value_size_ret)
 {
 {
 	if (platform != NULL && platform != &socl_platform)
 	if (platform != NULL && platform != &socl_platform)
 		return CL_INVALID_PLATFORM;
 		return CL_INVALID_PLATFORM;

+ 3 - 2
socl/src/cl_getprogrambuildinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,13 +19,14 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetProgramBuildInfo(cl_program          program,
 soclGetProgramBuildInfo(cl_program          program,
 			cl_device_id          UNUSED(device),
 			cl_device_id          UNUSED(device),
 			cl_program_build_info param_name,
 			cl_program_build_info param_name,
 			size_t                param_value_size,
 			size_t                param_value_size,
 			void *                param_value,
 			void *                param_value,
-			size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			size_t *              param_value_size_ret)
 {
 {
 	if (program == NULL)
 	if (program == NULL)
 		return CL_INVALID_PROGRAM;
 		return CL_INVALID_PROGRAM;

+ 3 - 2
socl/src/cl_getprograminfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetProgramInfo(cl_program       program,
 soclGetProgramInfo(cl_program       program,
 		   cl_program_info    param_name,
 		   cl_program_info    param_name,
 		   size_t             param_value_size,
 		   size_t             param_value_size,
 		   void *             param_value,
 		   void *             param_value,
-		   size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           param_value_size_ret)
 {
 {
 	if (program == NULL)
 	if (program == NULL)
 		return CL_INVALID_PROGRAM;
 		return CL_INVALID_PROGRAM;

+ 3 - 2
socl/src/cl_getsamplerinfo.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "socl.h"
 #include "getinfo.h"
 #include "getinfo.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetSamplerInfo(cl_sampler       UNUSED(sampler),
 soclGetSamplerInfo(cl_sampler       UNUSED(sampler),
 		   cl_sampler_info    UNUSED(param_name),
 		   cl_sampler_info    UNUSED(param_name),
 		   size_t             UNUSED(param_value_size),
 		   size_t             UNUSED(param_value_size),
 		   void *             UNUSED(param_value),
 		   void *             UNUSED(param_value),
-		   size_t *           UNUSED(param_value_size_ret)) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           UNUSED(param_value_size_ret))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_getsupportedimageformats.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,14 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetSupportedImageFormats(cl_context           UNUSED(context),
 soclGetSupportedImageFormats(cl_context           UNUSED(context),
 			     cl_mem_flags         UNUSED(flags),
 			     cl_mem_flags         UNUSED(flags),
 			     cl_mem_object_type   UNUSED(image_type),
 			     cl_mem_object_type   UNUSED(image_type),
 			     cl_uint              UNUSED(num_entries),
 			     cl_uint              UNUSED(num_entries),
 			     cl_image_format *    UNUSED(image_formats),
 			     cl_image_format *    UNUSED(image_formats),
-			     cl_uint *            UNUSED(num_image_formats)) CL_API_SUFFIX__VERSION_1_0
+			     cl_uint *            UNUSED(num_image_formats))
 {
 {
 	return CL_INVALID_OPERATION;
 	return CL_INVALID_OPERATION;
 }
 }

+ 3 - 2
socl/src/cl_icdgetplatformidskhr.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  * Copyright (C) 2012                                     Vincent Danjean
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,9 +21,10 @@
 
 
 extern int _starpu_init_failed;
 extern int _starpu_init_failed;
 
 
+CL_EXT_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint num_entries,
 CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint num_entries,
 							 cl_platform_id *platforms,
 							 cl_platform_id *platforms,
-							 cl_uint *num_platforms) CL_EXT_SUFFIX__VERSION_1_0
+							 cl_uint *num_platforms)
 {
 {
 	if ((num_entries == 0 && platforms != NULL)
 	if ((num_entries == 0 && platforms != NULL)
 	    || (num_platforms == NULL && platforms == NULL))
 	    || (num_platforms == NULL && platforms == NULL))

+ 3 - 2
socl/src/cl_releasecommandqueue.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
-soclReleaseCommandQueue(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclReleaseCommandQueue(cl_command_queue cq)
 {
 {
 	gc_entity_release(cq);
 	gc_entity_release(cq);
 
 

+ 3 - 2
socl/src/cl_releasecontext.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 
 #include "socl.h"
 #include "socl.h"
 
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 CL_API_ENTRY cl_int CL_API_CALL
-soclReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0
+soclReleaseContext(cl_context context)
 {
 {
 	if (context == NULL)
 	if (context == NULL)
 		return CL_INVALID_CONTEXT;
 		return CL_INVALID_CONTEXT;

+ 0 - 0
socl/src/cl_releaseevent.c


Some files were not shown because too many files changed in this diff