Pārlūkot izejas kodu

Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu

Olivier Aumage 7 gadi atpakaļ
vecāks
revīzija
4f81cd3a92
100 mainītis faili ar 1242 papildinājumiem un 782 dzēšanām
  1. 22 0
      configure.ac
  2. 3 2
      doc/doxygen/chapters/310_data_management.doxy
  3. 2 1
      doc/doxygen/chapters/510_configure_options.doxy
  4. 15 15
      doc/doxygen/chapters/api/insert_task.doxy
  5. 2 2
      examples/common/blas.c
  6. 2 2
      examples/common/blas.h
  7. 5 1
      examples/common/blas_model.h
  8. 3 2
      examples/filters/fmultiple_submit.c
  9. 3 2
      examples/filters/fmultiple_submit_implicit.c
  10. 3 2
      examples/filters/fmultiple_submit_readonly.c
  11. 6 1
      examples/heat/dw_factolu.c
  12. 2 2
      examples/lu/blas_complex.c
  13. 2 2
      examples/lu/blas_complex.h
  14. 5 4
      examples/lu/lu_example.c
  15. 12 17
      examples/lu/xlu.c
  16. 4 4
      examples/lu/xlu.h
  17. 12 14
      examples/lu/xlu_implicit.c
  18. 20 22
      examples/lu/xlu_implicit_pivot.c
  19. 13 1
      examples/lu/xlu_kernels.c
  20. 19 21
      examples/lu/xlu_pivot.c
  21. 1 1
      examples/reductions/dot_product.c
  22. 225 227
      examples/sched_ctx/parallel_tasks_reuse_handle.c
  23. 2 1
      include/starpu_config.h.in
  24. 6 5
      include/starpu_task_util.h
  25. 2 1
      include/starpu_util.h
  26. 3 2
      mpi/examples/mpi_lu/plu_example.c
  27. 3 2
      mpi/examples/mpi_lu/plu_implicit_example.c
  28. 3 2
      mpi/examples/mpi_lu/plu_outofcore_example.c
  29. 3 2
      mpi/examples/mpi_lu/pxlu.c
  30. 2 2
      mpi/examples/mpi_lu/pxlu.h
  31. 3 3
      mpi/examples/mpi_lu/pxlu_implicit.c
  32. 9 1
      mpi/examples/mpi_lu/pxlu_kernels.c
  33. 3 1
      mpi/src/Makefile.am
  34. 2 2
      mpi/src/mpi/starpu_mpi_comm.c
  35. 30 143
      mpi/src/mpi/starpu_mpi_mpi.c
  36. 35 133
      mpi/src/nmad/starpu_mpi_nmad.c
  37. 58 16
      mpi/src/starpu_mpi.c
  38. 3 1
      mpi/src/starpu_mpi_cache.c
  39. 269 0
      mpi/src/starpu_mpi_coop_sends.c
  40. 1 0
      mpi/src/starpu_mpi_init.c
  41. 17 1
      mpi/src/starpu_mpi_private.c
  42. 58 4
      mpi/src/starpu_mpi_private.h
  43. 161 0
      mpi/src/starpu_mpi_req.c
  44. 1 0
      mpi/tests/Makefile.am
  45. 26 6
      mpi/tests/broadcast.c
  46. 1 2
      mpi/tests/user_defined_datatype_value.h
  47. 3 2
      socl/src/cl_buildprogram.c
  48. 3 2
      socl/src/cl_createbuffer.c
  49. 3 2
      socl/src/cl_createcommandqueue.c
  50. 3 2
      socl/src/cl_createcontext.c
  51. 3 2
      socl/src/cl_createcontextfromtype.c
  52. 3 2
      socl/src/cl_createimage2d.c
  53. 3 2
      socl/src/cl_createimage3d.c
  54. 3 2
      socl/src/cl_createkernel.c
  55. 3 2
      socl/src/cl_createkernelsinprogram.c
  56. 3 2
      socl/src/cl_createprogramwithbinary.c
  57. 3 2
      socl/src/cl_createprogramwithsource.c
  58. 3 2
      socl/src/cl_createsampler.c
  59. 3 2
      socl/src/cl_enqueuebarrier.c
  60. 3 2
      socl/src/cl_enqueuebarrierwithwaitlist.c
  61. 3 2
      socl/src/cl_enqueuecopybuffer.c
  62. 3 2
      socl/src/cl_enqueuecopybuffertoimage.c
  63. 3 2
      socl/src/cl_enqueuecopyimage.c
  64. 3 2
      socl/src/cl_enqueuecopyimagetobuffer.c
  65. 3 2
      socl/src/cl_enqueuemapbuffer.c
  66. 3 2
      socl/src/cl_enqueuemapimage.c
  67. 3 2
      socl/src/cl_enqueuemarker.c
  68. 3 2
      socl/src/cl_enqueuemarkerwithwaitlist.c
  69. 3 2
      socl/src/cl_enqueuenativekernel.c
  70. 3 2
      socl/src/cl_enqueuendrangekernel.c
  71. 3 2
      socl/src/cl_enqueuereadbuffer.c
  72. 3 2
      socl/src/cl_enqueuereadimage.c
  73. 3 2
      socl/src/cl_enqueuetask.c
  74. 3 2
      socl/src/cl_enqueueunmapmemobject.c
  75. 3 2
      socl/src/cl_enqueuewaitforevents.c
  76. 3 2
      socl/src/cl_enqueuewritebuffer.c
  77. 3 2
      socl/src/cl_enqueuewriteimage.c
  78. 3 2
      socl/src/cl_finish.c
  79. 3 2
      socl/src/cl_flush.c
  80. 3 2
      socl/src/cl_getcommandqueueinfo.c
  81. 4 3
      socl/src/cl_getcontextinfo.c
  82. 3 2
      socl/src/cl_getdeviceids.c
  83. 3 2
      socl/src/cl_getdeviceinfo.c
  84. 3 2
      socl/src/cl_geteventinfo.c
  85. 3 2
      socl/src/cl_geteventprofilinginfo.c
  86. 3 2
      socl/src/cl_getextensionfunctionaddress.c
  87. 3 2
      socl/src/cl_getimageinfo.c
  88. 3 2
      socl/src/cl_getkernelinfo.c
  89. 3 2
      socl/src/cl_getkernelworkgroupinfo.c
  90. 3 2
      socl/src/cl_getmemobjectinfo.c
  91. 3 2
      socl/src/cl_getplatformids.c
  92. 3 2
      socl/src/cl_getplatforminfo.c
  93. 3 2
      socl/src/cl_getprogrambuildinfo.c
  94. 3 2
      socl/src/cl_getprograminfo.c
  95. 3 2
      socl/src/cl_getsamplerinfo.c
  96. 3 2
      socl/src/cl_getsupportedimageformats.c
  97. 3 2
      socl/src/cl_icdgetplatformidskhr.c
  98. 3 2
      socl/src/cl_releasecommandqueue.c
  99. 3 2
      socl/src/cl_releasecontext.c
  100. 0 0
      socl/src/cl_releaseevent.c

+ 22 - 0
configure.ac

@@ -2937,6 +2937,8 @@ AC_ARG_ENABLE(blas-lib,
         blas_lib=atlas
      elif test "x$enableval" = "xgoto" ; then
         blas_lib=goto
+     elif test "x$enableval" = "xopenblas" ; then
+        blas_lib=openblas
      elif test "x$enableval" = "xnone" ; then
         blas_lib=none
      elif test "x$enableval" = "xmkl" ; then
@@ -2998,6 +3000,26 @@ if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then
     fi
 fi
 
+if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then # OpenBLAS is selected only when both pkg-config modules are found
+    PKG_CHECK_MODULES([OPENBLAS],  [openblas],  [
+      PKG_CHECK_MODULES([BLAS_OPENBLAS],  [blas-openblas],  [
+        AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.])
+        AC_SUBST([STARPU_OPENBLAS], [1])
+        CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} "
+        LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} "
+        blas_lib=openblas
+      ], [
+        if test x$blas_lib = xopenblas; then
+          AC_MSG_ERROR([cannot find blas-openblas lib])
+        fi
+      ])
+    ], [
+      if test x$blas_lib = xopenblas; then
+        AC_MSG_ERROR([cannot find openblas lib])
+      fi
+    ])
+fi
+
 if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then
     # Should we use MKL ?
     if test -n "$MKLROOT"

+ 3 - 2
doc/doxygen/chapters/310_data_management.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -871,6 +871,7 @@ struct starpu_codelet cl =
 \endcode
 
 the first data of the task will be kept in the main memory, while the second
-data will be copied to the CUDA GPU as usual.
+data will be copied to the CUDA GPU as usual. A working example is available in
+<c>tests/datawizard/specific_node.c</c>.
 
 */

+ 2 - 1
doc/doxygen/chapters/510_configure_options.doxy

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2013,2015-2017                      Inria
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2013-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -540,6 +540,7 @@ Specify the blas library to be used by some of the examples. Librairies availabl
 - none [default] : no BLAS library is used
 - atlas: use ATLAS library
 - goto: use GotoBLAS library
+- openblas: use OpenBLAS library
 - mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)
 </dd>
 

+ 15 - 15
doc/doxygen/chapters/api/insert_task.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2009-2011,2014-2016,2018                 Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  *
@@ -169,18 +169,17 @@ room again with this function, store yet more handles, etc.
 
 \fn void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle)
 \ingroup API_Insert_Task
-This stores data handle \p handle into task \p task with mode \p arg_type,
+Store data handle \p handle into task \p task with mode \p arg_type,
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 
 \fn void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles)
 \ingroup API_Insert_Task
-This stores \p nb_handles data handles \p handles into task \p task, updating \p
+Store \p nb_handles data handles \p handles into task \p task, updating \p
 *allocated_buffers and \p *current_buffer accordingly.
 
-
 \fn void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs);
 \ingroup API_Insert_Task
-This stores \p nb_descrs data handles described by \p descrs into task \p task,
+Store \p nb_descrs data handles described by \p descrs into task \p task,
 updating \p *allocated_buffers and \p *current_buffer accordingly.
 
 \fn void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
@@ -193,20 +192,21 @@ Instead of calling starpu_codelet_pack_args(), one can also call
 starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each
 data, then starpu_codelet_pack_arg_fini().
 
-\fn void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg *state)
+\fn void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state)
 \ingroup API_Insert_Task
-Initiaze struct starpu_codelet_pack_arg before calling starpu_codelet_pack_arg and
-starpu_codelet_pack_arg_fini. This will simply initialize the content of the structure.
+Initialize struct starpu_codelet_pack_arg_data before calling starpu_codelet_pack_arg() and
+starpu_codelet_pack_arg_fini(). This will simply initialize the content of the structure.
 
-\fn void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg *state, void *ptr, size_t ptr_size)
-Pack one argument into struct starpu_codelet_pack_arg state. That structure
-has to be initialized before with starpu_codelet_pack_arg_init, and after all
-starpu_codelet_pack_arg calls performed, starpu_codelet_pack_arg_fini has to be
-used to get the cl_arg and cl_arg_size to be put in the task.
+\fn void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, void *ptr, size_t ptr_size)
+\ingroup API_Insert_Task
+Pack one argument into struct starpu_codelet_pack_arg_data \p state. That structure
+has to be initialized beforehand with starpu_codelet_pack_arg_init(), and once all
+starpu_codelet_pack_arg() calls have been performed, starpu_codelet_pack_arg_fini() has to be
+used to get the \p cl_arg and \p cl_arg_size to be put in the task.
 
-\fn void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg *state, void **cl_arg, size_t *cl_arg_size)
+\fn void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size)
 \ingroup API_Insert_Task
-Finish packing data, after calling starpu_codelet_pack_arg_init once and starpu_codelet_pack_arg several times.
+Finish packing data, after calling starpu_codelet_pack_arg_init() once and starpu_codelet_pack_arg() several times.
 
 \fn void starpu_codelet_unpack_args(void *cl_arg, ...)
 \ingroup API_Insert_Task

+ 2 - 2
examples/common/blas.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2011,2014-2015                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015, 2018                Université de Bordeaux
  * Copyright (C) 2010,2015,2017                           CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -239,7 +239,7 @@ void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int i
 	cblas_dswap(n, x, incx, y, incy);
 }
 
-#elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
+#elif defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
 
 inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
 			float alpha, const float *A, int lda, const float *B, int ldb, 

+ 2 - 2
examples/common/blas.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2011,2014, 2018                     Université de Bordeaux
  * Copyright (C) 2010,2015,2017                           CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -88,7 +88,7 @@ void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda);
 void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda);
 #endif
 
-#if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
+#if defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
 
 extern void sgemm_ (const char *transa, const char *transb, const int *m,
                    const int *n, const int *k, const float *alpha, 

+ 5 - 1
examples/common/blas_model.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2008-2012,2014                           Université de Bordeaux
+ * Copyright (C) 2008-2012,2014, 2018                     Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,6 +29,8 @@ static struct starpu_perfmodel starpu_sgemm_model =
 	.symbol = "sgemm_atlas"
 #elif defined(STARPU_GOTO)
 	.symbol = "sgemm_goto"
+#elif defined(STARPU_OPENBLAS)
+	.symbol = "sgemm_openblas"
 #else
 	.symbol = "sgemm"
 #endif
@@ -47,6 +49,8 @@ static struct starpu_perfmodel starpu_dgemm_model =
 	.symbol = "dgemm_atlas"
 #elif defined(STARPU_GOTO)
 	.symbol = "dgemm_goto"
+#elif defined(STARPU_OPENBLAS)
+	.symbol = "dgemm_openblas"
 #else
 	.symbol = "dgemm"
 #endif

+ 3 - 2
examples/filters/fmultiple_submit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -117,7 +117,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 	}

+ 3 - 2
examples/filters/fmultiple_submit_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -164,7 +164,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;

+ 3 - 2
examples/filters/fmultiple_submit_readonly.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2015,2017                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -165,7 +165,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Disable codelet on CPUs if we have a CUDA device, to force remote execution on the CUDA device */
-	if (starpu_cuda_worker_get_count()) {
+	if (starpu_cuda_worker_get_count())
+	{
 		cl_check_scale.cpu_funcs[0] = NULL;
 		cl_check_scale.cpu_funcs_name[0] = NULL;
 		cl_check.cpu_funcs[0] = NULL;

+ 6 - 1
examples/heat/dw_factolu.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2013                                Inria
- * Copyright (C) 2008-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2008-2015,2017-2018                      Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
@@ -766,6 +766,11 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 	char * symbol_12 = "lu_model_12_goto";
 	char * symbol_21 = "lu_model_21_goto";
 	char * symbol_22 = "lu_model_22_goto";
+#elif defined(STARPU_OPENBLAS)
+	char * symbol_11 = "lu_model_11_openblas";
+	char * symbol_12 = "lu_model_12_openblas";
+	char * symbol_21 = "lu_model_21_openblas";
+	char * symbol_22 = "lu_model_22_openblas";
 #else
 	char * symbol_11 = "lu_model_11";
 	char * symbol_12 = "lu_model_12";

+ 2 - 2
examples/lu/blas_complex.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2010,2012,2015,2017                      CNRS
- * Copyright (C) 2009-2010,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2010,2014, 2018                     Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +33,7 @@
 #error not implemented
 #elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #error not implemented
-#elif defined(STARPU_MKL)
+#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL)
 
 inline void CGEMM(char *transa, char *transb, int M, int N, int K, 
 			complex float alpha, complex float *A, int lda, complex float *B, int ldb, 

+ 2 - 2
examples/lu/blas_complex.h

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2010,2012,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2011,2014, 2018                     Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -82,7 +82,7 @@ void ZSWAP(const int n, complex double *x, const int incx, complex double *y, co
 
 #if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
 #error not implemented
-#elif defined(STARPU_MKL)
+#elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL)
 
 extern void cgemm_ (const char *transa, const char *transb, const int *m,
                    const int *n, const int *k, const complex float *alpha, 

+ 5 - 4
examples/lu/lu_example.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2009-2017                                Université de Bordeaux
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,6 +34,7 @@ static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
 static unsigned profile = 0;
+static unsigned no_prio=0;
 unsigned bound = 0;
 unsigned bounddeps = 0;
 unsigned boundprio = 0;
@@ -367,7 +368,7 @@ int main(int argc, char **argv)
 			A_blocks = malloc(nblocks*nblocks*sizeof(TYPE *));
 			copy_matrix_into_blocks();
 
-			ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks);
+			ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks, no_prio);
 
 			copy_blocks_into_matrix();
 			free(A_blocks);
@@ -379,7 +380,7 @@ int main(int argc, char **argv)
 
 			start = starpu_timing_now();
 
-			ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks);
+			ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks, no_prio);
 
 			end = starpu_timing_now();
 
@@ -394,7 +395,7 @@ int main(int argc, char **argv)
 	else
 #endif
 	{
-		ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks);
+		ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks, no_prio);
 	}
 
 	if (profile)

+ 12 - 17
examples/lu/xlu.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2013                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,11 +21,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-static unsigned no_prio = 0;
-
-
-
-
 /*
  *	Construct the DAG
  */
@@ -41,7 +36,7 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 }
 
-static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k)
+static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k, unsigned no_prio)
 {
 /*	printf("task 11 k = %d TAG = %llx\n", k, (TAG11(k))); */
 
@@ -65,7 +60,7 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 	return task;
 }
 
-static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio)
 {
 	int ret;
 
@@ -99,7 +94,7 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	return ret;
 }
 
-static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = create_task(TAG21(k, i));
@@ -130,7 +125,7 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	return ret;
 }
 
-static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio)
 {
 	int ret;
 
@@ -169,7 +164,7 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
  *	code to bootstrap the factorization
  */
 
-static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio)
 {
 	int ret;
 	double start;
@@ -186,7 +181,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	for (k = 0; k < nblocks; k++)
 	{
 		starpu_iteration_push(k);
-		struct starpu_task *task = create_task_11(dataA, k);
+		struct starpu_task *task = create_task_11(dataA, k, no_prio);
 
 		/* we defer the launch of the first task */
 		if (k == 0)
@@ -202,9 +197,9 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 		for (i = k+1; i<nblocks; i++)
 		{
-			ret = create_task_12(dataA, k, i);
+			ret = create_task_12(dataA, k, i, no_prio);
 			if (ret == -ENODEV) return ret;
-			ret = create_task_21(dataA, k, i);
+			ret = create_task_21(dataA, k, i, no_prio);
 			if (ret == -ENODEV) return ret;
 		}
 
@@ -212,7 +207,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		{
 			for (j = k+1; j<nblocks; j++)
 			{
-			     ret = create_task_22(dataA, k, i, j);
+			     ret = create_task_22(dataA, k, i, j, no_prio);
 			     if (ret == -ENODEV) return ret;
 			}
 		}
@@ -253,7 +248,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	return 0;
 }
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	starpu_data_handle_t dataA;
 
@@ -278,7 +273,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	int ret = dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks, no_prio);
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

+ 4 - 4
examples/lu/xlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2011,2013-2014,2017                 Université de Bordeaux
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -121,8 +121,8 @@ struct piv_s
 	unsigned last; /* last element */
 };
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks);
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio);
 
 #endif /* __XLU_H__ */

+ 12 - 14
examples/lu/xlu_implicit.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2011,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -22,9 +22,7 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-static unsigned no_prio = 0;
-
-static int create_task_11(starpu_data_handle_t dataA, unsigned k)
+static int create_task_11(starpu_data_handle_t dataA, unsigned k, unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -44,7 +42,7 @@ static int create_task_11(starpu_data_handle_t dataA, unsigned k)
 	return ret;
 }
 
-static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -64,7 +62,7 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	return ret;
 }
 
-static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -85,7 +83,7 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	return ret;
 }
 
-static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -111,7 +109,7 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
  *	code to bootstrap the factorization
  */
 
-static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio)
 {
 	double start;
 	double end;
@@ -130,14 +128,14 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 		starpu_iteration_push(k);
 
-		ret = create_task_11(dataA, k);
+		ret = create_task_11(dataA, k, no_prio);
 		if (ret == -ENODEV) return ret;
 
 		for (i = k+1; i<nblocks; i++)
 		{
-		     ret = create_task_12(dataA, k, i);
+			ret = create_task_12(dataA, k, i, no_prio);
 		     if (ret == -ENODEV) return ret;
-		     ret = create_task_21(dataA, k, i);
+		     ret = create_task_21(dataA, k, i, no_prio);
 		     if (ret == -ENODEV) return ret;
 		}
 		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));
@@ -145,7 +143,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		for (i = k+1; i<nblocks; i++)
 		     for (j = k+1; j<nblocks; j++)
 		     {
-			  ret = create_task_22(dataA, k, i, j);
+			     ret = create_task_22(dataA, k, i, j, no_prio);
 			  if (ret == -ENODEV) return ret;
 		     }
 		for (i = k+1; i<nblocks; i++)
@@ -184,7 +182,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	return 0;
 }
 
-int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	starpu_data_handle_t dataA;
 
@@ -206,7 +204,7 @@ int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	int ret = dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks, no_prio);
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

+ 20 - 22
examples/lu/xlu_implicit_pivot.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2015,2017                           Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,8 +23,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-static unsigned no_prio = 0;
-
 /*
  *	Construct the DAG
  */
@@ -32,7 +30,7 @@ static unsigned no_prio = 0;
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 			     struct piv_s *piv_description,
 			     unsigned k, unsigned i,
-			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -58,7 +56,7 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 
 static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 				unsigned k, struct piv_s *piv_description,
-				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -83,7 +81,7 @@ static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 }
 
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -105,7 +103,7 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -127,7 +125,7 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -157,7 +155,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 				  struct piv_s *piv_description,
 				  unsigned nblocks,
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
-				  double *timing)
+				  double *timing, unsigned no_prio)
 {
 	double start;
 	double end;
@@ -176,32 +174,32 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 		starpu_iteration_push(k);
 
-		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio);
 		if (ret == -ENODEV) return ret;
 
 		for (i = 0; i < nblocks; i++)
 		{
 			if (i != k)
 			{
-			     ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
-			     if (ret == -ENODEV) return ret;
+				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio);
+				if (ret == -ENODEV) return ret;
 			}
 		}
 
 		for (i = k+1; i<nblocks; i++)
 		{
-		     ret = create_task_12(dataAp, nblocks, k, i, get_block);
-		     if (ret == -ENODEV) return ret;
-		     ret = create_task_21(dataAp, nblocks, k, i, get_block);
-		     if (ret == -ENODEV) return ret;
+			ret = create_task_12(dataAp, nblocks, k, i, get_block, no_prio);
+			if (ret == -ENODEV) return ret;
+			ret = create_task_21(dataAp, nblocks, k, i, get_block, no_prio);
+			if (ret == -ENODEV) return ret;
 		}
 		starpu_data_wont_use(get_block(dataAp, nblocks, k, k));
 
 		for (i = k+1; i<nblocks; i++)
 		     for (j = k+1; j<nblocks; j++)
 		     {
-			  ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
-			  if (ret == -ENODEV) return ret;
+			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block, no_prio);
+			     if (ret == -ENODEV) return ret;
 		     }
 		for (i = k+1; i<nblocks; i++)
 		{
@@ -231,7 +229,7 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp, unsig
 }
 
 
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
 		/* These won't work with pivoting: we pass a pointer in cl_args */
@@ -271,7 +269,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	}
 
 	double timing;
-	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing, no_prio);
 	if (ret)
 		return ret;
 
@@ -307,7 +305,7 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 	return dataAp[i+j*nblocks];
 }
 
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	(void)ld;
 	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));
@@ -337,7 +335,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	}
 
 	double timing;
-	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing, no_prio);
 	if (ret)
 		return ret;
 

+ 13 - 1
examples/lu/xlu_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012                                Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
+ * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -107,6 +107,8 @@ static struct starpu_perfmodel STARPU_LU(model_22) =
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_22_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_22_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_22)
 #endif
@@ -228,6 +230,8 @@ static struct starpu_perfmodel STARPU_LU(model_12) =
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_12_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_12_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_12)
 #endif
@@ -315,6 +319,8 @@ static struct starpu_perfmodel STARPU_LU(model_21) =
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_21_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_21_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_21)
 #endif
@@ -433,6 +439,8 @@ static struct starpu_perfmodel STARPU_LU(model_11) =
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_11_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_11_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_11)
 #endif
@@ -602,6 +610,8 @@ static struct starpu_perfmodel STARPU_LU(model_11_pivot) =
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_11_pivot_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_11_pivot)
 #endif
@@ -703,6 +713,8 @@ static struct starpu_perfmodel STARPU_LU(model_pivot) =
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_LU_STR(lu_model_pivot_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_LU_STR(lu_model_pivot_openblas)
 #else
 	.symbol = STARPU_LU_STR(lu_model_pivot)
 #endif

+ 19 - 21
examples/lu/xlu_pivot.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2015,2017                           Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2011,2013                                Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,8 +21,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-static unsigned no_prio = 0;
-
 /*
  *	Construct the DAG
  */
@@ -39,9 +37,9 @@ static struct starpu_task *create_task(starpu_tag_t id)
 }
 
 static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					struct piv_s *piv_description,
-					unsigned k, unsigned i,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			     struct piv_s *piv_description,
+			     unsigned k, unsigned i,
+			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -92,8 +90,8 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 }
 
 static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					unsigned k, struct piv_s *piv_description,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+						unsigned k, struct piv_s *piv_description,
+						starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	struct starpu_task *task = create_task(TAG11(k));
 
@@ -118,7 +116,7 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, un
 }
 
 static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -158,7 +156,7 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 
 static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -186,7 +184,7 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 }
 
 static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
-			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio)
 {
 	int ret;
 
@@ -231,7 +229,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 				  struct piv_s *piv_description,
 				  unsigned nblocks,
 				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
-				  double *timing)
+				  double *timing, unsigned no_prio)
 {
 	int ret;
 
@@ -249,7 +247,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	for (k = 0; k < nblocks; k++)
 	{
 		starpu_iteration_push(k);
-		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio);
 
 		/* we defer the launch of the first task */
 		if (k == 0)
@@ -267,16 +265,16 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		{
 			if (i != k)
 			{
-				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
+				ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio);
 				if (ret == -ENODEV) return ret;
 			}
 		}
 
 		for (i = k+1; i<nblocks; i++)
 		{
-			ret = create_task_12(dataAp, nblocks, k, i, get_block);
+			ret = create_task_12(dataAp, nblocks, k, i, get_block, no_prio);
 			if (ret == -ENODEV) return ret;
-			ret = create_task_21(dataAp, nblocks, k, i, get_block);
+			ret = create_task_21(dataAp, nblocks, k, i, get_block, no_prio);
 			if (ret == -ENODEV) return ret;
 		}
 
@@ -284,7 +282,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		{
 			for (j = k+1; j<nblocks; j++)
 			{
-			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
+			     ret = create_task_22(dataAp, nblocks, k, i, j, get_block, no_prio);
 			     if (ret == -ENODEV) return ret;
 			}
 		}
@@ -332,7 +330,7 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp, unsig
 }
 
 
-int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	starpu_data_handle_t dataA;
 
@@ -380,7 +378,7 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 #endif
 
 	double timing=0.0;
-	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing, no_prio);
 
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
@@ -413,7 +411,7 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 	return dataAp[i+j*nblocks];
 }
 
-int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio)
 {
 	(void)ld;
 	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
@@ -450,7 +448,7 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	}
 
 	double timing=0.0;
-	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing, no_prio);
 
 	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;

+ 1 - 1
examples/reductions/dot_product.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2013,2015                           Inria
- * Copyright (C) 2010-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2010-2015,2017-2018                      Université de Bordeaux
  * Copyright (C) 2011-2013,2015-2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify

+ 225 - 227
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -1,10 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C)                                          Inria
- * Copyright (C)                                          CNRS
  * Copyright (C) 2015-2016                                Université de Bordeaux
- * Copyright (C) 2015,2017                                Inria
- * Copyright (C) 2015-2017                                CNRS
+ * Copyright (C) 2015,2017                                Inria
+ * Copyright (C) 2015-2018                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,226 +15,226 @@
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
-
-#include <starpu.h>
-#include <omp.h>
-#include <pthread.h>
-
-#ifdef STARPU_QUICK_CHECK
-#define NTASKS 64
-#define SIZE   40
-#define LOOPS  4
-#else
-#define NTASKS 100
-#define SIZE   400
-#define LOOPS  10
-#endif
-
-#define N_NESTED_CTXS 2
-
-struct context
-{
-	int ncpus;
-	int *cpus;
-	unsigned id;
-};
-
-/* Helper for the task that will initiate everything */
-void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
-{
-	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
-	int sched_ctx = *(int *)sched_ctx_;
-	int *cpuids = NULL;
-	int ncpuids = 0;
-	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
-
-#pragma omp parallel num_threads(ncpuids)
-	{
-		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
-	}
-
-	omp_set_num_threads(ncpuids);
-	free(cpuids);
-	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
-	return;
-}
-
-void noop(void * buffers[], void * cl_arg)
-{
-	(void)buffers;
-	(void)cl_arg;
-}
-
-static struct starpu_codelet init_parallel_worker_cl=
-{
-	.cpu_funcs = {noop},
-	.nbuffers = 0,
-	.name = "init_parallel_worker"
-};
-
-/* function called to initialize the parallel "workers" */
-void parallel_task_init_one_context(unsigned * context_id)
-{
-	struct starpu_task * t;
-	int ret;
-
-	t = starpu_task_build(&init_parallel_worker_cl,
-			      STARPU_SCHED_CTX, *context_id,
-			      0);
-	t->destroy = 1;
-	t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all;
-	if (t->prologue_callback_pop_arg_free)
-		free(t->prologue_callback_pop_arg);
-	t->prologue_callback_pop_arg=context_id;
-	t->prologue_callback_pop_arg_free=0;
-
-	ret = starpu_task_submit(t);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
-struct context main_context;
-struct context *contexts;
-void parallel_task_init()
-{
-	/* Context creation */
-	main_context.ncpus = starpu_cpu_worker_get_count();
-	main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int));
-	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
-
-	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
-
-	main_context.id = starpu_sched_ctx_create(main_context.cpus,
-						  main_context.ncpus,"main_ctx",
-						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
-						  0);
-
-	/* Initialize nested contexts */
-	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
-	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
-	int i;
-	for(i = 0; i < N_NESTED_CTXS; i++)
-	{
-		contexts[i].ncpus = cpus_per_context;
-		if (i == N_NESTED_CTXS-1)
-			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
-		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
-	}
-
-	for(i = 0; i < N_NESTED_CTXS; i++)
-		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
-							 contexts[i].ncpus,"nested_ctx",
-							 STARPU_SCHED_CTX_NESTED,main_context.id,
-							 0);
-
-	for (i = 0; i < N_NESTED_CTXS; i++)
-	{
-		parallel_task_init_one_context(&contexts[i].id);
-	}
-
-	starpu_task_wait_for_all();
-	starpu_sched_ctx_set_context(&main_context.id);
-}
-
-void parallel_task_deinit()
-{
-	int i;
-	for (i=0; i<N_NESTED_CTXS;i++)
-		starpu_sched_ctx_delete(contexts[i].id);
-	free(contexts);
-	free(main_context.cpus);
-}
-
-/* Codelet SUM */
-static void sum_cpu(void * descr[], void *cl_arg)
-{
-	(void)cl_arg;
-	double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
-	double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
-	double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
-	int size = STARPU_VECTOR_GET_NX(descr[0]);
-
-	int i, k;
-	for (k=0;k<LOOPS;k++)
-	{
-#pragma omp parallel for
-		for (i=0; i<size; i++)
-		{
-			v_dst[i]+=v_src0[i]+v_src1[i];
-		}
-	}
-}
-
-static struct starpu_codelet sum_cl =
-{
-	.cpu_funcs = {sum_cpu, NULL},
-	.nbuffers = 3,
-	.modes={STARPU_RW,STARPU_R, STARPU_R}
-};
-
-int main(void)
-{
-	int ntasks = NTASKS;
-	int ret, j, k;
-	unsigned ncpus = 0;
-
-	ret = starpu_init(NULL);
-	if (ret == -ENODEV)
-		return 77;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
-	{
-		starpu_shutdown();
-		return 77;
-	}
-
-	parallel_task_init();
-
-	/* Data preparation */
-	double array1[SIZE];
-	double array2[SIZE];
-
-	memset(array1, 0, sizeof(double));
-	int i;
-	for (i=0;i<SIZE;i++)
-	{
-		array2[i]=i*2;
-	}
-
-	starpu_data_handle_t handle1;
-	starpu_data_handle_t handle2;
-
-	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
-	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
-
-	for (i = 0; i < ntasks; i++)
-	{
-		struct starpu_task * t;
-		t=starpu_task_build(&sum_cl,
-				    STARPU_RW,handle1,
-				    STARPU_R,handle2,
-				    STARPU_R,handle1,
-				    STARPU_SCHED_CTX, main_context.id,
-				    0);
-		t->destroy = 1;
-		t->possibly_parallel = 1;
-
-		ret=starpu_task_submit(t);
-		if (ret == -ENODEV)
-			goto out;
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-
-
-
-out:
-	/* wait for all tasks at the end*/
-	starpu_task_wait_for_all();
-
-	starpu_data_unregister(handle1);
-	starpu_data_unregister(handle2);
-	parallel_task_deinit();
-
-	starpu_shutdown();
-	return 0;
-}
+
+#include <starpu.h>
+#include <omp.h>
+#include <pthread.h>
+
+#ifdef STARPU_QUICK_CHECK
+#define NTASKS 64
+#define SIZE   40
+#define LOOPS  4
+#else
+#define NTASKS 100
+#define SIZE   400
+#define LOOPS  10
+#endif
+
+#define N_NESTED_CTXS 2
+
+/* Description of one scheduling context used by this example. */
+struct context
+{
+	int ncpus;	/* number of worker ids reachable through cpus */
+	int *cpus;	/* worker ids passed to starpu_sched_ctx_create() */
+	unsigned id;	/* value returned by starpu_sched_ctx_create() */
+};
+
+/*
+ * Helper for the task that will initiate everything.
+ *
+ * Installed as prologue_callback_pop_func of the initialisation task.  It
+ * fetches the CPU ids available to the scheduling context, opens an OpenMP
+ * parallel region of that many threads in which every thread binds itself to
+ * cpuids[its thread number], then calls omp_set_num_threads() with the same
+ * count.  Entry and exit are traced on stderr.
+ *
+ * sched_ctx_ points to the unsigned scheduling context id; it is only read.
+ */
+void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
+{
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
+	/* The id is stored as an unsigned (struct context.id): read it back with
+	 * the same type instead of reinterpreting it as an int. */
+	unsigned sched_ctx = *(unsigned *)sched_ctx_;
+	int *cpuids = NULL;
+	int ncpuids = 0;
+	starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+
+#pragma omp parallel num_threads(ncpuids)
+	{
+		starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+	}
+
+	omp_set_num_threads(ncpuids);
+	/* cpuids was handed over by starpu_sched_ctx_get_available_cpuids() */
+	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
+}
+
+/* CPU kernel that does nothing: both arguments are ignored. */
+void noop(void * buffers[], void * cl_arg)
+{
+	(void)cl_arg;
+	(void)buffers;
+}
+
+/* Codelet of the initialisation task: no data buffer, noop() as CPU kernel. */
+static struct starpu_codelet init_parallel_worker_cl =
+{
+	.name = "init_parallel_worker",
+	.nbuffers = 0,
+	.cpu_funcs = {noop}
+};
+
+/*
+ * Build and submit the initialisation task of one context.
+ *
+ * The task uses init_parallel_worker_cl in scheduling context *context_id and
+ * carries parallel_task_prologue_init_once_and_for_all() as its pop prologue
+ * callback, with context_id itself as callback argument.
+ */
+void parallel_task_init_one_context(unsigned * context_id)
+{
+	int ret;
+	struct starpu_task * init_task = starpu_task_build(&init_parallel_worker_cl,
+							   STARPU_SCHED_CTX, *context_id,
+							   0);
+
+	init_task->destroy = 1;
+
+	/* Release a callback argument the task may already own before
+	 * replacing it with ours. */
+	if (init_task->prologue_callback_pop_arg_free)
+		free(init_task->prologue_callback_pop_arg);
+
+	init_task->prologue_callback_pop_func = parallel_task_prologue_init_once_and_for_all;
+	init_task->prologue_callback_pop_arg = context_id;
+	/* NOTE(review): 0 presumably means the runtime must not free
+	 * context_id, which belongs to the caller -- confirm. */
+	init_task->prologue_callback_pop_arg_free = 0;
+
+	ret = starpu_task_submit(init_task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+}
+
+/* Contexts shared by parallel_task_init(), parallel_task_deinit() and main(). */
+struct context main_context;
+struct context *contexts;
+
+/*
+ * Create the main context over every CPU worker, split those workers among
+ * N_NESTED_CTXS nested contexts (the last one also takes the remainder of
+ * the division), submit the initialisation task of each nested context, wait
+ * for them, and call starpu_sched_ctx_set_context() on the main context.
+ */
+void parallel_task_init()
+{
+	int idx;
+	int share;
+
+	/* Context creation */
+	main_context.ncpus = starpu_cpu_worker_get_count();
+	main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int));
+	fprintf(stderr, "ncpus : %d \n",main_context.ncpus);
+
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus);
+
+	main_context.id = starpu_sched_ctx_create(main_context.cpus,
+						  main_context.ncpus,"main_ctx",
+						  STARPU_SCHED_CTX_POLICY_NAME,"prio",
+						  0);
+
+	/* Initialize nested contexts: each one is a window into the worker
+	 * array of the main context, no id is copied. */
+	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
+	share = main_context.ncpus/N_NESTED_CTXS;
+	for (idx = 0; idx < N_NESTED_CTXS; idx++)
+	{
+		struct context *nested = &contexts[idx];
+
+		nested->cpus = main_context.cpus + idx*share;
+		nested->ncpus = (idx == N_NESTED_CTXS-1)
+			? share + main_context.ncpus%N_NESTED_CTXS
+			: share;
+	}
+
+	for (idx = 0; idx < N_NESTED_CTXS; idx++)
+	{
+		contexts[idx].id = starpu_sched_ctx_create(contexts[idx].cpus,
+							   contexts[idx].ncpus,"nested_ctx",
+							   STARPU_SCHED_CTX_NESTED,main_context.id,
+							   0);
+	}
+
+	for (idx = 0; idx < N_NESTED_CTXS; idx++)
+		parallel_task_init_one_context(&contexts[idx].id);
+
+	/* The initialisation tasks must have run before the contexts are used. */
+	starpu_task_wait_for_all();
+	starpu_sched_ctx_set_context(&main_context.id);
+}
+
+/*
+ * Delete every nested context, then free the two arrays allocated by
+ * parallel_task_init().  The main context itself is not deleted here.
+ */
+void parallel_task_deinit()
+{
+	int idx = 0;
+
+	while (idx < N_NESTED_CTXS)
+	{
+		starpu_sched_ctx_delete(contexts[idx].id);
+		idx++;
+	}
+
+	free(contexts);
+	free(main_context.cpus);
+}
+
+/* Codelet SUM */
+/*
+ * CPU kernel: repeats LOOPS times the element-wise update
+ * dst[i] += src0[i] + src1[i] on the vectors found in descr[0], descr[1] and
+ * descr[2].  The element count is taken from descr[0]; the inner loop is an
+ * OpenMP parallel for.  cl_arg is unused.
+ */
+static void sum_cpu(void * descr[], void *cl_arg)
+{
+	double *dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
+	double *lhs = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
+	double *rhs = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
+	int nelems = STARPU_VECTOR_GET_NX(descr[0]);
+	int pass, idx;
+
+	(void)cl_arg;
+
+	for (pass = 0; pass < LOOPS; pass++)
+	{
+#pragma omp parallel for
+		for (idx = 0; idx < nelems; idx++)
+		{
+			dst[idx] += lhs[idx] + rhs[idx];
+		}
+	}
+}
+
+/* Codelet running sum_cpu on three buffers: the first read-write, the other two read-only. */
+static struct starpu_codelet sum_cl =
+{
+	.nbuffers = 3,
+	.modes = {STARPU_RW, STARPU_R, STARPU_R},
+	.cpu_funcs = {sum_cpu, NULL}
+};
+
+/*
+ * Initialise StarPU and the scheduling contexts, register two vectors of
+ * SIZE doubles, submit NTASKS sum_cl tasks to the main context, wait for
+ * them, then unregister the data and shut everything down.
+ *
+ * Returns 0 on completion, or 77 when StarPU reports -ENODEV at
+ * initialisation or fewer than N_NESTED_CTXS CPU workers are available
+ * (presumably the "test skipped" status expected by the test harness).
+ */
+int main(void)
+{
+	int ntasks = NTASKS;
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
+	{
+		starpu_shutdown();
+		return 77;
+	}
+
+	parallel_task_init();
+
+	/* Data preparation */
+	double array1[SIZE];
+	double array2[SIZE];
+
+	/* Clear the whole accumulator: sizeof(double) would only zero its first
+	 * element and leave the rest uninitialised for the kernel to read. */
+	memset(array1, 0, sizeof(array1));
+	int i;
+	for (i=0;i<SIZE;i++)
+	{
+		array2[i]=i*2;
+	}
+
+	starpu_data_handle_t handle1;
+	starpu_data_handle_t handle2;
+
+	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
+	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
+
+	for (i = 0; i < ntasks; i++)
+	{
+		struct starpu_task * t;
+		/* handle1 is passed twice, once read-write and once read-only:
+		 * this is the handle reuse the example file is named after. */
+		t=starpu_task_build(&sum_cl,
+				    STARPU_RW,handle1,
+				    STARPU_R,handle2,
+				    STARPU_R,handle1,
+				    STARPU_SCHED_CTX, main_context.id,
+				    0);
+		t->destroy = 1;
+		t->possibly_parallel = 1;
+
+		ret=starpu_task_submit(t);
+		if (ret == -ENODEV)
+			goto out;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+
+
+out:
+	/* wait for all tasks at the end*/
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(handle1);
+	starpu_data_unregister(handle2);
+	parallel_task_deinit();
+
+	starpu_shutdown();
+	return 0;
+}

+ 2 - 1
include/starpu_config.h.in

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012,2014,2016-2017                 Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
+ * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -61,6 +61,7 @@
 
 #undef STARPU_ATLAS
 #undef STARPU_GOTO
+#undef STARPU_OPENBLAS
 #undef STARPU_MKL
 #undef STARPU_SYSTEM_BLAS
 

+ 6 - 5
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013-2014                                Inria
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2010-2015, 2018                          Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -77,16 +77,17 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 
 void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
 
-struct starpu_codelet_pack_arg {
+struct starpu_codelet_pack_arg_data
+{
 	char *arg_buffer;
 	size_t arg_buffer_size;
 	size_t current_offset;
 	int nargs;
 };
 
-void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg *state);
-void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg *state, const void *ptr, size_t ptr_size);
-void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg *state, void **cl_arg, size_t *cl_arg_size);
+void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state);
+void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size);
+void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size);
 
 void starpu_codelet_unpack_args(void *cl_arg, ...);
 void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...);

+ 2 - 1
include/starpu_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012,2017                           Inria
- * Copyright (C) 2008-2017                                Université de Bordeaux
+ * Copyright (C) 2008-2018                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -323,6 +323,7 @@ STARPU_ATOMIC_SOMETHINGL(or, old | value)
 #define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (starpu_cmpxchg((ptr), (old), (value)))
 #endif
 
+/* Returns the previous value */
 #ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
 #define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set ((ptr), (value)))
 #define STARPU_RELEASE(ptr) (__sync_lock_release ((ptr)))

+ 3 - 2
mpi/examples/mpi_lu/plu_example.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -37,6 +37,7 @@ static unsigned check = 0;
 static int p = 1;
 static int q = 1;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 
 #ifdef STARPU_HAVE_LIBNUMA
 static unsigned numa = 0;
@@ -509,7 +510,7 @@ int main(int argc, char **argv)
 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 	/*
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/plu_implicit_example.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -37,6 +37,7 @@ static unsigned check = 0;
 static int p = 1;
 static int q = 1;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 
 #ifdef STARPU_HAVE_LIBNUMA
 static unsigned numa = 0;
@@ -301,7 +302,7 @@ int main(int argc, char **argv)
 		free(y);
 	}
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 	/*
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2014                                Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -40,6 +40,7 @@ static unsigned check = 0;
 static int p = 1;
 static int q = 1;
 static unsigned display = 0;
+static unsigned no_prio = 0;
 static char *path = "./starpu-ooc-files";
 
 #ifdef STARPU_HAVE_LIBNUMA
@@ -329,7 +330,7 @@ int main(int argc, char **argv)
 		free(y);
 	}
 
-	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
+	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio);
 
 	/*
 	 * 	Report performance

+ 3 - 2
mpi/examples/mpi_lu/pxlu.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012,2017                                Inria
  * Copyright (C) 2010-2011,2014,2017                      Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -866,7 +866,7 @@ static void wait_termination(void)
  *	code to bootstrap the factorization
  */
 
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
+double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio)
 {
 	double start;
 	double end;
@@ -874,6 +874,7 @@ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
 	nblocks = _nblocks;
 	rank = _rank;
 	world_size = _world_size;
+	no_prio = _no_prio;
 
 	/* create all the DAG nodes */
 	unsigned i,j,k;

+ 2 - 2
mpi/examples/mpi_lu/pxlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011,2014                           Université de Bordeaux
- * Copyright (C) 2010-2012,2014-2015,2017                 CNRS
+ * Copyright (C) 2010-2012,2014-2015,2017,2018            CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -38,7 +38,7 @@ struct debug_info
 	unsigned k;
 };
 
-double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
+double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size, unsigned no_prio);
 
 TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
 void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);

+ 3 - 3
mpi/examples/mpi_lu/pxlu_implicit.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -26,7 +26,6 @@
 //#define DEBUG	1
 
 static unsigned no_prio = 0;
-
 static unsigned nblocks = 0;
 static int rank = -1;
 static int world_size = -1;
@@ -120,7 +119,7 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
  *	code to bootstrap the factorization 
  */
 
-double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
+double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio)
 {
 	double start;
 	double end;
@@ -128,6 +127,7 @@ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
 	nblocks = _nblocks;
 	rank = _rank;
 	world_size = _world_size;
+	no_prio = _no_prio;
 
 	/* create all the DAG nodes */
 	unsigned i,j,k;

+ 9 - 1
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2010-2015                                Université de Bordeaux
+ * Copyright (C) 2010-2015, 2018                          Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -106,6 +106,8 @@ static struct starpu_perfmodel STARPU_PLU(model_22) =
 	.symbol = STARPU_PLU_STR(lu_model_22_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_22_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_22_openblas)
 #else
 	.symbol = STARPU_PLU_STR(lu_model_22)
 #endif
@@ -218,6 +220,8 @@ static struct starpu_perfmodel STARPU_PLU(model_12) =
 	.symbol = STARPU_PLU_STR(lu_model_12_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_12_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_12_openblas)
 #else
 	.symbol = STARPU_PLU_STR(lu_model_12)
 #endif
@@ -331,6 +335,8 @@ static struct starpu_perfmodel STARPU_PLU(model_21) =
 	.symbol = STARPU_PLU_STR(lu_model_21_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_21_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_21_openblas)
 #else
 	.symbol = STARPU_PLU_STR(lu_model_21)
 #endif
@@ -441,6 +447,8 @@ static struct starpu_perfmodel STARPU_PLU(model_11) =
 	.symbol = STARPU_PLU_STR(lu_model_11_atlas)
 #elif defined(STARPU_GOTO)
 	.symbol = STARPU_PLU_STR(lu_model_11_goto)
+#elif defined(STARPU_OPENBLAS)
+	.symbol = STARPU_PLU_STR(lu_model_11_openblas)
 #else
 	.symbol = STARPU_PLU_STR(lu_model_11)
 #endif

+ 3 - 1
mpi/src/Makefile.am

@@ -2,7 +2,7 @@
 #
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2010-2017                                CNRS
-# Copyright (C) 2009-2014                                Université de Bordeaux
+# Copyright (C) 2009-2014, 2018                                Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -78,6 +78,8 @@ noinst_HEADERS =					\
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi.c					\
+	starpu_mpi_req.c				\
+	starpu_mpi_coop_sends.c				\
 	starpu_mpi_helper.c				\
 	starpu_mpi_datatype.c				\
 	starpu_mpi_task_insert.c			\

+ 2 - 2
mpi/src/mpi/starpu_mpi_comm.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     Guillaume Beauchamp
- * Copyright (C) 2011-2017                                CNRS
+ * Copyright (C) 2011-2018                                CNRS
  * Copyright (C) 2014,2017                                Inria
  * Copyright (C) 2011-2017                                Université de Bordeaux
  *
@@ -81,7 +81,7 @@ void _starpu_mpi_comm_shutdown()
 	}
 	free(_starpu_mpi_comms);
 
-	struct _starpu_mpi_comm_hashtable *entry, *tmp;
+	struct _starpu_mpi_comm_hashtable *entry=NULL, *tmp=NULL;
 	HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp)
 	{
 		HASH_DEL(_starpu_mpi_comms_cache, entry);

+ 30 - 143
mpi/src/mpi/starpu_mpi_mpi.c

@@ -50,11 +50,7 @@ static unsigned nready_process;
 /* Number of send requests to submit to MPI at the same time */
 static unsigned ndetached_send;
 
-static int mpi_thread_cpuid = -1;
-static int use_prio = 1;
-
 static void _starpu_mpi_add_sync_point_in_fxt(void);
-static void _starpu_mpi_submit_ready_request(void *arg);
 static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req);
 #ifdef STARPU_MPI_VERBOSE
@@ -87,8 +83,6 @@ static int wait_counter;
 static starpu_pthread_cond_t wait_counter_cond;
 static starpu_pthread_mutex_t wait_counter_mutex;
 #endif
-int _starpu_mpi_fake_world_size = -1;
-int _starpu_mpi_fake_world_rank = -1;
 
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -110,81 +104,6 @@ extern void smpi_process_set_user_data(void *);
 #endif
 #endif
 
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
-{
-	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
-
-	/* Initialize the request structure */
-	(*req)->data_handle = NULL;
-	(*req)->prio = 0;
-
-	(*req)->datatype = 0;
-	(*req)->datatype_name = NULL;
-	(*req)->ptr = NULL;
-	(*req)->count = -1;
-	(*req)->registered_datatype = -1;
-
-	(*req)->node_tag.rank = -1;
-	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.comm = 0;
-
-	(*req)->func = NULL;
-
-	(*req)->status = NULL;
-	(*req)->data_request = 0;
-	(*req)->flag = NULL;
-
-	(*req)->ret = -1;
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
-
-	(*req)->request_type = UNKNOWN_REQ;
-
-	(*req)->submitted = 0;
-	(*req)->completed = 0;
-	(*req)->posted = 0;
-
-	(*req)->other_request = NULL;
-
-	(*req)->sync = 0;
-	(*req)->detached = -1;
-	(*req)->callback = NULL;
-	(*req)->callback_arg = NULL;
-
-	(*req)->size_req = 0;
-	(*req)->internal_req = NULL;
-	(*req)->is_internal_req = 0;
-	(*req)->to_destroy = 1;
-	(*req)->early_data_handle = NULL;
-	(*req)->envelope = NULL;
-	(*req)->sequential_consistency = 1;
-	(*req)->pre_sync_jobid = -1;
-	(*req)->post_sync_jobid = -1;
-
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_init(&((*req)->queue));
-	starpu_pthread_queue_register(&wait, &((*req)->queue));
-	(*req)->done = 0;
-#endif
-}
-
-void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
-{
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->req_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->posted_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->posted_cond);
-	free(req->datatype_name);
-	req->datatype_name = NULL;
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_unregister(&wait, &req->queue);
-	starpu_pthread_queue_destroy(&req->queue);
-#endif
-	free(req);
-}
-
  /********************************************************/
  /*                                                      */
  /*  Send/Receive functionalities                        */
@@ -205,7 +124,28 @@ void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req)
 	_starpu_mpi_submit_ready_request(req);
 }
 
-static void _starpu_mpi_submit_ready_request(void *arg)
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	/* TODO: turn them into redirects & forwards */
+}
+
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data)
+{
+	unsigned i, n = coop_sends->n;
+
+	/* Note: coop_sends might disappear very very soon after last request is submitted */
+	for (i = 0; i < n; i++)
+	{
+		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
+		{
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
+		}
+		/* TODO: handle redirect requests */
+	}
+}
+
+void _starpu_mpi_submit_ready_request(void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 	struct _starpu_mpi_req *req = arg;
@@ -346,58 +286,10 @@ static void nop_acquire_cb(void *arg)
 	starpu_data_release(arg);
 }
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count)
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED)
 {
-	struct _starpu_mpi_req *req;
-
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		/* Don't actually do the communication */
-		starpu_data_acquire_on_node_cb_sequential_consistency(data_handle, STARPU_MAIN_RAM, mode, nop_acquire_cb, data_handle, sequential_consistency);
-		return NULL;
-	}
-
-	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
-
-	_starpu_mpi_comm_register(comm);
-
-	/* Initialize the request structure */
-	_starpu_mpi_request_init(&req);
-	req->request_type = request_type;
-	/* prio_list is sorted by increasing values */
-	if (use_prio)
-		req->prio = prio;
-	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
-	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
-	req->detached = detached;
-	req->sync = sync;
-	req->callback = callback;
-	req->callback_arg = arg;
-	req->func = func;
-	req->sequential_consistency = sequential_consistency;
-	req->is_internal_req = is_internal_req;
-	/* For internal requests, we wait for both the request completion and the matching application request completion */
-	req->to_destroy = !is_internal_req;
-	req->count = count;
-
-	/* Asynchronously request StarPU to fetch the data in main memory: when
-	 * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and
-	 * the request is actually submitted */
-	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_submit_ready_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
-
-	_STARPU_MPI_LOG_OUT();
-	return req;
- }
+}
 
 #ifdef STARPU_SIMGRID
 int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag)
@@ -935,8 +827,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 		_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.rank, req->node_tag.data_tag);
 	}
 
-	if (req->data_handle)
-		starpu_data_release(req->data_handle);
+	_starpu_mpi_release_req_data(req);
 
 	if (req->envelope)
 	{
@@ -1224,16 +1115,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	starpu_pthread_setname("MPI");
 
 #ifndef STARPU_SIMGRID
-	if (mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid >= 0)
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 	_starpu_mpi_do_initialize(argc_argv);
-	if (mpi_thread_cpuid >= 0)
+	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 #endif
 
-	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
-	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+	_starpu_mpi_env_init();
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1578,11 +1468,8 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
         STARPU_PTHREAD_MUTEX_INIT(&mutex_posted_requests, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&mutex_ready_requests, NULL);
 
-        _starpu_mpi_comm_debug = starpu_getenv("STARPU_MPI_COMM") != NULL;
 	nready_process = starpu_get_env_number_default("STARPU_MPI_NREADY_PROCESS", 10);
 	ndetached_send = starpu_get_env_number_default("STARPU_MPI_NDETACHED_SEND", 10);
-	mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
-	use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
 
 #ifdef STARPU_SIMGRID
 	STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL);

+ 35 - 133
mpi/src/nmad/starpu_mpi_nmad.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Guillaume Beauchamp
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                      CNRS
  * Copyright (C) 2009-2014,2017-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -39,20 +39,15 @@
 #include <nm_sendrecv_interface.h>
 #include <nm_mpi_nmad.h>
 
+
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event);
 #ifdef STARPU_VERBOSE
 static char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
 #endif
-static void _starpu_mpi_handle_new_request(void *arg);
 
 static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_add_sync_point_in_fxt(void);
 
-static int mpi_thread_cpuid = -1;
-static int use_prio = 1;
-int _starpu_mpi_fake_world_size = -1;
-int _starpu_mpi_fake_world_rank = -1;
-
 /* Condition to wake up waiting for all current MPI requests to finish */
 static starpu_pthread_t progress_thread;
 static starpu_pthread_cond_t progress_cond;
@@ -72,74 +67,6 @@ static callback_lfstack_t callback_stack = NULL;
 
 static starpu_sem_t callback_sem;
 
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
-{
-	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
-
-	/* Initialize the request structure */
-	(*req)->data_handle = NULL;
-	(*req)->prio = 0;
-	(*req)->completed = 0;
-
-	(*req)->datatype = 0;
-	(*req)->datatype_name = NULL;
-	(*req)->ptr = NULL;
-	(*req)->count = -1;
-	(*req)->registered_datatype = -1;
-
-	(*req)->node_tag.rank = -1;
-	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.comm = 0;
-
-	(*req)->func = NULL;
-
-	(*req)->status = NULL;
-	//	(*req)->data_request = 0;
-	(*req)->flag = NULL;
-
-	(*req)->ret = -1;
-	piom_cond_init(&((*req)->req_cond), 0);
-	//STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
-	//STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
-	//	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
-	//STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
-
-	(*req)->request_type = UNKNOWN_REQ;
-
-	(*req)->submitted = 0;
-	(*req)->completed = 0;
-	(*req)->posted = 0;
-
-	//(*req)->other_request = NULL;
-
-	(*req)->sync = 0;
-	(*req)->detached = -1;
-	(*req)->callback = NULL;
-	(*req)->callback_arg = NULL;
-
-	//	(*req)->size_req = 0;
-	//(*req)->internal_req = NULL;
-	//(*req)->is_internal_req = 0;
-	//(*req)->to_destroy = 1;
-	//(*req)->early_data_handle = NULL;
-	//(*req)->envelope = NULL;
-	(*req)->sequential_consistency = 1;
-	(*req)->pre_sync_jobid = -1;
-	(*req)->post_sync_jobid = -1;
-
-#ifdef STARPU_SIMGRID
-	starpu_pthread_queue_init(&((*req)->queue));
-	starpu_pthread_queue_register(&wait, &((*req)->queue));
-	(*req)->done = 0;
-#endif
-}
-
-void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
-{
-	piom_cond_destroy(&(req->req_cond));
-	free(req);
-}
-
 /********************************************************/
 /*                                                      */
 /*  Send/Receive functionalities                        */
@@ -151,53 +78,9 @@ static void nop_acquire_cb(void *arg)
 	starpu_data_release(arg);
 }
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count)
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED)
 {
-
-	struct _starpu_mpi_req *req;
-
-	if (_starpu_mpi_fake_world_size != -1)
-	{
-		/* Don't actually do the communication */
-		starpu_data_acquire_on_node_cb_sequential_consistency(data_handle, STARPU_MAIN_RAM, mode, nop_acquire_cb, data_handle, sequential_consistency);
-		return NULL;
-	}
-
-	_STARPU_MPI_LOG_IN();
 	STARPU_ATOMIC_ADD( &pending_request, 1);
-
-	/* Initialize the request structure */
-	_starpu_mpi_request_init(&req);
-	req->request_type = request_type;
-	/* prio_list is sorted by increasing values */
-	if (use_prio)
-		req->prio = prio;
-	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
-	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
-	req->detached = detached;
-	req->sync = sync;
-	req->callback = callback;
-	req->callback_arg = arg;
-	req->func = func;
-	req->sequential_consistency = sequential_consistency;
-	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
-
-	/* Asynchronously request StarPU to fetch the data in main memory: when
-	 * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
-	 * the request is actually submitted */
-	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_handle_new_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
-
-	_STARPU_MPI_LOG_OUT();
-	return req;
 }
 
 /********************************************************/
@@ -505,7 +388,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 		        nm_mpi_nmad_data_release(req->datatype);
 			_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
 		}
-		starpu_data_release(req->data_handle);
+		_starpu_mpi_release_req_data(req);
 	}
 
 	/* Execute the specified callback, if any */
@@ -560,13 +443,34 @@ static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req)
 	nm_sr_request_monitor(req->session, &(req->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
 }
 
-static void _starpu_mpi_handle_new_request(void *arg)
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	/* TODO: turn them into redirects & forwards */
+}
+
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data)
+{
+	unsigned i, n = coop_sends->n;
+
+	/* Note: coop_sends might disappear very very soon after last request is submitted */
+	for (i = 0; i < n; i++)
+	{
+		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
+		{
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
+		}
+		/* TODO: handle redirect requests */
+	}
+}
+
+void _starpu_mpi_submit_ready_request(void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 	struct _starpu_mpi_req *req = arg;
 	STARPU_ASSERT_MSG(req, "Invalid request");
 
-	/* submit the request to MPI */
+	/* submit the request to MPI directly from submitter */
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
@@ -581,16 +485,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	starpu_pthread_setname("MPI");
 
 #ifndef STARPU_SIMGRID
-	if (mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid >= 0)
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 	_starpu_mpi_do_initialize(argc_argv);
-	if (mpi_thread_cpuid >= 0)
+	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(mpi_thread_cpuid, STARPU_NOWORKERID);
+		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
 #endif
 
-	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
-	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+	_starpu_mpi_env_init();
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -636,7 +539,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		int err=0;
 
 		if(running || pending_request>0)
-		{/* shall we block ? */
+		{
+			/* shall we block ? */
 			err = starpu_sem_wait(&callback_sem);
 			//running pending_request can change while waiting
 		}
@@ -740,8 +644,6 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
 
 	starpu_sem_init(&callback_sem, 0, 0);
 	running = 0;
-	mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
-	use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
 
 	STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv);
 
@@ -753,7 +655,7 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
         return 0;
 }
 
-void _starpu_mpi_progress_shutdown(void *value)
+void _starpu_mpi_progress_shutdown(void **value)
 {
 	/* kill the progression thread */
         STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);

+ 58 - 16
mpi/src/starpu_mpi.c

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2013,2016-2017                      Inria
- * Copyright (C) 2009-2017                                Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2010-2018                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -41,18 +41,46 @@
 #include <mpi/starpu_mpi_tag.h>
 #endif
 
+static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
+{
+	/* Asynchronously request StarPU to fetch the data in main memory: when
+	 * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and
+	 * the request is actually submitted */
+	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_submit_ready_request, (void *)req, sequential_consistency, &req->pre_sync_jobid, &req->post_sync_jobid);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
 							int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 							int sequential_consistency)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Don't actually do the communication */
+		return NULL;
+	}
+
 #ifdef STARPU_MPI_PEDANTIC_ISEND
-					      STARPU_RW,
+	enum starpu_data_access_mode mode = STARPU_RW;
 #else
-					      STARPU_R,
+	enum starpu_data_access_mode mode = STARPU_R;
 #endif
+
+	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(
+	                                      data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
 					      sequential_consistency, 0, 0);
+	_starpu_mpi_req_willpost(req);
+
+	if (_starpu_mpi_use_coop_sends && detached == 1 && sync == 0 && callback == NULL)
+	{
+		/* It's a send & forget send, we can perhaps optimize its distribution over several nodes */
+		_starpu_mpi_coop_send(data_handle, req, mode, sequential_consistency);
+		return req;
+	}
+
+	/* Post normally */
+	_starpu_mpi_isend_irecv_common(req, mode, sequential_consistency);
+	return req;
 }
 
 int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
@@ -147,7 +175,16 @@ int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starp
 
 struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W, sequential_consistency, is_internal_req, count);
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Don't actually do the communication */
+		return NULL;
+	}
+
+	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, sequential_consistency, is_internal_req, count);
+	_starpu_mpi_req_willpost(req);
+	_starpu_mpi_isend_irecv_common(req, STARPU_W, sequential_consistency);
+	return req;
 }
 
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
@@ -221,14 +258,15 @@ void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
 #endif
 	_starpu_mpi_cache_data_clear(data_handle);
 	free(data_handle->mpi_data);
+	data_handle->mpi_data = NULL;
 }
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
+struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle)
 {
-	struct _starpu_mpi_data *mpi_data;
-	if (data_handle->mpi_data)
+	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
+	if (mpi_data)
 	{
-		mpi_data = data_handle->mpi_data;
+		STARPU_ASSERT(mpi_data->magic == 42);
 	}
 	else
 	{
@@ -237,16 +275,23 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 		mpi_data->node_tag.data_tag = -1;
 		mpi_data->node_tag.rank = -1;
 		mpi_data->node_tag.comm = MPI_COMM_WORLD;
+		_starpu_spin_init(&mpi_data->coop_lock);
 		data_handle->mpi_data = mpi_data;
-#if defined(STARPU_USE_MPI_MPI)
-		_starpu_mpi_tag_data_register(data_handle, data_tag);
-#endif
 		_starpu_mpi_cache_data_init(data_handle);
 		_starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear);
 	}
+	return mpi_data;
+}
+
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
+{
+	struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle);
 
 	if (data_tag != -1)
 	{
+#if defined(STARPU_USE_MPI_MPI)
+		_starpu_mpi_tag_data_register(data_handle, data_tag);
+#endif
 		mpi_data->node_tag.data_tag = data_tag;
 	}
 	if (rank != -1)
@@ -371,9 +416,6 @@ void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle
 {
 	int size, i;
 	starpu_mpi_comm_size(comm, &size);
-#ifdef STARPU_DEVEL
-#warning TODO: use binary communication tree to optimize broadcast
-#endif
 	for (i = 0; i < size; i++)
 		starpu_mpi_get_data_on_node_detached(comm, data_handle, i, NULL, NULL);
 }

+ 3 - 1
mpi/src/starpu_mpi_cache.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2011-2017                                Université de Bordeaux
+ * Copyright (C) 2011-2018                                Université de Bordeaux
  * Copyright (C) 2014                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -371,6 +371,8 @@ static void _starpu_mpi_cache_flush_and_invalidate_nolock(MPI_Comm comm, starpu_
 
 void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 {
+	_starpu_mpi_data_flush(data_handle);
+
 	if (_starpu_cache_enabled == 0)
 		return;
 

+ 269 - 0
mpi/src/starpu_mpi_coop_sends.c

@@ -0,0 +1,269 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2013,2016-2017                      Inria
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2010-2018                                CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_private.h>
+#include <datawizard/coherency.h>
+
+/*
+ * One node sends the same data to several nodes. Gather them into a
+ * "coop_sends", which then has a global view of all the required sends, and can
+ * establish a diffusion tree by telling receiving nodes to retransmit what they
+ * received (forwards) to others, and to others that they will receive from the
+ * former (redirects).
+ */
+
+/* This is called after a request is finished processing, to release the data.
+ *
+ * Requests without a data handle are ignored. A request that is queued in a
+ * cooperative-send bag removes itself from that bag; only the request that
+ * leaves the bag empty frees the bag and releases the data handle. Any other
+ * request releases its data handle directly.
+ */
+void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req)
+{
+	if (!req->data_handle)
+		return;
+
+	if (_starpu_mpi_req_multilist_queued_coop_sends(req))
+	{
+		struct _starpu_mpi_coop_sends *coop_sends = req->coop_sends_head;
+		struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data;
+		int last;
+		/* The bag's request list is modified under the per-data coop_lock */
+		_starpu_spin_lock(&mpi_data->coop_lock);
+		/* Part of a cooperative send, dequeue ourself from others */
+		_starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req);
+		/* Sample emptiness while still holding the lock, so exactly one request sees "last" */
+		last = _starpu_mpi_req_multilist_empty_coop_sends(&coop_sends->reqs);
+		_starpu_spin_unlock(&mpi_data->coop_lock);
+		if (last)
+		{
+			/* We were last, release data */
+			/* NOTE(review): coop_sends->lock was set up with _starpu_spin_init in
+			 * _starpu_mpi_coop_send but is not torn down before the bag is freed;
+			 * confirm whether a destroy call is needed. */
+			free(coop_sends->reqs_array);
+			free(coop_sends);
+			starpu_data_release(req->data_handle);
+		}
+	}
+	else
+	{
+		/* Trivial request */
+		starpu_data_release(req->data_handle);
+	}
+}
+
+/* Comparison function for getting qsort to put requests with high priority first.
+ *
+ * a and b point to array elements, i.e. to pointers to requests.
+ * Returns a negative value when *a has the higher priority, a positive value
+ * when *b has the higher priority, and 0 when both priorities are equal.
+ * The priorities are compared rather than subtracted, so that extreme values
+ * cannot overflow the int result.
+ */
+static int _starpu_mpi_reqs_prio_compare(const void *a, const void *b)
+{
+	const struct _starpu_mpi_req * const *ra = a;
+	const struct _starpu_mpi_req * const *rb = b;
+	return ((*rb)->prio > (*ra)->prio) - ((*rb)->prio < (*ra)->prio);
+}
+
+/* Sort the requests by priority and build a diffusion tree. Actually does something only once per coop_sends bag.
+ *
+ * A bag holding a single request is left untouched. Otherwise, under
+ * coop_sends->lock, the first caller copies the request list into a freshly
+ * allocated array, sorts it with _starpu_mpi_reqs_prio_compare, and calls
+ * _starpu_mpi_coop_sends_build_tree. Later callers find reqs_array already
+ * set and do nothing.
+ */
+static void _starpu_mpi_coop_sends_optimize(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	if (coop_sends->n == 1)
+		/* Trivial case, don't optimize */
+		return;
+
+	_starpu_spin_lock(&coop_sends->lock);
+	/* reqs_array doubles as the "already optimized" marker */
+	if (!coop_sends->reqs_array)
+	{
+		unsigned n = coop_sends->n, i;
+		struct _starpu_mpi_req *cur;
+		struct _starpu_mpi_req **reqs;
+
+		_STARPU_MPI_DEBUG(0, "handling cooperative sends %p for %u neighbours\n", coop_sends, n);
+
+		/* Store them in an array */
+		_STARPU_CALLOC(reqs, n, sizeof(*reqs));
+		for (cur  = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs), i = 0;
+		     cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs);
+		     cur  = _starpu_mpi_req_multilist_next_coop_sends(cur), i++)
+			reqs[i] = cur;
+		coop_sends->reqs_array = reqs;
+
+		/* Sort them */
+		qsort(reqs, n, sizeof(*reqs), _starpu_mpi_reqs_prio_compare);
+
+		/* And build the diffusion tree */
+		_starpu_mpi_coop_sends_build_tree(coop_sends);
+	}
+	_starpu_spin_unlock(&coop_sends->lock);
+}
+
+/* This is called on completion of acquisition of data for a cooperative send.
+ *
+ * arg is the cooperative send bag whose data was acquired. The bag is first
+ * detached from its mpi_data so that no further request can join it, then the
+ * diffusion tree is built if needed, and finally the data sends are submitted.
+ */
+static void _starpu_mpi_coop_sends_data_ready(void *arg)
+{
+	_STARPU_MPI_LOG_IN();
+	struct _starpu_mpi_coop_sends *coop_sends = arg;
+	struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data;
+
+	/* Take the cooperative send bag out from more submissions */
+	/* Unlocked check first, re-checked under the lock before clearing */
+	if (mpi_data->coop_sends == coop_sends)
+	{
+		_starpu_spin_lock(&mpi_data->coop_lock);
+		if (mpi_data->coop_sends == coop_sends)
+			mpi_data->coop_sends = NULL;
+		_starpu_spin_unlock(&mpi_data->coop_lock);
+	}
+
+	/* Build diffusion tree */
+	_starpu_mpi_coop_sends_optimize(coop_sends);
+
+	if (coop_sends->n == 1)
+	{
+		/* Trivial case, just submit it */
+		_starpu_mpi_submit_ready_request(_starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs));
+	}
+	else
+	{
+		/* And submit them */
+		/* The data is always submitted from here. The redirects are submitted
+		 * from here too only when the test-and-set on redirects_sent yields 0,
+		 * i.e. when _starpu_mpi_coop_send_flush has not already claimed them. */
+		if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0)
+			_starpu_mpi_submit_coop_sends(coop_sends, 1, 1);
+		else
+			_starpu_mpi_submit_coop_sends(coop_sends, 0, 1);
+	}
+	_STARPU_MPI_LOG_OUT();
+}
+
+/* This is called when we want to stop including new members in a cooperative send,
+ * either because we know there won't be any other members due to the algorithm
+ * or because the value has changed.
+ *
+ * coop_sends may be NULL, in which case nothing is done. For a bag with more
+ * than one request, the diffusion tree is built if needed and the redirects
+ * are submitted (submit_redirects = 1, submit_data = 0), unless they were
+ * already claimed through redirects_sent. The data itself is not submitted
+ * from here. */
+static void _starpu_mpi_coop_send_flush(struct _starpu_mpi_coop_sends *coop_sends)
+{
+	if (!coop_sends)
+		return;
+
+	/* Build diffusion tree */
+	_starpu_mpi_coop_sends_optimize(coop_sends);
+
+	if (coop_sends->n == 1)
+		/* Trivial case, we will just send the data */
+		return;
+
+	/* And submit them */
+	/* Only the first caller to set redirects_sent submits the redirects */
+	if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0)
+		_starpu_mpi_submit_coop_sends(coop_sends, 1, 0);
+}
+
+/* This is called when a write to the data was just submitted, which means we
+ * can't make future sends cooperate with past sends since it's not the same value.
+ *
+ * It is registered as the implicit data dependency write hook in
+ * _starpu_mpi_initialize, and is also called from starpu_mpi_cache_flush.
+ * Handles without MPI data are ignored. Otherwise the current cooperative
+ * send bag, if any, is detached from the handle under coop_lock and then
+ * flushed outside of the critical section.
+ */
+void _starpu_mpi_data_flush(starpu_data_handle_t data_handle)
+{
+	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
+	struct _starpu_mpi_coop_sends *coop_sends;
+	if (!mpi_data)
+		return;
+
+	/* Detach the current bag so that later sends start a new one */
+	_starpu_spin_lock(&mpi_data->coop_lock);
+	coop_sends = mpi_data->coop_sends;
+	if (coop_sends)
+		mpi_data->coop_sends = NULL;
+	_starpu_spin_unlock(&mpi_data->coop_lock);
+	if (coop_sends)
+	{
+		_STARPU_MPI_DEBUG(0, "%p: data written to, flush cooperative sends %p\n", data_handle, coop_sends);
+		_starpu_mpi_coop_send_flush(coop_sends);
+	}
+}
+
+/* Tell whether req may join the coop_sends bag.
+ *
+ * req is checked against the request at the head of the bag: both must use the
+ * same communicator and the same sequential consistency setting. The tags are
+ * allowed to differ. Returns 1 when compatible, 0 otherwise.
+ */
+static int _starpu_mpi_coop_send_compatible(struct _starpu_mpi_req *req, struct _starpu_mpi_coop_sends *coop_sends)
+{
+	struct _starpu_mpi_req *head = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs);
+
+	if (head->node_tag.comm != req->node_tag.comm)
+		return 0;
+	if (head->sequential_consistency != req->sequential_consistency)
+		return 0;
+	return 1;
+}
+
+/* Queue the send request req for data_handle into a cooperative send bag.
+ *
+ * If the handle already has a current bag that req is compatible with (see
+ * _starpu_mpi_coop_send_compatible), req joins it. Otherwise a new bag
+ * containing only req is created and installed as the current bag of the
+ * handle; a bag displaced this way is flushed, since it will not get any new
+ * member.
+ *
+ * The request which creates a bag is responsible for acquiring the data for
+ * all the members of the bag, with the given access mode and
+ * sequential_consistency; _starpu_mpi_coop_sends_data_ready is called with the
+ * bag once the data is available.
+ */
+void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
+{
+	struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle);
+	struct _starpu_mpi_coop_sends *coop_sends = NULL, *tofree = NULL, *toflush = NULL;
+	int done = 0, queue, first = 1;
+
+	/* Try to add ourself to something existing, otherwise create one.  */
+	while (!done)
+	{
+		_starpu_spin_lock(&mpi_data->coop_lock);
+		if (mpi_data->coop_sends)
+		{
+			/* Already something, check we are coherent with it */
+			queue = _starpu_mpi_coop_send_compatible(req, mpi_data->coop_sends);
+			if (queue)
+			{
+				/* Yes, queue ourself there */
+				if (coop_sends)
+				{
+					/* Remove ourself from what we created for ourself first */
+					_starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req);
+					tofree = coop_sends;
+				}
+				coop_sends = mpi_data->coop_sends;
+				_STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				_starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req);
+				coop_sends->n++;
+				req->coop_sends_head = coop_sends;
+				first = 0;
+				done = 1;
+			}
+			else if (coop_sends)
+			{
+				/* Nope, incompatible, put ours instead */
+				_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				/* Remember the displaced bag, to flush it out of the critical section */
+				toflush = mpi_data->coop_sends;
+				mpi_data->coop_sends = coop_sends;
+				first = 1;
+				done = 1;
+			}
+			/* Otherwise it is incompatible but we do not have our own bag
+			 * yet: allocate one below and retry, so that we never install
+			 * nor acquire data for a NULL bag. */
+		}
+		else if (coop_sends)
+		{
+			/* Nobody else and we have allocated one, we're first! */
+			_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+			mpi_data->coop_sends = coop_sends;
+			first = 1;
+			done = 1;
+		}
+		_starpu_spin_unlock(&mpi_data->coop_lock);
+
+		if (!done && !coop_sends)
+		{
+			/* Didn't find something to join, create one out of critical section */
+			_STARPU_MPI_CALLOC(coop_sends, 1, sizeof(*coop_sends));
+			coop_sends->redirects_sent = 0;
+			coop_sends->n = 1;
+			_starpu_mpi_req_multilist_head_init_coop_sends(&coop_sends->reqs);
+			_starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req);
+			_starpu_spin_init(&coop_sends->lock);
+			req->coop_sends_head = coop_sends;
+			coop_sends->mpi_data = mpi_data;
+		}
+		/* We do at most two iterations */
+		STARPU_ASSERT(done || coop_sends);
+	}
+
+	/* The bag we displaced won't get new members, flush it (no-op when NULL) */
+	_starpu_mpi_coop_send_flush(toflush);
+
+	/* In case we created one for nothing after all */
+	free(tofree);
+
+	if (first)
+	{
+		/* We were first, we are responsible for acquiring the data for everybody */
+		starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_coop_sends_data_ready, coop_sends, sequential_consistency, &req->pre_sync_jobid, NULL);
+	}
+}
+

+ 1 - 0
mpi/src/starpu_mpi_init.c

@@ -109,6 +109,7 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm
 	argc_argv->argc = argc;
 	argc_argv->argv = argv;
 	argc_argv->comm = comm;
+	_starpu_implicit_data_deps_write_hook(_starpu_mpi_data_flush);
 
 #ifdef STARPU_SIMGRID
 	/* Call MPI_Init_thread as early as possible, to initialize simgrid

+ 17 - 1
mpi/src/starpu_mpi_private.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2013,2015-2017                      CNRS
- * Copyright (C) 2010,2012,2014-2016                      Université de Bordeaux
+ * Copyright (C) 2010,2012,2014-2016,2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,6 +23,12 @@ int _starpu_debug_level_max=0;
 int _starpu_mpi_tag = 42;
 int _starpu_mpi_comm_debug;
 
+int _starpu_mpi_thread_cpuid = -1;
+int _starpu_mpi_use_prio = 1;
+int _starpu_mpi_fake_world_size = -1;
+int _starpu_mpi_fake_world_rank = -1;
+int _starpu_mpi_use_coop_sends = 1;
+
 void _starpu_mpi_set_debug_level_min(int level)
 {
 	_starpu_debug_level_min = level;
@@ -50,3 +56,13 @@ char *_starpu_mpi_get_mpi_error_code(int code)
 	MPI_Error_string(code, str, &len);
 	return str;
 }
+
+/* Read the StarPU-MPI settings from the environment into the matching globals:
+ * - STARPU_MPI_COMM: _starpu_mpi_comm_debug is set to 1 when the variable is defined, 0 otherwise
+ * - STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK: _starpu_mpi_fake_world_size and _starpu_mpi_fake_world_rank
+ * - STARPU_MPI_THREAD_CPUID: _starpu_mpi_thread_cpuid, default -1
+ * - STARPU_MPI_PRIORITIES: _starpu_mpi_use_prio, default 1
+ * - STARPU_MPI_COOP_SENDS: _starpu_mpi_use_coop_sends, default 1
+ */
+void _starpu_mpi_env_init(void)
+{
+        _starpu_mpi_comm_debug = starpu_getenv("STARPU_MPI_COMM") != NULL;
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
+	_starpu_mpi_thread_cpuid = starpu_get_env_number_default("STARPU_MPI_THREAD_CPUID", -1);
+	_starpu_mpi_use_prio = starpu_get_env_number_default("STARPU_MPI_PRIORITIES", 1);
+	_starpu_mpi_use_coop_sends = starpu_get_env_number_default("STARPU_MPI_COOP_SENDS", 1);
+}

+ 58 - 4
mpi/src/starpu_mpi_private.h

@@ -26,6 +26,7 @@
 #include <starpu_mpi_fxt.h>
 #include <common/list.h>
 #include <common/prio_list.h>
+#include <common/starpu_spinlock.h>
 #include <core/simgrid.h>
 #if defined(STARPU_USE_MPI_NMAD)
 #include <pioman.h>
@@ -66,6 +67,10 @@ void _starpu_mpi_set_debug_level_max(int level);
 #endif
 extern int _starpu_mpi_fake_world_size;
 extern int _starpu_mpi_fake_world_rank;
+extern int _starpu_mpi_use_prio;
+extern int _starpu_mpi_thread_cpuid;
+extern int _starpu_mpi_use_coop_sends;
+void _starpu_mpi_env_init(void);
 
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)
@@ -194,14 +199,36 @@ struct _starpu_mpi_node_tag
 	starpu_mpi_tag_t data_tag;
 };
 
+MULTILIST_CREATE_TYPE(_starpu_mpi_req, coop_sends)
+/* One bag of cooperative sends: the send requests for the same data that were
+ * gathered so that they can be submitted together */
+struct _starpu_mpi_coop_sends
+{
+	/* List of send requests */
+	struct _starpu_mpi_req_multilist_coop_sends reqs;
+	/* MPI data of the handle this bag belongs to; its coop_lock protects reqs */
+	struct _starpu_mpi_data *mpi_data;
+
+	/* Array of send requests, after sorting out */
+	/* lock is held while reqs_array is built and sorted */
+	struct _starpu_spinlock lock;
+	/* NULL until the bag has been optimized */
+	struct _starpu_mpi_req **reqs_array;
+	/* Number of requests queued in the bag */
+	unsigned n;
+	/* Set with a test-and-set by whoever takes charge of submitting the redirects */
+	unsigned redirects_sent;
+};
+
+/* Initialized in starpu_mpi_data_register_comm */
 struct _starpu_mpi_data
 {
 	int magic;
 	struct _starpu_mpi_node_tag node_tag;
 	int *cache_sent;
 	int cache_received;
+
+	/* Rendez-vous data for opportunistic cooperative sends */
+	struct _starpu_spinlock coop_lock; /* Needed to synchronize between submit thread and workers */
+	struct _starpu_mpi_coop_sends *coop_sends; /* Current cooperative send bag */
 };
 
+struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle);
+
 struct _starpu_mpi_req;
 LIST_TYPE(_starpu_mpi_req,
 	/* description of the data at StarPU level */
@@ -232,6 +259,8 @@ LIST_TYPE(_starpu_mpi_req,
 #elif defined(STARPU_USE_MPI_MPI)
 	MPI_Request data_request;
 #endif
+	struct _starpu_mpi_req_multilist_coop_sends coop_sends;
+	struct _starpu_mpi_coop_sends *coop_sends_head;
 
 	int *flag;
 	unsigned sync;
@@ -290,17 +319,41 @@ LIST_TYPE(_starpu_mpi_req,
 );
 PRIO_LIST_TYPE(_starpu_mpi_req, prio)
 
-struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
+MULTILIST_CREATE_INLINES(struct _starpu_mpi_req, _starpu_mpi_req, coop_sends)
+
+/* To be called before actually queueing a request, so the communication layer knows it has something to look at */
+void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req);
+/* To be called to actually submit the request */
+void _starpu_mpi_submit_ready_request(void *arg);
+/* To be called when request is completed */
+void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req);
+
+/* Build a communication tree. Called once per bag, before _starpu_mpi_submit_coop_sends is called for it. coop_sends->lock is held. */
+void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends);
+/* Try to merge this send request with other send requests */
+void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency);
+
+/* Actually submit the coop_sends bag to MPI.
+ * At least one of submit_redirects or submit_data is true.
+ * _starpu_mpi_submit_coop_sends may be called either
+ * - just once with both parameters being true,
+ * - or once with submit_redirects being true (data is not available yet, but we
+ * can send the redirects), and a second time with submit_data being true. Or
+ * the converse, possibly on different threads, etc.
+ */
+void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_redirects, int submit_data);
+
+void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req);
+void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
+struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle,
 						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       enum starpu_data_access_mode mode,
 						       int sequential_consistency,
 						       int is_internal_req,
 						       starpu_ssize_t count);
 
-void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req);
-void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
+
 void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req);
 void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req);
 void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req);
@@ -325,6 +378,7 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv);
 #ifdef STARPU_SIMGRID
 void _starpu_mpi_wait_for_initialization();
 #endif
+void _starpu_mpi_data_flush(starpu_data_handle_t data_handle);
 
 #ifdef __cplusplus
 }

+ 161 - 0
mpi/src/starpu_mpi_req.c

@@ -0,0 +1,161 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2013,2016-2017                      Inria
+ * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2017                                     Guillaume Beauchamp
+ * Copyright (C) 2010-2018                                CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_mpi_private.h>
+#if defined(STARPU_USE_MPI_MPI)
+#include <mpi/starpu_mpi_comm.h>
+#endif
+#if defined(STARPU_USE_MPI_NMAD)
+#include <pioman.h>
+#include <nm_mpi_nmad.h>
+#endif
+
+/* Allocate a zeroed request, store it in *req, and set every field to its
+ * default value. The synchronization objects of the request are initialized
+ * according to the backend selected at build time (NewMadeleine or MPI), and
+ * the request's queue is set up and registered when building for SimGrid.
+ * The result is to be released with _starpu_mpi_request_destroy.
+ */
+void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
+{
+	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
+
+	/* Initialize the request structure */
+	(*req)->data_handle = NULL;
+	(*req)->prio = 0;
+
+	(*req)->datatype = 0;
+	(*req)->datatype_name = NULL;
+	(*req)->ptr = NULL;
+	(*req)->count = -1;
+	(*req)->registered_datatype = -1;
+
+	(*req)->node_tag.rank = -1;
+	(*req)->node_tag.data_tag = -1;
+	(*req)->node_tag.comm = 0;
+
+	(*req)->func = NULL;
+
+	(*req)->status = NULL;
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->data_request = 0;
+#endif
+	(*req)->flag = NULL;
+	/* The request is not part of any cooperative send bag yet */
+	_starpu_mpi_req_multilist_init_coop_sends(*req);
+
+	(*req)->ret = -1;
+#ifdef STARPU_USE_MPI_NMAD
+	piom_cond_init(&((*req)->req_cond), 0);
+#elif defined(STARPU_USE_MPI_MPI)
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
+#endif
+
+	(*req)->request_type = UNKNOWN_REQ;
+
+	(*req)->submitted = 0;
+	(*req)->completed = 0;
+	(*req)->posted = 0;
+
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->other_request = NULL;
+#endif
+
+	(*req)->sync = 0;
+	(*req)->detached = -1;
+	(*req)->callback = NULL;
+	(*req)->callback_arg = NULL;
+
+#ifdef STARPU_USE_MPI_MPI
+	(*req)->size_req = 0;
+	(*req)->internal_req = NULL;
+	(*req)->is_internal_req = 0;
+	(*req)->to_destroy = 1;
+	(*req)->early_data_handle = NULL;
+	(*req)->envelope = NULL;
+#endif
+	(*req)->sequential_consistency = 1;
+	(*req)->pre_sync_jobid = -1;
+	(*req)->post_sync_jobid = -1;
+
+#ifdef STARPU_SIMGRID
+	/* NOTE(review): "wait" is not declared in this file; presumably it comes from a header, confirm */
+	starpu_pthread_queue_init(&((*req)->queue));
+	starpu_pthread_queue_register(&wait, &((*req)->queue));
+	(*req)->done = 0;
+#endif
+}
+
+/* Allocate a request with _starpu_mpi_request_init and fill it from the
+ * parameters. Returns the new request.
+ *
+ * - data_handle, srcdst, data_tag, comm: the data and the peer/tag/communicator
+ *   stored in the request's node_tag
+ * - detached, sync, callback, arg, request_type, func, sequential_consistency:
+ *   stored as-is in the request
+ * - prio: stored only when _starpu_mpi_use_prio is non-zero, otherwise the
+ *   request keeps the priority 0 set by _starpu_mpi_request_init
+ * - is_internal_req, count: only used with the MPI backend; they are ignored
+ *   with the NewMadeleine backend
+ */
+struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handle,
+						 int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
+						 unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
+						 enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
+						 int sequential_consistency,
+						 int is_internal_req,
+						 starpu_ssize_t count)
+{
+	struct _starpu_mpi_req *req;
+
+#ifdef STARPU_USE_MPI_MPI
+	_starpu_mpi_comm_register(comm);
+#endif
+
+	/* Initialize the request structure */
+	_starpu_mpi_request_init(&req);
+	req->request_type = request_type;
+	/* prio_list is sorted by increasing values */
+	if (_starpu_mpi_use_prio)
+		req->prio = prio;
+	req->data_handle = data_handle;
+	req->node_tag.rank = srcdst;
+	req->node_tag.data_tag = data_tag;
+	req->node_tag.comm = comm;
+	req->detached = detached;
+	req->sync = sync;
+	req->callback = callback;
+	req->callback_arg = arg;
+	req->func = func;
+	req->sequential_consistency = sequential_consistency;
+#ifdef STARPU_USE_MPI_NMAD
+	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
+#elif defined(STARPU_USE_MPI_MPI)
+	req->is_internal_req = is_internal_req;
+	/* For internal requests, we wait for both the request completion and the matching application request completion */
+	req->to_destroy = !is_internal_req;
+	req->count = count;
+#endif
+
+	return req;
+}
+
+/* Destroy the synchronization objects of req, matching the backend-specific
+ * initialization done in _starpu_mpi_request_init, then free the request itself.
+ * req must not be used afterwards.
+ */
+void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
+{
+#ifdef STARPU_USE_MPI_NMAD
+	/* NOTE(review): datatype_name is only freed in the MPI branch below; confirm
+	 * it cannot be allocated with the NewMadeleine backend */
+	piom_cond_destroy(&(req->req_cond));
+#elif defined(STARPU_USE_MPI_MPI)
+	STARPU_PTHREAD_MUTEX_DESTROY(&req->req_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&req->req_cond);
+	STARPU_PTHREAD_MUTEX_DESTROY(&req->posted_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&req->posted_cond);
+	free(req->datatype_name);
+	req->datatype_name = NULL;
+#endif
+#ifdef STARPU_SIMGRID
+	starpu_pthread_queue_unregister(&wait, &req->queue);
+	starpu_pthread_queue_destroy(&req->queue);
+#endif
+	free(req);
+}
+

+ 1 - 0
mpi/tests/Makefile.am

@@ -194,6 +194,7 @@ noinst_PROGRAMS =				\
 	block_interface				\
 	block_interface_pinned			\
 	attr					\
+	broadcast				\
 	cache					\
 	cache_disable				\
 	callback				\

+ 26 - 6
mpi/tests/broadcast.c

@@ -18,11 +18,13 @@
 #include <starpu_mpi.h>
 #include "helper.h"
 
-void wait_CPU(void *descr[], void *_args)
+void wait_CPU(void *descr[], void *args)
 {
-	(void)_args;
 	int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]);
-	*var = 42;
+	int val;
+
+	starpu_codelet_unpack_args(args, &val);
+	*var = val;
 	starpu_sleep(1);
 }
 
@@ -57,9 +59,20 @@ int main(int argc, char **argv)
 
 	if (rank == 0)
 	{
-		starpu_task_insert(&cl, STARPU_W, handle, 0);
+		int val, n;
+
+		val = 42;
+		starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0);
+
+		for(n = 1 ; n < size ; n++)
+		{
+			FPRINTF_MPI(stderr, "sending data to %d\n", n);
+			starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL);
+		}
+
+		val = 43;
+		starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0);
 
-		int n;
 		for(n = 1 ; n < size ; n++)
 		{
 			FPRINTF_MPI(stderr, "sending data to %d\n", n);
@@ -69,11 +82,18 @@ int main(int argc, char **argv)
 	else
 	{
 		starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status);
+		starpu_data_acquire(handle, STARPU_R);
+		STARPU_ASSERT(var == 42);
+		starpu_data_release(handle);
+
+		starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status);
+		starpu_data_acquire(handle, STARPU_R);
+		STARPU_ASSERT(var == 43);
+		starpu_data_release(handle);
 		FPRINTF_MPI(stderr, "received data\n");
 	}
 
 	starpu_data_unregister(handle);
-	STARPU_ASSERT(var == 42);
 
 	starpu_mpi_shutdown();
 	starpu_shutdown();

+ 1 - 2
mpi/tests/user_defined_datatype_value.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013-2015,2017                           CNRS
+ * Copyright (C) 2013-2015,2017,2018                      CNRS
  * Copyright (C) 2014                                     Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -129,7 +129,6 @@ static int copy_any_to_any(void *src_interface, unsigned src_node,
 {
 	struct starpu_value_interface *src_value = src_interface;
 	struct starpu_value_interface *dst_value = dst_interface;
-	int ret = 0;
 
 	return starpu_interface_copy((uintptr_t) src_value->value, 0, src_node,
 				     (uintptr_t) dst_value->value, 0, dst_node,

+ 3 - 2
socl/src/cl_buildprogram.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -76,13 +76,14 @@ static void soclBuildProgram_task(void *data)
 	DEBUG_MSG("[Worker %d] Done building.\n", wid);
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclBuildProgram(cl_program         program,
 		 cl_uint              num_devices,
 		 const cl_device_id * device_list,
 		 const char *         options, 
 		 void (*pfn_notify)(cl_program program, void * user_data),
-		 void *               user_data) CL_API_SUFFIX__VERSION_1_0
+		 void *               user_data)
 {
 	struct bp_data *data;
 

+ 3 - 2
socl/src/cl_createbuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -49,12 +49,13 @@ static void release_callback_memobject(void * e)
  * should avoid it.
  *
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateBuffer(cl_context   context,
 		 cl_mem_flags flags,
 		 size_t       size,
 		 void *       host_ptr,
-		 cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		 cl_int *     errcode_ret)
 {
 	cl_mem mem;
 

+ 3 - 2
socl/src/cl_createcommandqueue.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2014,2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,11 +37,12 @@ static void release_callback_command_queue(void * e)
 	STARPU_PTHREAD_MUTEX_DESTROY(&cq->mutex);
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_command_queue CL_API_CALL
 soclCreateCommandQueue(cl_context                   context,
 		       cl_device_id                   device,
 		       cl_command_queue_properties    properties,
-		       cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		       cl_int *                       errcode_ret)
 {
 	cl_command_queue cq;
 

+ 3 - 2
socl/src/cl_createcontext.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012-2013,2017                           CNRS
- * Copyright (C) 2010-2013                                Université de Bordeaux
+ * Copyright (C) 2010-2013, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,13 +35,14 @@ static void release_callback_context(void * e)
 static char * defaultScheduler = "dmda";
 static char * defaultName = "default";
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_context CL_API_CALL
 soclCreateContext(const cl_context_properties * properties,
 		  cl_uint                       num_devices,
 		  const cl_device_id *          devices,
 		  void (*pfn_notify)(const char *, const void *, size_t, void *),
 		  void *                        user_data,
-		  cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                      errcode_ret)
 {
 	if (pfn_notify == NULL && user_data != NULL)
 	{

+ 3 - 2
socl/src/cl_createcontextfromtype.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012,2016                           Université de Bordeaux
+ * Copyright (C) 2010-2012,2016, 2018                           Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,12 +20,13 @@
 #include "socl.h"
 #include "init.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_context CL_API_CALL
 soclCreateContextFromType(const cl_context_properties * properties,
 			  cl_device_type                device_type,
 			  void (*pfn_notify)(const char *, const void *, size_t, void *),
 			  void *                        user_data,
-			  cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			  cl_int *                      errcode_ret)
 {
 	if (socl_init_starpu() < 0)
 		return NULL;

+ 3 - 2
socl/src/cl_createimage2d.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateImage2D(cl_context              UNUSED(context),
 		  cl_mem_flags            UNUSED(flags),
@@ -26,7 +27,7 @@ soclCreateImage2D(cl_context              UNUSED(context),
 		  size_t                  UNUSED(image_height),
 		  size_t                  UNUSED(image_row_pitch),
 		  void *                  UNUSED(host_ptr),
-		  cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                errcode_ret)
 {
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createimage3d.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_mem CL_API_CALL
 soclCreateImage3D(cl_context              UNUSED(context),
 		  cl_mem_flags            UNUSED(flags),
@@ -28,7 +29,7 @@ soclCreateImage3D(cl_context              UNUSED(context),
 		  size_t                  UNUSED(image_row_pitch),
 		  size_t                  UNUSED(image_slice_pitch),
 		  void *                  UNUSED(host_ptr),
-		  cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *                errcode_ret)
 {
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createkernel.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2014,2016-2017                      CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -117,10 +117,11 @@ static void release_callback_kernel(void * e)
 	free(kernel->errcodes);
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_kernel CL_API_CALL
 soclCreateKernel(cl_program    program,
 		 const char *    kernel_name,
-		 cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		 cl_int *        errcode_ret)
 {
 	cl_kernel k;
 

+ 3 - 2
socl/src/cl_createkernelsinprogram.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,11 +18,12 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclCreateKernelsInProgram(cl_program   UNUSED(program),
 			   cl_uint        UNUSED(num_kernels),
 			   cl_kernel *    UNUSED(kernels),
-			   cl_uint *      UNUSED(num_kernels_ret)) CL_API_SUFFIX__VERSION_1_0
+			   cl_uint *      UNUSED(num_kernels_ret))
 {
 	//TODO
 	return CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_createprogramwithbinary.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_program CL_API_CALL
 soclCreateProgramWithBinary(cl_context                     UNUSED(context),
 			    cl_uint                        UNUSED(num_devices),
@@ -25,7 +26,7 @@ soclCreateProgramWithBinary(cl_context                     UNUSED(context),
 			    const size_t *                 UNUSED(lengths),
 			    const unsigned char **         UNUSED(binaries),
 			    cl_int *                       UNUSED(binary_status),
-			    cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			    cl_int *                       errcode_ret)
 {
 	//TODO
 	if (errcode_ret != NULL)

+ 3 - 2
socl/src/cl_createprogramwithsource.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,12 +66,13 @@ static void release_callback_program(void * e)
 		free(program->options);
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_program CL_API_CALL
 soclCreateProgramWithSource(cl_context      context,
 			    cl_uint           count,
 			    const char **     strings,
 			    const size_t *    lengths,
-			    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0
+			    cl_int *          errcode_ret)
 {
 	cl_program p;
 	struct cpws_data *data;

+ 3 - 2
socl/src/cl_createsampler.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,12 +18,13 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_sampler CL_API_CALL
 soclCreateSampler(cl_context          UNUSED(context),
 		  cl_bool             UNUSED(normalized_coords), 
 		  cl_addressing_mode  UNUSED(addressing_mode), 
 		  cl_filter_mode      UNUSED(filter_mode),
-		  cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		  cl_int *            errcode_ret)
 {
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_enqueuebarrier.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
-soclEnqueueBarrier(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclEnqueueBarrier(cl_command_queue cq)
 {
 	command_barrier cmd = command_barrier_create();
 

+ 3 - 2
socl/src/cl_enqueuebarrierwithwaitlist.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     CNRS
- * Copyright (C) 2010,2013                                Université de Bordeaux
+ * Copyright (C) 2010,2013, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,11 +17,12 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_2
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueBarrierWithWaitList(cl_command_queue  cq,
 			       cl_uint num_events,
 			       const cl_event * events,
-			       cl_event *          event) CL_API_SUFFIX__VERSION_1_2
+			       cl_event *          event)
 {
 	command_barrier cmd = command_barrier_create();
 

+ 3 - 2
socl/src/cl_enqueuecopybuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014,2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014,2017-2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -99,6 +99,7 @@ cl_int command_copy_buffer_submit(command_copy_buffer cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyBuffer(cl_command_queue  cq,
 		      cl_mem              src_buffer,
@@ -108,7 +109,7 @@ soclEnqueueCopyBuffer(cl_command_queue  cq,
 		      size_t              cb,
 		      cl_uint             num_events,
 		      const cl_event *    events,
-		      cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          event)
 {
 	command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb);
 

+ 3 - 2
socl/src/cl_enqueuecopybuffertoimage.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
 			     cl_mem           UNUSED(src_buffer),
@@ -27,7 +28,7 @@ soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue),
 			     const size_t *   UNUSED(region),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
-			     cl_event *       UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			     cl_event *       UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_enqueuecopyimage.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyImage(cl_command_queue   UNUSED(command_queue),
 		     cl_mem               UNUSED(src_image),
@@ -27,7 +28,7 @@ soclEnqueueCopyImage(cl_command_queue   UNUSED(command_queue),
 		     const size_t *       UNUSED(region),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
-		     cl_event *           UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		     cl_event *           UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_enqueuecopyimagetobuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue),
 			     cl_mem           UNUSED(src_image),
@@ -27,7 +28,7 @@ soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue),
 			     size_t           UNUSED(dst_offset),
 			     cl_uint          UNUSED(num_events_in_wait_list),
 			     const cl_event * UNUSED(event_wait_list),
-			     cl_event *       UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			     cl_event *       UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_enqueuemapbuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012-2013,2017                           CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -45,6 +45,7 @@ cl_int command_map_buffer_submit(command_map_buffer cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
 soclEnqueueMapBuffer(cl_command_queue cq,
 		     cl_mem           buffer,
@@ -55,7 +56,7 @@ soclEnqueueMapBuffer(cl_command_queue cq,
 		     cl_uint          num_events,
 		     const cl_event * events,
 		     cl_event *       event,
-		     cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		     cl_int *         errcode_ret)
 {
 	command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb);
 

+ 3 - 2
socl/src/cl_enqueuemapimage.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
 soclEnqueueMapImage(cl_command_queue  UNUSED(command_queue),
 		    cl_mem            UNUSED(image),
@@ -30,7 +31,7 @@ soclEnqueueMapImage(cl_command_queue  UNUSED(command_queue),
 		    cl_uint           UNUSED(num_events_in_wait_list),
 		    const cl_event *  UNUSED(event_wait_list),
 		    cl_event *        UNUSED(event),
-		    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0
+		    cl_int *          errcode_ret)
 {
 	if (errcode_ret != NULL)
 		*errcode_ret = CL_INVALID_OPERATION;

+ 3 - 2
socl/src/cl_enqueuemarker.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,9 +18,10 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueMarker(cl_command_queue  cq,
-                cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+                cl_event *          event)
 {
 	if (event == NULL)
 		return CL_INVALID_VALUE;

+ 3 - 2
socl/src/cl_enqueuemarkerwithwaitlist.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     CNRS
- * Copyright (C) 2010,2013                                Université de Bordeaux
+ * Copyright (C) 2010,2013, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,11 +17,12 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_2
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueMarkerWithWaitList(cl_command_queue  cq,
 			      cl_uint num_events,
 			      const cl_event * events,
-			      cl_event *          event) CL_API_SUFFIX__VERSION_1_2
+			      cl_event *          event)
 {
 	if (events == NULL)
 		return soclEnqueueBarrierWithWaitList(cq, num_events, events, event);

+ 3 - 2
socl/src/cl_enqueuenativekernel.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueNativeKernel(cl_command_queue  UNUSED(command_queue),
 			__attribute__((unused)) void (*user_func)(void *),
@@ -28,7 +29,7 @@ soclEnqueueNativeKernel(cl_command_queue  UNUSED(command_queue),
 			const void **     UNUSED(args_mem_loc),
 			cl_uint           UNUSED(num_events_in_wait_list),
 			const cl_event *  UNUSED(event_wait_list),
-			cl_event *        UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+			cl_event *        UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_enqueuendrangekernel.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2014,2016-2017                      CNRS
- * Copyright (C) 2010-2011,2013,2016-2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013,2016-2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -159,6 +159,7 @@ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_1
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueNDRangeKernel(cl_command_queue cq,
 			 cl_kernel        kernel,
@@ -168,7 +169,7 @@ soclEnqueueNDRangeKernel(cl_command_queue cq,
 			 const size_t *   local_work_size,
 			 cl_uint          num_events,
 			 const cl_event * events,
-			 cl_event *       event) CL_API_SUFFIX__VERSION_1_1
+			 cl_event *       event)
 {
 	if (kernel->split_func != NULL && !STARPU_PTHREAD_MUTEX_TRYLOCK(&kernel->split_lock))
 	{

+ 3 - 2
socl/src/cl_enqueuereadbuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014, 2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -102,6 +102,7 @@ cl_int command_read_buffer_submit(command_read_buffer cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueReadBuffer(cl_command_queue  cq,
 		      cl_mem              buffer,
@@ -111,7 +112,7 @@ soclEnqueueReadBuffer(cl_command_queue  cq,
 		      void *              ptr,
 		      cl_uint             num_events,
 		      const cl_event *    events,
-		      cl_event *          event) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          event)
 {
 	command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr);
 

+ 3 - 2
socl/src/cl_enqueuereadimage.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueReadImage(cl_command_queue   UNUSED(command_queue),
 		     cl_mem               UNUSED(image),
@@ -29,7 +30,7 @@ soclEnqueueReadImage(cl_command_queue   UNUSED(command_queue),
 		     void *               UNUSED(ptr),
 		     cl_uint              UNUSED(num_events_in_wait_list),
 		     const cl_event *     UNUSED(event_wait_list),
-		     cl_event *           UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		     cl_event *           UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_enqueuetask.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,12 +18,13 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueTask(cl_command_queue cq,
 		cl_kernel         kernel,
 		cl_uint           num_events,
 		const cl_event *  events,
-		cl_event *        event) CL_API_SUFFIX__VERSION_1_0
+		cl_event *        event)
 {
 	command_ndrange_kernel cmd = command_task_create(kernel);
 

+ 3 - 2
socl/src/cl_enqueueunmapmemobject.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,13 +32,14 @@ cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueUnmapMemObject(cl_command_queue cq,
 			  cl_mem            buffer,
 			  void *            ptr,
 			  cl_uint           num_events,
 			  const cl_event *  events,
-			  cl_event *        event) CL_API_SUFFIX__VERSION_1_0
+			  cl_event *        event)
 {
 	command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr);
 

+ 3 - 2
socl/src/cl_enqueuewaitforevents.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,10 +18,11 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWaitForEvents(cl_command_queue cq,
 			 cl_uint          num_events,
-			 const cl_event * events) CL_API_SUFFIX__VERSION_1_0
+			 const cl_event * events)
 {
 	command_marker cmd = command_marker_create();
 

+ 3 - 2
socl/src/cl_enqueuewritebuffer.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2016-2017                           CNRS
- * Copyright (C) 2010-2011,2013-2014                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014, 2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -127,6 +127,7 @@ cl_int command_write_buffer_submit(command_write_buffer cmd)
 	return CL_SUCCESS;
 }
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWriteBuffer(cl_command_queue cq,
 		       cl_mem             buffer,
@@ -136,7 +137,7 @@ soclEnqueueWriteBuffer(cl_command_queue cq,
 		       const void *       ptr,
 		       cl_uint            num_events,
 		       const cl_event *   events,
-		       cl_event *         event) CL_API_SUFFIX__VERSION_1_0
+		       cl_event *         event)
 {
 	command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr);
 

+ 3 - 2
socl/src/cl_enqueuewriteimage.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,7 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclEnqueueWriteImage(cl_command_queue  UNUSED(command_queue),
 		      cl_mem              UNUSED(image),
@@ -29,7 +30,7 @@ soclEnqueueWriteImage(cl_command_queue  UNUSED(command_queue),
 		      const void *        UNUSED(ptr),
 		      cl_uint             UNUSED(num_events_in_wait_list),
 		      const cl_event *    UNUSED(event_wait_list),
-		      cl_event *          UNUSED(event)) CL_API_SUFFIX__VERSION_1_0
+		      cl_event *          UNUSED(event))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_finish.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
-soclFinish(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclFinish(cl_command_queue cq)
 {
 	command_barrier cmd = command_barrier_create();
 

+ 3 - 2
socl/src/cl_flush.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
-soclFlush(cl_command_queue UNUSED(command_queue)) CL_API_SUFFIX__VERSION_1_0
+soclFlush(cl_command_queue UNUSED(command_queue))
 {
 	return CL_SUCCESS;
 }

+ 3 - 2
socl/src/cl_getcommandqueueinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetCommandQueueInfo(cl_command_queue    cq,
 			cl_command_queue_info param_name,
 			size_t                param_value_size,
 			void *                param_value,
-			size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			size_t *              param_value_size_ret)
 {
 	if (cq == NULL)
 		return CL_INVALID_COMMAND_QUEUE;

+ 4 - 3
socl/src/cl_getcontextinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011,2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetContextInfo(cl_context       context,
 		   cl_context_info    param_name,
 		   size_t             param_value_size,
 		   void *             param_value,
-		   size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           param_value_size_ret)
 {
 	if (context == NULL)
 		return CL_INVALID_CONTEXT;
@@ -33,7 +34,7 @@ soclGetContextInfo(cl_context       context,
 	{
 		INFO_CASE(CL_CONTEXT_REFERENCE_COUNT, context->_entity.refs);
 		INFO_CASE_EX(CL_CONTEXT_DEVICES, context->devices, context->num_devices * sizeof(cl_device_id));
-		INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_device_id));
+		INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_context_properties));
 	default:
 		return CL_INVALID_VALUE;
 	}

+ 3 - 2
socl/src/cl_getdeviceids.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012,2016                           Université de Bordeaux
+ * Copyright (C) 2010-2012,2016, 2018                           Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,12 +25,13 @@
  *
  * \param[in] platform Must be StarPU platform ID or NULL
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetDeviceIDs(cl_platform_id   platform,
 		 cl_device_type   device_type,
 		 cl_uint          num_entries,
 		 cl_device_id *   devices,
-		 cl_uint *        num_devices) CL_API_SUFFIX__VERSION_1_0
+		 cl_uint *        num_devices)
 {
 	if (socl_init_starpu() < 0)
 	{

+ 3 - 2
socl/src/cl_getdeviceinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetDeviceInfo(cl_device_id    device,
 		  cl_device_info  param_name,
 		  size_t          param_value_size,
 		  void *          param_value,
-		  size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		  size_t *        param_value_size_ret)
 {
 	//FIXME: we do not check if the device is valid
 	/* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier)

+ 3 - 2
socl/src/cl_geteventinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetEventInfo(cl_event       event,
 		 cl_event_info    param_name,
 		 size_t           param_value_size,
 		 void *           param_value,
-		 size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		 size_t *         param_value_size_ret)
 {
 	if (event == NULL)
 		return CL_INVALID_EVENT;

+ 3 - 2
socl/src/cl_geteventprofilinginfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011,2013                           Université de Bordeaux
+ * Copyright (C) 2010-2011,2013, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetEventProfilingInfo(cl_event          event,
 			  cl_profiling_info   param_name,
 			  size_t              param_value_size,
 			  void *              param_value,
-			  size_t *            param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			  size_t *            param_value_size_ret)
 {
 	switch (param_name)
 	{

+ 3 - 2
socl/src/cl_getextensionfunctionaddress.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,8 +21,9 @@
 #include "socl.h"
 #include "init.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY void * CL_API_CALL
-soclGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0
+soclGetExtensionFunctionAddress(const char * func_name)
 {
 	if (func_name != NULL && strcmp(func_name, "clShutdown") == 0)
 	{

+ 3 - 2
socl/src/cl_getimageinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetImageInfo(cl_mem           UNUSED(image),
 		 cl_image_info    UNUSED(param_name),
 		 size_t           UNUSED(param_value_size),
 		 void *           UNUSED(param_value),
-		 size_t *         UNUSED(param_value_size_ret)) CL_API_SUFFIX__VERSION_1_0
+		 size_t *         UNUSED(param_value_size_ret))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_getkernelinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetKernelInfo(cl_kernel       kernel,
 		  cl_kernel_info  param_name,
 		  size_t          param_value_size,
 		  void *          param_value,
-		  size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		  size_t *        param_value_size_ret)
 {
 	if (kernel == NULL)
 		return CL_INVALID_KERNEL;

+ 3 - 2
socl/src/cl_getkernelworkgroupinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,14 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetKernelWorkGroupInfo(cl_kernel                kernel,
 			   cl_device_id               device,
 			   cl_kernel_work_group_info  param_name,
 			   size_t                     param_value_size,
 			   void *                     param_value,
-			   size_t *                   param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			   size_t *                   param_value_size_ret)
 {
 	int range = starpu_worker_get_range_by_id(device->worker_id);
 	cl_device_id dev;

+ 3 - 2
socl/src/cl_getmemobjectinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetMemObjectInfo(cl_mem           mem,
 		     cl_mem_info      param_name,
 		     size_t           param_value_size,
 		     void *           param_value,
-		     size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		     size_t *         param_value_size_ret)
 {
 	static cl_mem_object_type mot = CL_MEM_OBJECT_BUFFER;
 

+ 3 - 2
socl/src/cl_getplatformids.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,10 +23,11 @@ extern int _starpu_init_failed;
 /**
  * \brief Get StarPU platform ID
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetPlatformIDs(cl_uint          num_entries,
 		   cl_platform_id * platforms,
-		   cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0
+		   cl_uint *        num_platforms)
 {
 	if (_starpu_init_failed)
 	{

+ 3 - 2
socl/src/cl_getplatforminfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,12 +25,13 @@
  *
  * \param[in] platform StarPU platform ID or NULL
  */
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetPlatformInfo(cl_platform_id   platform,
 		    cl_platform_info param_name,
 		    size_t           param_value_size,
 		    void *           param_value,
-		    size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		    size_t *         param_value_size_ret)
 {
 	if (platform != NULL && platform != &socl_platform)
 		return CL_INVALID_PLATFORM;

+ 3 - 2
socl/src/cl_getprogrambuildinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,13 +19,14 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetProgramBuildInfo(cl_program          program,
 			cl_device_id          UNUSED(device),
 			cl_program_build_info param_name,
 			size_t                param_value_size,
 			void *                param_value,
-			size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+			size_t *              param_value_size_ret)
 {
 	if (program == NULL)
 		return CL_INVALID_PROGRAM;

+ 3 - 2
socl/src/cl_getprograminfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetProgramInfo(cl_program       program,
 		   cl_program_info    param_name,
 		   size_t             param_value_size,
 		   void *             param_value,
-		   size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           param_value_size_ret)
 {
 	if (program == NULL)
 		return CL_INVALID_PROGRAM;

+ 3 - 2
socl/src/cl_getsamplerinfo.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,12 +19,13 @@
 #include "socl.h"
 #include "getinfo.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetSamplerInfo(cl_sampler       UNUSED(sampler),
 		   cl_sampler_info    UNUSED(param_name),
 		   size_t             UNUSED(param_value_size),
 		   void *             UNUSED(param_value),
-		   size_t *           UNUSED(param_value_size_ret)) CL_API_SUFFIX__VERSION_1_0
+		   size_t *           UNUSED(param_value_size_ret))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_getsupportedimageformats.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,14 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetSupportedImageFormats(cl_context           UNUSED(context),
 			     cl_mem_flags         UNUSED(flags),
 			     cl_mem_object_type   UNUSED(image_type),
 			     cl_uint              UNUSED(num_entries),
 			     cl_image_format *    UNUSED(image_formats),
-			     cl_uint *            UNUSED(num_image_formats)) CL_API_SUFFIX__VERSION_1_0
+			     cl_uint *            UNUSED(num_image_formats))
 {
 	return CL_INVALID_OPERATION;
 }

+ 3 - 2
socl/src/cl_icdgetplatformidskhr.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2012                                Université de Bordeaux
+ * Copyright (C) 2010-2012, 2018                                Université de Bordeaux
  * Copyright (C) 2012                                     Vincent Danjean
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,9 +21,10 @@
 
 extern int _starpu_init_failed;
 
+CL_EXT_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint num_entries,
 							 cl_platform_id *platforms,
-							 cl_uint *num_platforms) CL_EXT_SUFFIX__VERSION_1_0
+							 cl_uint *num_platforms)
 {
 	if ((num_entries == 0 && platforms != NULL)
 	    || (num_platforms == NULL && platforms == NULL))

+ 3 - 2
socl/src/cl_releasecommandqueue.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
-soclReleaseCommandQueue(cl_command_queue cq) CL_API_SUFFIX__VERSION_1_0
+soclReleaseCommandQueue(cl_command_queue cq)
 {
 	gc_entity_release(cq);
 

+ 3 - 2
socl/src/cl_releasecontext.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2012,2017                                CNRS
- * Copyright (C) 2010-2011                                Université de Bordeaux
+ * Copyright (C) 2010-2011, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,8 +18,9 @@
 
 #include "socl.h"
 
+CL_API_SUFFIX__VERSION_1_0
 CL_API_ENTRY cl_int CL_API_CALL
-soclReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0
+soclReleaseContext(cl_context context)
 {
 	if (context == NULL)
 		return CL_INVALID_CONTEXT;

+ 0 - 0
socl/src/cl_releaseevent.c


Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels