Ver código fonte

Merge from trunk

Corentin Salingue 8 anos atrás
pai
commit
0f9830e940
100 arquivos alterados com 2088 adições e 1316 exclusões
  1. 5 0
      AUTHORS
  2. 8 0
      ChangeLog
  3. 2 1
      Makefile.am
  4. 41 26
      configure.ac
  5. 20 1
      doc/doxygen/chapters/210_check_list_performance.doxy
  6. 1 1
      doc/doxygen/chapters/350_modularized_scheduler.doxy
  7. 1 1
      doc/doxygen/chapters/501_environment_variables.doxy
  8. 16 12
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  9. 17 0
      doc/doxygen/chapters/api/cuda_extensions.doxy
  10. 2 2
      doc/doxygen/chapters/api/scheduling_contexts.doxy
  11. 0 6
      doc/doxygen/chapters/api/scheduling_policy.doxy
  12. 7 7
      doc/doxygen/chapters/api/task_lists.doxy
  13. 66 0
      doc/doxygen/chapters/api/workers.doxy
  14. 1 0
      examples/heat/heat.sh
  15. 19 2
      examples/interface/complex_codelet.h
  16. 3 0
      examples/mandelbrot/mandelbrot.c
  17. 8 2
      examples/sched_ctx/dummy_sched_with_ctx.c
  18. 5 1
      examples/sched_ctx/nested_sched_ctxs.c
  19. 8 6
      examples/sched_ctx/parallel_code.c
  20. 15 11
      examples/sched_ctx/parallel_tasks_reuse_handle.c
  21. 19 10
      examples/sched_ctx/sched_ctx.c
  22. 4 2
      examples/scheduler/dummy_sched.c
  23. 3 21
      include/fstarpu_mod.f90
  24. 1 0
      include/starpu.h
  25. 5 1
      include/starpu_cublas_v2.h
  26. 38 0
      include/starpu_cusparse.h
  27. 19 0
      include/starpu_sched_component.h
  28. 7 9
      include/starpu_sched_ctx.h
  29. 3 5
      include/starpu_scheduler.h
  30. 1 0
      include/starpu_task.h
  31. 33 125
      include/starpu_task_list.h
  32. 10 9
      include/starpu_task_util.h
  33. 22 1
      include/starpu_worker.h
  34. 26 3
      mpi/examples/Makefile.am
  35. 17 1
      mpi/examples/complex/mpi_complex.c
  36. 17 1
      mpi/examples/stencil/stencil5.c
  37. 6 4
      mpi/src/starpu_mpi.c
  38. 6 0
      mpi/src/starpu_mpi_task_insert.c
  39. 5 0
      mpi/src/starpu_mpi_task_insert_fortran.c
  40. 20 17
      mpi/tests/Makefile.am
  41. 9 6
      mpi/tests/block_interface.c
  42. 11 7
      mpi/tests/block_interface_pinned.c
  43. 10 6
      mpi/tests/datatypes.c
  44. 10 6
      mpi/tests/early_request.c
  45. 10 6
      mpi/tests/gather.c
  46. 10 6
      mpi/tests/gather2.c
  47. 13 4
      mpi/tests/helper.h
  48. 1 1
      mpi/tests/insert_task_compute.c
  49. 11 7
      mpi/tests/insert_task_count.c
  50. 9 4
      mpi/tests/insert_task_dyn_handles.c
  51. 1 1
      mpi/tests/insert_task_recv_cache.c
  52. 1 1
      mpi/tests/insert_task_sent_cache.c
  53. 5 3
      mpi/tests/load_balancer.c
  54. 10 6
      mpi/tests/matrix2.c
  55. 11 7
      mpi/tests/mpi_detached_tag.c
  56. 11 7
      mpi/tests/mpi_earlyrecv.c
  57. 11 7
      mpi/tests/mpi_earlyrecv2.c
  58. 1 1
      mpi/tests/mpi_earlyrecv2_sync.c
  59. 11 7
      mpi/tests/mpi_irecv.c
  60. 11 7
      mpi/tests/mpi_irecv_detached.c
  61. 11 7
      mpi/tests/mpi_isend.c
  62. 11 7
      mpi/tests/mpi_isend_detached.c
  63. 10 7
      mpi/tests/mpi_redux.c
  64. 11 7
      mpi/tests/mpi_test.c
  65. 11 7
      mpi/tests/pingpong.c
  66. 10 6
      mpi/tests/policy_register.c
  67. 10 5
      mpi/tests/policy_selection.c
  68. 14 5
      mpi/tests/policy_selection2.c
  69. 11 7
      mpi/tests/ring.c
  70. 11 7
      mpi/tests/ring_async.c
  71. 11 7
      mpi/tests/ring_sync.c
  72. 11 7
      mpi/tests/ring_sync_detached.c
  73. 5 3
      mpi/tests/starpu_redefine.c
  74. 8 5
      mpi/tests/sync.c
  75. 1 1
      mpi/tests/tags_checking.c
  76. 0 1
      sc_hypervisor/src/sc_hypervisor.c
  77. 10 0
      socl/examples/matmul/matmul.c
  78. 1 1
      socl/src/cl_createkernel.c
  79. 4 4
      socl/src/gc.c
  80. 1 1
      socl/src/task.c
  81. 2 0
      src/Makefile.am
  82. 12 0
      src/common/graph.c
  83. 22 0
      src/common/inlines.c
  84. 42 38
      src/common/list.h
  85. 46 42
      src/common/prio_list.h
  86. 22 24
      src/common/thread.c
  87. 4 0
      src/common/utils.c
  88. 2 1
      src/core/combined_workers.c
  89. 2 3
      src/core/dependencies/cg.c
  90. 2 3
      src/core/errorcheck.h
  91. 6 11
      src/core/jobs.c
  92. 6 6
      src/core/jobs.h
  93. 1 2
      src/core/perfmodel/multiple_regression.c
  94. 20 7
      src/core/perfmodel/perfmodel.c
  95. 153 3
      src/core/perfmodel/perfmodel_history.c
  96. 14 16
      src/core/perfmodel/perfmodel_print.c
  97. 827 629
      src/core/sched_ctx.c
  98. 66 46
      src/core/sched_ctx.h
  99. 3 2
      src/core/sched_ctx_list.c
  100. 0 0
      src/core/sched_policy.c

+ 5 - 0
AUTHORS

@@ -6,10 +6,12 @@ Berenger Bramas <berenger.bramas@inria.fr>
 Alfredo Buttari <alfredo.buttari@enseeiht.fr>
 Alfredo Buttari <alfredo.buttari@enseeiht.fr>
 Adrien Cassagne <adrien.cassagne@inria.fr>
 Adrien Cassagne <adrien.cassagne@inria.fr>
 Jérôme Clet-Ortega <jerome.clet-ortega@labri.fr>
 Jérôme Clet-Ortega <jerome.clet-ortega@labri.fr>
+Terry Cojean <terry.cojean@inria.fr>
 Nicolas Collin <nicolas.collin@inria.fr>
 Nicolas Collin <nicolas.collin@inria.fr>
 Ludovic Courtès <ludovic.courtes@inria.fr>
 Ludovic Courtès <ludovic.courtes@inria.fr>
 Yann Courtois <yann.courtois33@gmail.com>
 Yann Courtois <yann.courtois33@gmail.com>
 Jean-Marie Couteyen <jm.couteyen@gmail.com>
 Jean-Marie Couteyen <jm.couteyen@gmail.com>
+Lionel Eyraud-Dubois <lionel.eyraud-dubois@inria.fr>
 Nathalie Furmento <nathalie.furmento@labri.fr>
 Nathalie Furmento <nathalie.furmento@labri.fr>
 David Gómez <david_gomez1380@yahoo.com.mx>
 David Gómez <david_gomez1380@yahoo.com.mx>
 Sylvain Henry <sylvain.henry@inria.fr>
 Sylvain Henry <sylvain.henry@inria.fr>
@@ -22,9 +24,11 @@ Brice Mortier <brice.mortier@etu.u-bordeaux1.fr>
 Stojce Nakov <stojce.nakov@inria.fr>
 Stojce Nakov <stojce.nakov@inria.fr>
 Joris Pablo <joris.pablo@orange.fr>
 Joris Pablo <joris.pablo@orange.fr>
 Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
 Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
+Samuel Pitoiset <samuel.pitoiset@inria.fr>
 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
 Cyril Roelandt <cyril.roelandt@inria.fr>
 Cyril Roelandt <cyril.roelandt@inria.fr>
 Anthony Roy <theanthony33@gmail.com>
 Anthony Roy <theanthony33@gmail.com>
+Chiheb Sakka <chiheb.sakka@inria.fr>
 Corentin Salingue <corentin.salingue@gmail.com>
 Corentin Salingue <corentin.salingue@gmail.com>
 Marc Sergent <marc.sergent@inria.fr>
 Marc Sergent <marc.sergent@inria.fr>
 Anthony Simonet <anthony.simonet@etu.u-bordeaux.fr>
 Anthony Simonet <anthony.simonet@etu.u-bordeaux.fr>
@@ -33,4 +37,5 @@ Ludovic Stordeur <ludovic.stordeur@inria.fr>
 Guillaume Sylvand <guillaume.sylvand@airbus.com>
 Guillaume Sylvand <guillaume.sylvand@airbus.com>
 François Tessier <francois.tessier@inria.fr>
 François Tessier <francois.tessier@inria.fr>
 Samuel Thibault <samuel.thibault@labri.fr>
 Samuel Thibault <samuel.thibault@labri.fr>
+Leo Villeveygoux <leo.villeveygoux@inria.fr>
 Pierre-André Wacrenier <wacrenier@labri.fr>
 Pierre-André Wacrenier <wacrenier@labri.fr>

+ 8 - 0
ChangeLog

@@ -43,12 +43,17 @@ Small features:
   * New function starpu_worker_display_names to display the names of
   * New function starpu_worker_display_names to display the names of
     all the workers of a specified type.
     all the workers of a specified type.
   * Arbiters now support concurrent read access.
   * Arbiters now support concurrent read access.
+  * Add a field starpu_task::where similar to starpu_codelet::where
+    which allows to restrict where to execute a task. Also add
+    STARPU_TASK_WHERE to be used when calling starpu_task_insert().
 
 
 Changes:
 Changes:
   * Vastly improve simgrid simulation time.
   * Vastly improve simgrid simulation time.
 
 
 Small changes:
 Small changes:
   * Use asynchronous transfers for task data fetches with were not prefetched.
   * Use asynchronous transfers for task data fetches with were not prefetched.
+  * Allow to call starpu_sched_ctx_set_policy_data on the main
+    scheduler context
 
 
 StarPU 1.2.2 (svn revision xxx)
 StarPU 1.2.2 (svn revision xxx)
 ==============================================
 ==============================================
@@ -57,6 +62,9 @@ New features:
   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
   * Add NVCC_CC environment variable.
   * Add NVCC_CC environment variable.
   * Add -no-foo options to starpu_fxt_tool to make traces lighter
   * Add -no-foo options to starpu_fxt_tool to make traces lighter
+  * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA
+    overlapping with cusparse.
+
 
 
 Small changes:
 Small changes:
   * Output generated through STARPU_MPI_COMM has been modified to
   * Output generated through STARPU_MPI_COMM has been modified to

+ 2 - 1
Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
+# Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
 # Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
 # Copyright (C) 2014  INRIA
 # Copyright (C) 2014  INRIA
 # Copyright (C) 2016  Inria
 # Copyright (C) 2016  Inria
@@ -99,6 +99,7 @@ versinclude_HEADERS = 				\
 	include/starpu_disk.h			\
 	include/starpu_disk.h			\
 	include/starpu_cublas.h			\
 	include/starpu_cublas.h			\
 	include/starpu_cublas_v2.h		\
 	include/starpu_cublas_v2.h		\
+	include/starpu_cusparse.h		\
 	include/starpu_driver.h			\
 	include/starpu_driver.h			\
 	include/starpu_stdlib.h			\
 	include/starpu_stdlib.h			\
 	include/starpu_thread.h			\
 	include/starpu_thread.h			\

+ 41 - 26
configure.ac

@@ -3,7 +3,7 @@
 # Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2011  Télécom-SudParis
 # Copyright (C) 2011  Télécom-SudParis
-# Copyright (C) 2011, 2012, 2014-2016  INRIA
+# Copyright (C) 2011, 2012, 2014-2017  INRIA
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -475,30 +475,33 @@ if test x$enable_mpi_check = xno ; then
 fi
 fi
 
 
 
 
-# Check if mpiexec is available
-AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec[=<path to mpiexec>]],
-            [Path of mpiexec])],
-    [
-        if test x$withval = xyes; then
-            AC_MSG_ERROR(--with-mpiexec must be given a pathname)
-        else
-            mpiexec_path=$withval
-        fi
-    ],
-    [
-        # nothing was specified: look in the path
-        AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$(dirname $mpicc_path):$PATH])
-    ])
-
-AC_MSG_CHECKING(whether mpiexec is available)
-AC_MSG_RESULT($mpiexec_path)
-
-# We test if MPIEXEC exists
-if test ! -x $mpiexec_path; then
-    #MPIEXEC does not exists or is not executable
-    AC_MSG_RESULT(The mpiexec script is not valid)
-        running_mpi_check=no
-        mpiexec_path=""
+if test x$enable_simgrid = xno ; then
+    # Check if mpiexec is available
+    AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec[=<path to mpiexec>]],
+                [Path of mpiexec])],
+        [
+            if test x$withval = xyes; then
+                AC_MSG_ERROR(--with-mpiexec must be given a pathname)
+            else
+                mpiexec_path=$withval
+            fi
+        ],
+        [
+            # nothing was specified: look in the path
+            AC_PATH_PROG(mpiexec_path, mpiexec, [no], [$(dirname $mpicc_path):$PATH])
+        ])
+    
+    AC_MSG_CHECKING(whether mpiexec is available)
+    AC_MSG_RESULT($mpiexec_path)
+    
+    # We test if MPIEXEC exists
+    if test ! -x $mpiexec_path; then
+        #MPIEXEC does not exists or is not executable
+        AC_MSG_RESULT(The mpiexec script is not valid)
+            running_mpi_check=no
+            mpiexec_path=""
+    fi
+    AC_SUBST(MPIEXEC,$mpiexec_path)
 fi
 fi
 
 
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
@@ -508,7 +511,6 @@ fi
 if test x$use_mpi = xyes ; then
 if test x$use_mpi = xyes ; then
     AC_MSG_CHECKING(whether MPI tests should be run)
     AC_MSG_CHECKING(whether MPI tests should be run)
     AC_MSG_RESULT($running_mpi_check)
     AC_MSG_RESULT($running_mpi_check)
-    AC_SUBST(MPIEXEC,$mpiexec_path)
 fi
 fi
 
 
 #We can only build StarPU MPI Library if User wants it and MPI is available
 #We can only build StarPU MPI Library if User wants it and MPI is available
@@ -1144,6 +1146,9 @@ if test x$enable_cuda = xyes; then
 	fi
 	fi
 
 
 	AC_CHECK_HEADERS([cuda_gl_interop.h])
 	AC_CHECK_HEADERS([cuda_gl_interop.h])
+
+	AC_CHECK_LIB([cusparse], [cusparseCreate])
+	AC_CHECK_DECLS([cusparseSetStream], [], [], [[#include <cusparse.h>]])
 fi
 fi
 
 
 dnl Hey dude, are you around?
 dnl Hey dude, are you around?
@@ -2616,6 +2621,16 @@ if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then
     AC_MSG_ERROR([SOCL cannot be enabled without OpenCL])
     AC_MSG_ERROR([SOCL cannot be enabled without OpenCL])
 fi
 fi
 
 
+# MPI Master Slave and SOCL are not compatible
+if test "x$use_mpi_master_slave" = "xyes" ; then
+   if test "x$enable_socl" = "xyes" ; then
+      AC_MSG_ERROR([MPI Master-Slave and SOCL can not be used at the same time !])
+   fi
+   if test "x$enable_socl" = "xmaybe" ; then
+     enable_socl=no 
+   fi
+fi
+
 # now we enable SOCL if and only if a proper setup is available
 # now we enable SOCL if and only if a proper setup is available
 if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then
 if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then
    build_socl=$have_valid_opencl
    build_socl=$have_valid_opencl

+ 20 - 1
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -65,9 +65,13 @@ Calling starpu_cublas_init() makes StarPU already do appropriate calls for the
 CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
 CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
 one then has to call <c>cublasSetKernelStream(starpu_cuda_get_local_stream())</c> at
 one then has to call <c>cublasSetKernelStream(starpu_cuda_get_local_stream())</c> at
 the beginning of the codelet to make sure that CUBLAS is really using the proper
 the beginning of the codelet to make sure that CUBLAS is really using the proper
-stream. When using CUBLAS v2, starpu_cublas_local_handle() can be called to queue CUBLAS
+stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS
 kernels with the proper configuration.
 kernels with the proper configuration.
 
 
+Similarly, calling starpu_cusparse_init() makes StarPU create CUSPARSE handles
+on each CUDA device, starpu_cusparse_get_local_handle() can then be used to
+queue CUSPARSE kernels with the proper configuration.
+
 If the kernel can be made to only use this local stream or other self-allocated
 If the kernel can be made to only use this local stream or other self-allocated
 streams, i.e. the whole kernel submission can be made asynchronous, then
 streams, i.e. the whole kernel submission can be made asynchronous, then
 one should enable asynchronous execution of the kernel.  That means setting
 one should enable asynchronous execution of the kernel.  That means setting
@@ -78,6 +82,21 @@ able to submit and complete data transfers while kernels are executing, instead
 kernel submission. The kernel just has to make sure that StarPU can use the
 kernel submission. The kernel just has to make sure that StarPU can use the
 local stream to synchronize with the kernel startup and completion.
 local stream to synchronize with the kernel startup and completion.
 
 
+If the kernel uses its own non-default stream, one can synchronize that stream
+with the StarPU-provided stream this way:
+
+\code{.c}
+cudaEvent_t event;
+call_kernel_with_its_own_stream()
+cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
+cudaEventRecord(event, get_kernel_stream());
+cudaStreamWaitEvent(starpu_cuda_get_local_stream(), event, 0);
+cudaEventDestroy(event);
+\endcode
+
+That code makes the StarPU-provided stream wait for a new event, which will be
+triggered by the completion of the kernel.
+
 Using the flag ::STARPU_CUDA_ASYNC also permits to enable concurrent kernel
 Using the flag ::STARPU_CUDA_ASYNC also permits to enable concurrent kernel
 execution, on cards which support it (Kepler and later, notably). This is
 execution, on cards which support it (Kepler and later, notably). This is
 enabled by setting the environment variable \ref STARPU_NWORKER_PER_CUDA to the
 enabled by setting the environment variable \ref STARPU_NWORKER_PER_CUDA to the

+ 1 - 1
doc/doxygen/chapters/350_modularized_scheduler.doxy

@@ -229,7 +229,7 @@ static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id)
     /* Each Worker Component has a Flow-control Fifo Component as
     /* Each Worker Component has a Flow-control Fifo Component as
      * father */
      * father */
     struct starpu_sched_component * worker_component =
     struct starpu_sched_component * worker_component =
-	  starpu_sched_component_worker_get(i);
+	  starpu_sched_component_worker_new(i);
     struct starpu_sched_component * fifo_component =
     struct starpu_sched_component * fifo_component =
 	  starpu_sched_component_fifo_create(&fifo_data);
 	  starpu_sched_component_fifo_create(&fifo_data);
     fifo_component->add_child
     fifo_component->add_child

+ 1 - 1
doc/doxygen/chapters/501_environment_variables.doxy

@@ -831,7 +831,7 @@ that have a limited amount of memory.
 \anchor STARPU_LIMIT_CPU_MEM
 \anchor STARPU_LIMIT_CPU_MEM
 \addindex __env__STARPU_LIMIT_CPU_MEM
 \addindex __env__STARPU_LIMIT_CPU_MEM
 This variable specifies the maximum number of megabytes that should be
 This variable specifies the maximum number of megabytes that should be
-available to the application on each CPU device. Setting it enables allocation
+available to the application in the main CPU memory. Setting it enables allocation
 cache in main memory
 cache in main memory
 </dd>
 </dd>
 
 

+ 16 - 12
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -56,33 +56,33 @@ essentially used for synchronization tasks.
 
 
 \def STARPU_CPU
 \def STARPU_CPU
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where
-to specify the codelet may be executed on a CPU processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where)
+to specify the codelet (or the task) may be executed on a CPU processing unit.
 
 
 \def STARPU_CUDA
 \def STARPU_CUDA
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where
-to specify the codelet may be executed on a CUDA processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where)
+to specify the codelet (or the task) may be executed on a CUDA processing unit.
 
 
 \def STARPU_OPENCL
 \def STARPU_OPENCL
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a OpenCL processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on a OpenCL processing unit.
 
 
 \def STARPU_MIC
 \def STARPU_MIC
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a MIC processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on a MIC processing unit.
 
 
 \def STARPU_MPI_MS
 \def STARPU_MPI_MS
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on a MPI Slave processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on a MPI Slave processing unit.
 
 
 \def STARPU_SCC
 \def STARPU_SCC
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
-This macro is used when setting the field starpu_codelet::where to
-specify the codelet may be executed on an SCC processing unit.
+This macro is used when setting the field starpu_codelet::where (or starpu_task::where) to
+specify the codelet (or the task) may be executed on an SCC processing unit.
 
 
 \def STARPU_MAIN_RAM
 \def STARPU_MAIN_RAM
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
@@ -443,6 +443,10 @@ the configuration of a task allocated with starpu_task_create().
     the task. The access modes are now defined in the field
     the task. The access modes are now defined in the field
     starpu_codelet::modes.
     starpu_codelet::modes.
 
 
+\var uint32_t starpu_task::where
+    When set, specifies where the task is allowed to be executed.
+    When unset, it takes the value of starpu_codelet::where.
+
 \var int starpu_task::nbuffers
 \var int starpu_task::nbuffers
     Specifies the number of buffers. This is only used when
     Specifies the number of buffers. This is only used when
     starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.
     starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.

+ 17 - 0
doc/doxygen/chapters/api/cuda_extensions.doxy

@@ -95,4 +95,21 @@ Report a cublas error.
 Calls starpu_cublas_report_error(), passing the current
 Calls starpu_cublas_report_error(), passing the current
 function, file and line position.
 function, file and line position.
 
 
+\fn void starpu_cusparse_init(void)
+\ingroup API_CUDA_Extensions
+Calling starpu_cusparse_init() will initialize CUSPARSE on every CUDA device
+controlled by StarPU. This call blocks until CUSPARSE has been properly
+initialized on every device.
+
+\fn cusparseHandle_t starpu_cusparse_get_local_handle(void)
+\ingroup API_CUDA_Extensions
+This function returns the CUSPARSE handle to be used to queue CUSPARSE
+kernels. It is properly initialized and configured for multistream by
+starpu_cusparse_init().
+
+\fn void starpu_cusparse_shutdown(void)
+\ingroup API_CUDA_Extensions
+This function synchronously deinitializes the CUSPARSE library on
+every CUDA device.
+
 */
 */

+ 2 - 2
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -132,13 +132,13 @@ Create a context indicating an approximate interval of resources
 Execute the callback whenever the last task of the context finished executing, it is called with the parameters \p sched_ctx and any other parameter needed
 Execute the callback whenever the last task of the context finished executing, it is called with the parameters \p sched_ctx and any other parameter needed
 by the application (packed in \p args)
 by the application (packed in \p args)
 
 
-\fn void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
+\fn void starpu_sched_ctx_add_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id)
 \ingroup API_Scheduling_Contexts
 \ingroup API_Scheduling_Contexts
 Add dynamically the workers in \p workerids_ctx to the
 Add dynamically the workers in \p workerids_ctx to the
 context \p sched_ctx_id. The last argument cannot be greater than
 context \p sched_ctx_id. The last argument cannot be greater than
 \ref STARPU_NMAX_SCHED_CTXS.
 \ref STARPU_NMAX_SCHED_CTXS.
 
 
-\fn void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
+\fn void starpu_sched_ctx_remove_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id)
 \ingroup API_Scheduling_Contexts
 \ingroup API_Scheduling_Contexts
 Remove the workers in \p workerids_ctx from the context
 Remove the workers in \p workerids_ctx from the context
 \p sched_ctx_id. The last argument cannot be greater than
 \p sched_ctx_id. The last argument cannot be greater than

+ 0 - 6
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -124,12 +124,6 @@ this should be called by push functions to wake the potential workers that are
 supposed to pick up the tasks which just have been pushed, otherwise they may
 supposed to pick up the tasks which just have been pushed, otherwise they may
 remain sleeping.
 remain sleeping.
 
 
-\fn void starpu_worker_get_job_id(struct starpu_task *task)
-\ingroup API_Scheduling_Policy
-Return the job id of the given task, i.e. a number that uniquely identifies this
-task for the local MPI node, and can be found in the various offline execution
-traces reports.
-
 \fn int starpu_sched_set_min_priority(int min_prio)
 \fn int starpu_sched_set_min_priority(int min_prio)
 \ingroup API_Scheduling_Policy
 \ingroup API_Scheduling_Policy
 TODO: check if this is correct
 TODO: check if this is correct

+ 7 - 7
doc/doxygen/chapters/api/task_lists.doxy

@@ -28,15 +28,15 @@ Push \p task at the front of \p list
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Push \p task at the back of \p list
 Push \p task at the back of \p list
 
 
-\fn struct starpu_task *starpu_task_list_front(struct starpu_task_list *list)
+\fn struct starpu_task *starpu_task_list_front(const struct starpu_task_list *list)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Get the front of \p list (without removing it)
 Get the front of \p list (without removing it)
 
 
-\fn struct starpu_task *starpu_task_list_back(struct starpu_task_list *list)
+\fn struct starpu_task *starpu_task_list_back(const struct starpu_task_list *list)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Get the back of \p list (without removing it)
 Get the back of \p list (without removing it)
 
 
-\fn int starpu_task_list_empty(struct starpu_task_list *list)
+\fn int starpu_task_list_empty(const struct starpu_task_list *list)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Test if \p list is empty
 Test if \p list is empty
 
 
@@ -52,19 +52,19 @@ Remove the element at the front of \p list
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Remove the element at the back of \p list
 Remove the element at the back of \p list
 
 
-\fn struct starpu_task *starpu_task_list_begin(struct starpu_task_list *list)
+\fn struct starpu_task *starpu_task_list_begin(const struct starpu_task_list *list)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Get the first task of \p list.
 Get the first task of \p list.
 
 
-\fn struct starpu_task *starpu_task_list_end(struct starpu_task_list *list)
+\fn struct starpu_task *starpu_task_list_end(const struct starpu_task_list *list)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Get the end of \p list.
 Get the end of \p list.
 
 
-\fn struct starpu_task *starpu_task_list_next(struct starpu_task *task)
+\fn struct starpu_task *starpu_task_list_next(const struct starpu_task *task)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Get the next task of \p list. This is not erase-safe.
 Get the next task of \p list. This is not erase-safe.
 
 
-\fn int starpu_task_list_ismember(struct starpu_task_list *list, struct starpu_task *look)
+\fn int starpu_task_list_ismember(const struct starpu_task_list *list, const struct starpu_task *look)
 \ingroup API_Task_Lists
 \ingroup API_Task_Lists
 Test whether the given task \p look is contained in the \p list.
 Test whether the given task \p look is contained in the \p list.
 
 

+ 66 - 0
doc/doxygen/chapters/api/workers.doxy

@@ -264,4 +264,70 @@ whose StarPU identifier is \p node.
 \ingroup API_Workers_Properties
 \ingroup API_Workers_Properties
 Return worker \p type as a string.
 Return worker \p type as a string.
 
 
+\fn int starpu_worker_sched_op_pending(void)
+\ingroup API_Workers_Properties
+Return \c !0 if current worker has a scheduling operation in progress,
+and \c 0 otherwise.
+
+\fn void starpu_worker_relax_on(void)
+\ingroup API_Workers_Properties
+Allow other threads and workers to temporarily observe the current
+worker state, even though it is performing a scheduling operation.
+Must be called by a worker before performing a potentially blocking
+call such as acquiring a mutex other than its own sched_mutex. This
+function increases \c state_relax_refcnt from the current worker. No
+more than <c>UINT_MAX-1</c> nested relax_on calls should performed on
+the same worker. This function is automatically called by \ref
+starpu_worker_lock to relax the caller worker state while attempting
+to lock the targer worker.
+
+\fn void starpu_worker_relax_off(void)
+\ingroup API_Workers_Properties
+Must be called after a potentially blocking call is complete, to
+restore the relax state in place before the corresponding relax_on.
+Decreases \c state_relax_refcnt. Calls to \ref starpu_worker_relax_on
+and \c starpu_worker_relax_off must be well parenthesized. This
+function is automatically called by \ref starpu_worker_unlock after the 
+target worker has been unlocked.
+
+\fn int starpu_worker_get_relax_state(void)
+\ingroup API_Workers_Properties
+Returns \c !0 if the current worker \c state_relax_refcnt!=0 and \c 0
+otherwise.
+
+\fn void starpu_worker_lock(int workerid)
+\ingroup API_Workers_Properties
+Acquire the sched mutex of \p workerid. If the caller is a worker,
+distinct from \p workerid, the caller worker automatically enter relax
+state while acquiring the target worker lock.
+
+\fn int starpu_worker_trylock(int workerid)
+\ingroup API_Workers_Properties
+Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if
+successful, \c !0 if \p workerid sched mutex is held or the
+corresponding worker is not in relaxed stated.
+If the caller is a worker, distinct from \p workerid, the caller
+worker automatically enter relax state if successfully acquiring the target
+worker lock.
+
+\fn void starpu_worker_unlock(int workerid)
+\ingroup API_Workers_Properties
+Release the previously acquired sched mutex of \p workerid. Restore
+the relaxed state of the caller worker if needed.
+
+\fn void starpu_worker_lock_self(void)
+\ingroup API_Workers_Properties
+Acquire the current worker sched mutex.
+
+\fn void starpu_worker_unlock_self(void)
+\ingroup API_Workers_Properties
+Release the current worker sched mutex.
+
+\fn int starpu_wake_worker_relax(int workerid)
+\ingroup API_Workers_Properties
+Wake up \p workerid while temporarily entering the current worker relaxed state
+if needed during the waiting process. Returns 1 if \p workerid has been woken
+up or its state_keep_awake flag has been set to 1, and 0 otherwise (if \p
+workerid was not in the STATE_SLEEPING or in the STATE_SCHEDULING).
+
 */
 */

+ 1 - 0
examples/heat/heat.sh

@@ -3,6 +3,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2017  Université de Bordeaux
 # Copyright (C) 2017  Université de Bordeaux
+# Copyright (C) 2017  Inria
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by

+ 19 - 2
examples/interface/complex_codelet.h

@@ -22,6 +22,21 @@
 
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
 
+/* Dumb performance model for simgrid */
+static double complex_cost_function(struct starpu_task *task, unsigned nimpl)
+{
+	(void) task;
+	(void) nimpl;
+	return 0.000001;
+}
+
+static struct starpu_perfmodel complex_model =
+{
+	.type = STARPU_COMMON,
+	.cost_function = complex_cost_function,
+	.symbol = "complex"
+};
+
 void compare_complex_codelet(void *descr[], void *_args)
 void compare_complex_codelet(void *descr[], void *_args)
 {
 {
 	int nx1 = STARPU_COMPLEX_GET_NX(descr[0]);
 	int nx1 = STARPU_COMPLEX_GET_NX(descr[0]);
@@ -57,7 +72,8 @@ struct starpu_codelet cl_compare =
 	/* .cpu_funcs_name = {"compare_complex_codelet"}, */
 	/* .cpu_funcs_name = {"compare_complex_codelet"}, */
 	.nbuffers = 2,
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_R},
 	.modes = {STARPU_R, STARPU_R},
-	.name = "cl_compare"
+	.name = "cl_compare",
+	.model = &complex_model
 };
 };
 
 
 void display_complex_codelet(void *descr[], void *_args)
 void display_complex_codelet(void *descr[], void *_args)
@@ -83,7 +99,8 @@ struct starpu_codelet cl_display =
 	.cpu_funcs_name = {"display_complex_codelet"},
 	.cpu_funcs_name = {"display_complex_codelet"},
 	.nbuffers = 1,
 	.nbuffers = 1,
 	.modes = {STARPU_R},
 	.modes = {STARPU_R},
-	.name = "cl_display"
+	.name = "cl_display",
+	.model = &complex_model
 };
 };
 
 
 #endif /* __COMPLEX_CODELET_H */
 #endif /* __COMPLEX_CODELET_H */

+ 3 - 0
examples/mandelbrot/mandelbrot.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2010, 2011, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -497,7 +498,9 @@ int main(int argc, char **argv)
 	conf.ncuda = 0;
 	conf.ncuda = 0;
 
 
 	if (use_spmd_p)
 	if (use_spmd_p)
+	{
 		conf.sched_policy_name = "peager";
 		conf.sched_policy_name = "peager";
+	}
 
 
 	ret = starpu_init(&conf);
 	ret = starpu_init(&conf);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

+ 8 - 2
examples/sched_ctx/dummy_sched_with_ctx.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010-2013, 2016  CNRS
  * Copyright (C) 2010-2013, 2016  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -118,8 +118,14 @@ static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id)
 	 * the calling worker. So we just take the head of the list and give it
 	 * the calling worker. So we just take the head of the list and give it
 	 * to the worker. */
 	 * to the worker. */
 	struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
+#ifdef STARPU_NON_BLOCKING_DRIVERS
+	if (starpu_task_list_empty(&data->sched_list))
+		return NULL;
+#endif
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
-	struct starpu_task *task = starpu_task_list_pop_back(&data->sched_list);
+	struct starpu_task *task = NULL;
+	if (!starpu_task_list_empty(&data->sched_list))
+		task = starpu_task_list_pop_back(&data->sched_list);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	return task;
 	return task;
 }
 }

+ 5 - 1
examples/sched_ctx/nested_sched_ctxs.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010-2014, 2016, 2017  CNRS
  * Copyright (C) 2010-2014, 2016, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -53,7 +54,10 @@ static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 	unsigned sched_ctx = (uintptr_t)arg;
 	unsigned sched_ctx = (uintptr_t)arg;
 	int t = parallel_code(sched_ctx);
 	int t = parallel_code(sched_ctx);
 	if (sched_ctx > 0 && sched_ctx < 3)
 	if (sched_ctx > 0 && sched_ctx < 3)
-		tasks_executed[sched_ctx-1] += t;
+	{
+		STARPU_ATOMIC_ADD(&tasks_executed[sched_ctx-1], t);
+	}
+
 	//printf("w %d executed %d it \n", w, n);
 	//printf("w %d executed %d it \n", w, n);
 }
 }
 
 

+ 8 - 6
examples/sched_ctx/parallel_code.c

@@ -16,6 +16,7 @@
  */
  */
 
 
 #include <starpu.h>
 #include <starpu.h>
+#ifdef STARPU_USE_CPU
 #include <omp.h>
 #include <omp.h>
 
 
 #ifdef STARPU_QUICK_CHECK
 #ifdef STARPU_QUICK_CHECK
@@ -71,16 +72,10 @@ int main(int argc, char **argv)
 	int nprocs1;
 	int nprocs1;
 	int *procs1;
 	int *procs1;
 
 
-#ifdef STARPU_USE_CPU
 	unsigned ncpus =  starpu_cpu_worker_get_count();
 	unsigned ncpus =  starpu_cpu_worker_get_count();
 	procs1 = (int*)malloc(ncpus*sizeof(int));
 	procs1 = (int*)malloc(ncpus*sizeof(int));
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
 	nprocs1 = ncpus;
 	nprocs1 = ncpus;
-#else
-	nprocs1 = 1;
-	procs1 = (int*)malloc(nprocs1*sizeof(int));
-	procs1[0] = 0;
-#endif
 
 
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0);
 
 
@@ -100,3 +95,10 @@ int main(int argc, char **argv)
 
 
 	return 0;
 	return 0;
 }
 }
+#else /* STARPU_USE_CPU */
+int main(int argc, char **argv)
+{
+	/* starpu_sched_ctx_exec_parallel_code() requires a CPU worker has parallel region master */
+	return 77; /* STARPU_TEST_SKIPPED */
+}
+#endif /* STARPU_USE_CPU */

+ 15 - 11
examples/sched_ctx/parallel_tasks_reuse_handle.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2015 INRIA
+ * Copyright (C) 2015, 2017 INRIA
  * Copyright (C) 2015, 2016 CNRS
  * Copyright (C) 2015, 2016 CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -17,6 +17,7 @@
 
 
 #include <starpu.h>
 #include <starpu.h>
 #include <omp.h>
 #include <omp.h>
+#include <pthread.h>
 
 
 #ifdef STARPU_QUICK_CHECK
 #ifdef STARPU_QUICK_CHECK
 #define NTASKS 64
 #define NTASKS 64
@@ -28,6 +29,8 @@
 #define LOOPS  10
 #define LOOPS  10
 #endif
 #endif
 
 
+#define N_NESTED_CTXS 2
+
 struct context
 struct context
 {
 {
 	int ncpus;
 	int ncpus;
@@ -38,6 +41,7 @@ struct context
 /* Helper for the task that will initiate everything */
 /* Helper for the task that will initiate everything */
 void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 {
 {
+	fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__);
 	int sched_ctx = *(int *)sched_ctx_;
 	int sched_ctx = *(int *)sched_ctx_;
 	int *cpuids = NULL;
 	int *cpuids = NULL;
 	int ncpuids = 0;
 	int ncpuids = 0;
@@ -50,6 +54,7 @@ void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_)
 
 
 	omp_set_num_threads(ncpuids);
 	omp_set_num_threads(ncpuids);
 	free(cpuids);
 	free(cpuids);
+	fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__);
 	return;
 	return;
 }
 }
 
 
@@ -101,25 +106,24 @@ void parallel_task_init()
 						  0);
 						  0);
 
 
 	/* Initialize nested contexts */
 	/* Initialize nested contexts */
-	/* WARNING : the number of contexts must be a divisor of the number of available cpus*/
-
-	contexts = malloc(sizeof(struct context)*2);
-	int cpus_per_context = main_context.ncpus/2;
+	contexts = malloc(sizeof(struct context)*N_NESTED_CTXS);
+	int cpus_per_context = main_context.ncpus/N_NESTED_CTXS;
 	int i;
 	int i;
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 	{
 	{
-		fprintf(stderr, "ncpus %d for context %d \n",cpus_per_context, i);
 		contexts[i].ncpus = cpus_per_context;
 		contexts[i].ncpus = cpus_per_context;
+		if (i == N_NESTED_CTXS-1)
+			contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS;
 		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
 		contexts[i].cpus = main_context.cpus+i*cpus_per_context;
 	}
 	}
 
 
-	for(i = 0; i < 2; i++)
+	for(i = 0; i < N_NESTED_CTXS; i++)
 		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
 		contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus,
 							 contexts[i].ncpus,"nested_ctx",
 							 contexts[i].ncpus,"nested_ctx",
 							 STARPU_SCHED_CTX_NESTED,main_context.id,
 							 STARPU_SCHED_CTX_NESTED,main_context.id,
 							 0);
 							 0);
 
 
-	for (i = 0; i < 2; i++)
+	for (i = 0; i < N_NESTED_CTXS; i++)
 	{
 	{
 		parallel_task_init_one_context(&contexts[i].id);
 		parallel_task_init_one_context(&contexts[i].id);
 	}
 	}
@@ -131,7 +135,7 @@ void parallel_task_init()
 void parallel_task_deinit()
 void parallel_task_deinit()
 {
 {
 	int i;
 	int i;
-	for (i=0; i<2;i++)
+	for (i=0; i<N_NESTED_CTXS;i++)
 		starpu_sched_ctx_delete(contexts[i].id);
 		starpu_sched_ctx_delete(contexts[i].id);
 	free(contexts);
 	free(contexts);
 	free(main_context.cpus);
 	free(main_context.cpus);
@@ -174,7 +178,7 @@ int main(int argc, char **argv)
 		return 77;
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-	if (starpu_cpu_worker_get_count() < 2)
+	if (starpu_cpu_worker_get_count() < N_NESTED_CTXS)
 	{
 	{
 		starpu_shutdown();
 		starpu_shutdown();
 		return 77;
 		return 77;

+ 19 - 10
examples/sched_ctx/sched_ctx.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2010-2014  Université de Bordeaux
  * Copyright (C) 2010-2014  Université de Bordeaux
  * Copyright (C) 2010-2014, 2016  CNRS
  * Copyright (C) 2010-2014, 2016  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,17 +26,25 @@
 #endif
 #endif
 
 
 int tasks_executed = 0;
 int tasks_executed = 0;
-starpu_pthread_mutex_t mut;
+int ctx1_tasks_executed = 0;
+int ctx2_tasks_executed = 0;
 
 
 static void sched_ctx_cpu_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 static void sched_ctx_cpu_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 {
 {
-	STARPU_PTHREAD_MUTEX_LOCK(&mut);
-	tasks_executed++;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&mut);
+	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1);
 }
 }
 
 
-static void sched_ctx_cuda_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
+static void sched_ctx2_cpu_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 {
 {
+	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
+}
+
+static void sched_ctx2_cuda_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
+{
+	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
 }
 }
 
 
 static struct starpu_codelet sched_ctx_codelet1 =
 static struct starpu_codelet sched_ctx_codelet1 =
@@ -48,8 +57,8 @@ static struct starpu_codelet sched_ctx_codelet1 =
 
 
 static struct starpu_codelet sched_ctx_codelet2 =
 static struct starpu_codelet sched_ctx_codelet2 =
 {
 {
-	.cpu_funcs = {sched_ctx_cpu_func},
-	.cuda_funcs = {sched_ctx_cuda_func},
+	.cpu_funcs = {sched_ctx2_cpu_func},
+	.cuda_funcs = {sched_ctx2_cuda_func},
 	.model = NULL,
 	.model = NULL,
 	.nbuffers = 0,
 	.nbuffers = 0,
 	.name = "sched_ctx"
 	.name = "sched_ctx"
@@ -71,8 +80,6 @@ int main(int argc, char **argv)
 		return 77;
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-	STARPU_PTHREAD_MUTEX_INIT(&mut, NULL);
-
 #ifdef STARPU_USE_CPU
 #ifdef STARPU_USE_CPU
 	nprocs1 = starpu_cpu_worker_get_count();
 	nprocs1 = starpu_cpu_worker_get_count();
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1);
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1);
@@ -155,7 +162,9 @@ int main(int argc, char **argv)
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx2);
-	printf("tasks executed %d out of %d\n", tasks_executed, ntasks/2);
+	printf("tasks executed %d out of %d\n", tasks_executed, ntasks+1);
+	printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed);
+	printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed);
 
 
 enodev:
 enodev:
 	starpu_shutdown();
 	starpu_shutdown();

+ 4 - 2
examples/scheduler/dummy_sched.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010-2013, 2016  CNRS
  * Copyright (C) 2010-2013, 2016  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -122,7 +122,9 @@ static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id)
 		return NULL;
 		return NULL;
 #endif
 #endif
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
-	struct starpu_task *task = starpu_task_list_pop_back(&data->sched_list);
+	struct starpu_task *task = NULL;
+	if (!starpu_task_list_empty(&data->sched_list))
+		task = starpu_task_list_pop_back(&data->sched_list);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	return task;
 	return task;
 }
 }

+ 3 - 21
include/fstarpu_mod.f90

@@ -1,6 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
 !
-! Copyright (C) 2016  Inria
+! Copyright (C) 2016-2017  Inria
 !
 !
 ! StarPU is free software; you can redistribute it and/or modify
 ! StarPU is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
 ! it under the terms of the GNU Lesser General Public License as published by
@@ -44,6 +44,7 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER
         type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER
         type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER
         type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER
+        type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE
         type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG
         type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG
         type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL
         type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL
         type(c_ptr), bind(C) :: FSTARPU_FLOPS
         type(c_ptr), bind(C) :: FSTARPU_FLOPS
@@ -2022,26 +2023,6 @@ module fstarpu_mod
                         integer(c_int), value, intent(in) :: sched_ctx_id
                         integer(c_int), value, intent(in) :: sched_ctx_id
                 end subroutine fstarpu_sched_ctx_list_task_counters_reset_all
                 end subroutine fstarpu_sched_ctx_list_task_counters_reset_all
 
 
-                ! void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority);
-                subroutine fstarpu_sched_ctx_set_priority (workers, nworkers,  sched_ctx_id, priority) &
-                                bind(c,name="starpu_sched_ctx_set_priority")
-                        use iso_c_binding, only: c_int
-                        integer(c_int), intent(in) :: workers(*)
-                        integer(c_int), value, intent(in) :: nworkers
-                        integer(c_int), value, intent(in) :: sched_ctx_id
-                        integer(c_int), value, intent(in) :: priority
-                end subroutine fstarpu_sched_ctx_set_priority
-
-                ! void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority);
-                subroutine fstarpu_sched_ctx_set_priority_on_level ( workers_to_add, nworkers_to_add, sched_ctx, priority) &
-                                bind(c,name="starpu_sched_ctx_set_priority_on_level")
-                        use iso_c_binding, only: c_int
-                        integer(c_int), intent(in) :: workers_to_add(*)
-                        integer(c_int), value, intent(in) :: nworkers_to_add
-                        integer(c_int), value, intent(in) :: sched_ctx
-                        integer(c_int), value, intent(in) :: priority
-                end subroutine fstarpu_sched_ctx_set_priority_on_level
-
                 ! unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
                 ! unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
                 function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) &
                 function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) &
                                 bind(c,name="starpu_sched_ctx_get_priority")
                                 bind(c,name="starpu_sched_ctx_get_priority")
@@ -2280,6 +2261,7 @@ module fstarpu_mod
                         FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR)
                         FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR)
                         FSTARPU_EXECUTE_ON_WORKER       = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR)
                         FSTARPU_EXECUTE_ON_WORKER       = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR)
                         FSTARPU_WORKER_ORDER    = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR)
                         FSTARPU_WORKER_ORDER    = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR)
+                        FSTARPU_EXECUTE_WHERE       = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR)
                         FSTARPU_HYPERVISOR_TAG  = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR)
                         FSTARPU_HYPERVISOR_TAG  = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR)
                         FSTARPU_POSSIBLY_PARALLEL       = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR)
                         FSTARPU_POSSIBLY_PARALLEL       = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR)
                         FSTARPU_FLOPS   = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR)
                         FSTARPU_FLOPS   = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR)

+ 1 - 0
include/starpu.h

@@ -62,6 +62,7 @@ typedef UINT_PTR uintptr_t;
 #include <starpu_rand.h>
 #include <starpu_rand.h>
 #include <starpu_cuda.h>
 #include <starpu_cuda.h>
 #include <starpu_cublas.h>
 #include <starpu_cublas.h>
+#include <starpu_cusparse.h>
 #include <starpu_bound.h>
 #include <starpu_bound.h>
 #include <starpu_hash.h>
 #include <starpu_hash.h>
 #include <starpu_profiling.h>
 #include <starpu_profiling.h>

+ 5 - 1
include/starpu_cublas_v2.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,8 @@
 #ifndef __STARPU_CUBLAS_V2_H__
 #ifndef __STARPU_CUBLAS_V2_H__
 #define __STARPU_CUBLAS_V2_H__
 #define __STARPU_CUBLAS_V2_H__
 
 
+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+
 #include <cublas_v2.h>
 #include <cublas_v2.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -31,4 +33,6 @@ cublasHandle_t starpu_cublas_get_local_handle(void);
 }
 }
 #endif
 #endif
 
 
+#endif
+
 #endif /* __STARPU_CUBLAS_V2_H__ */
 #endif /* __STARPU_CUBLAS_V2_H__ */

+ 38 - 0
include/starpu_cusparse.h

@@ -0,0 +1,38 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_CUSPARSE_H__
+#define __STARPU_CUSPARSE_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+void starpu_cusparse_init(void);
+void starpu_cusparse_shutdown(void);
+
+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+#include <cusparse.h>
+cusparseHandle_t starpu_cusparse_get_local_handle(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_CUSPARSE_H__ */

+ 19 - 0
include/starpu_sched_component.h

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2013  Simon Archipoff
  * Copyright (C) 2013  Simon Archipoff
  * Copyright (C) 2014  CNRS
  * Copyright (C) 2014  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -92,6 +93,9 @@ int starpu_sched_tree_push_task(struct starpu_task *task);
 int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
 int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
 struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx);
 struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx);
 struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to);
 struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to);
+struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success);
+void starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component);
+
 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
 void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
 void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
 
 
@@ -107,6 +111,7 @@ void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *comp
 void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
 void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
 
 
 struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid);
 struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid);
+struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid);
 int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component);
 int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component);
 int starpu_sched_component_is_worker(struct starpu_sched_component *component);
 int starpu_sched_component_is_worker(struct starpu_sched_component *component);
 int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component);
 int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component);
@@ -196,6 +201,20 @@ struct starpu_sched_component_specs
 struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s);
 struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s);
 #endif /* STARPU_HAVE_HWLOC */
 #endif /* STARPU_HAVE_HWLOC */
 
 
+#define STARPU_COMPONENT_MUTEX_LOCK(m) \
+do \
+{ \
+	const int _relaxed_state = _starpu_worker_get_relax_state(); \
+	if (!_relaxed_state) \
+		_starpu_worker_relax_on(); \
+	STARPU_PTHREAD_MUTEX_LOCK((m)); \
+	if (!_relaxed_state) \
+		_starpu_worker_relax_off(); \
+} \
+while(0)
+
+#define STARPU_COMPONENT_MUTEX_UNLOCK(m) STARPU_PTHREAD_MUTEX_UNLOCK((m))
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 7 - 9
include/starpu_sched_ctx.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010 - 2012  INRIA
+ * Copyright (C) 2010 - 2012, 2017  INRIA
  * Copyright (C) 2016  Uppsala University
  * Copyright (C) 2016  Uppsala University
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -43,9 +43,9 @@ unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const
 
 
 void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args);
 void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args);
 
 
-void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
+void starpu_sched_ctx_add_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id);
 
 
-void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
+void starpu_sched_ctx_remove_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id);
 
 
 void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f);
 void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f);
 
 
@@ -133,16 +133,14 @@ void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int wo
 
 
 void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid);
 void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid);
 
 
-void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id);
+void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id);
 
 
-void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id);
+void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id);
 
 
 void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id);
 void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id);
 
 
 void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority);
 void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority);
 
 
-void starpu_sched_ctx_set_priority_on_level(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority);
-
 unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
 unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
 
 
 void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids);
 void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids);
@@ -160,9 +158,9 @@ unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned
 /* If not, returns STARPU_NMAX_SCHED_CTXS. */
 /* If not, returns STARPU_NMAX_SCHED_CTXS. */
 unsigned starpu_sched_ctx_master_get_context(int masterid);
 unsigned starpu_sched_ctx_master_get_context(int masterid);
 
 
-void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops);
+void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double flops);
 
 
-void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex, unsigned with_repush);
+void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush);
 
 
 int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id);
 int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id);
 
 

+ 3 - 5
include/starpu_scheduler.h

@@ -3,6 +3,7 @@
  * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016  Uppsala University
  * Copyright (C) 2016  Uppsala University
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -60,13 +61,10 @@ void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sc
 unsigned long starpu_task_get_job_id(struct starpu_task *task);
 unsigned long starpu_task_get_job_id(struct starpu_task *task);
 
 
 /* This function must be called to wake up a worker that is sleeping on the cond. 
 /* This function must be called to wake up a worker that is sleeping on the cond. 
- * It returns 0 whenever the worker is not in a sleeping state */
-int starpu_wake_worker(int workerid);
-int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
+ * It returns 0 whenever the worker is not in a sleeping state or has the state_keep_awake flag on */
+int starpu_wake_worker_no_relax(int workerid);
 /* This is a version of starpu_wake_worker which assumes that the sched mutex is locked */
 /* This is a version of starpu_wake_worker which assumes that the sched mutex is locked */
 int starpu_wake_worker_locked(int workerid);
 int starpu_wake_worker_locked(int workerid);
-/* This is a version of starpu_wakeup_worker which assumes that the sched mutex is locked */
-int starpu_wakeup_worker_locked(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
 
 
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask);
 int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask);

+ 1 - 0
include/starpu_task.h

@@ -136,6 +136,7 @@ struct starpu_task
 	const char *name;
 	const char *name;
 
 
 	struct starpu_codelet *cl;
 	struct starpu_codelet *cl;
+	int32_t where;
 
 
 	int nbuffers;
 	int nbuffers;
 
 

+ 33 - 125
include/starpu_task_list.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012, 2016  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2016-2017  Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,148 +25,56 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+	/* NOTE: this needs to have at least the same size as lists in src/common/list.h */
+#ifdef BUILDING_STARPU
+#define STARPU_TASK_LIST_INLINE extern inline
+#else
 struct starpu_task_list
 struct starpu_task_list
 {
 {
 	struct starpu_task *head;
 	struct starpu_task *head;
 	struct starpu_task *tail;
 	struct starpu_task *tail;
 };
 };
+#define STARPU_TASK_LIST_INLINE extern
+#endif
 
 
-static STARPU_INLINE
-void starpu_task_list_init(struct starpu_task_list *list)
-{
-	list->head = NULL;
-	list->tail = NULL;
-}
-
-static STARPU_INLINE
-void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task)
-{
-	if (list->tail == NULL)
-	{
-		list->tail = task;
-	}
-	else
-	{
-		list->head->prev = task;
-	}
-
-	task->prev = NULL;
-	task->next = list->head;
-	list->head = task;
-}
-
-static STARPU_INLINE
-void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task)
-{
-	if (list->head == NULL)
-	{
-		list->head = task;
-	}
-	else
-	{
-		list->tail->next = task;
-	}
-
-	task->next = NULL;
-	task->prev = list->tail;
-	list->tail = task;
-}
-
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_front(struct starpu_task_list *list)
-{
-	return list->head;
-}
-
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_back(struct starpu_task_list *list)
-{
-	return list->tail;
-}
-
-static STARPU_INLINE
-int starpu_task_list_empty(struct starpu_task_list *list)
-{
-	return (list->head == NULL);
-}
+STARPU_TASK_LIST_INLINE
+void starpu_task_list_init(struct starpu_task_list *list);
 
 
-static STARPU_INLINE
-void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task)
-{
-	struct starpu_task *p = task->prev;
-
-	if (p)
-	{
-		p->next = task->next;
-	}
-	else
-	{
-		list->head = task->next;
-	}
-
-	if (task->next)
-	{
-		task->next->prev = p;
-	}
-	else
-	{
-		list->tail = p;
-	}
-
-	task->prev = NULL;
-	task->next = NULL;
-}
+STARPU_TASK_LIST_INLINE
+void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task);
 
 
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_pop_front(struct starpu_task_list *list)
-{
-	struct starpu_task *task = list->head;
+STARPU_TASK_LIST_INLINE
+void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task);
 
 
-	if (task)
-		starpu_task_list_erase(list, task);
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_front(const struct starpu_task_list *list);
 
 
-	return task;
-}
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_back(const struct starpu_task_list *list);
 
 
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_pop_back(struct starpu_task_list *list)
-{
-	struct starpu_task *task = list->tail;
+STARPU_TASK_LIST_INLINE
+int starpu_task_list_empty(const struct starpu_task_list *list);
 
 
-	if (task)
-		starpu_task_list_erase(list, task);
+STARPU_TASK_LIST_INLINE
+void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task);
 
 
-	return task;
-}
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_pop_front(struct starpu_task_list *list);
 
 
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_begin(struct starpu_task_list *list)
-{
-	return list->head;
-}
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_pop_back(struct starpu_task_list *list);
 
 
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_end(struct starpu_task_list *list STARPU_ATTRIBUTE_UNUSED)
-{
-	return NULL;
-}
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_begin(const struct starpu_task_list *list);
 
 
-static STARPU_INLINE
-struct starpu_task *starpu_task_list_next(struct starpu_task *task)
-{
-	return task->next;
-}
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_end(const struct starpu_task_list *list STARPU_ATTRIBUTE_UNUSED);
 
 
-static STARPU_INLINE
-int starpu_task_list_ismember(struct starpu_task_list *list, struct starpu_task *look)
-{
-	struct starpu_task *task;
+STARPU_TASK_LIST_INLINE
+struct starpu_task *starpu_task_list_next(const struct starpu_task *task);
 
 
-	for (task  = list->head; task != NULL; task  = task->next)
-		if (task == look)
-			return 1;
-	return 0;
-}
+STARPU_TASK_LIST_INLINE
+int starpu_task_list_ismember(const struct starpu_task_list *list, const struct starpu_task *look);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }

+ 10 - 9
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2010-2014, 2016  CNRS
+ * Copyright (C) 2010-2014, 2016, 2017  CNRS
  * Copyright (C) 2014       INRIA
  * Copyright (C) 2014       INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 
 
 /* NOTE: when adding a value here, please make sure to update both
 /* NOTE: when adding a value here, please make sure to update both
  * src/util/starpu_task_insert_utils.c (in two places) and
  * src/util/starpu_task_insert_utils.c (in two places) and
- * mpi/src/starpu_mpi_task_insert.c */
+ * mpi/src/starpu_mpi_task_insert.c and mpi/src/starpu_mpi_task_insert_fortran.c */
 #define STARPU_MODE_SHIFT	17
 #define STARPU_MODE_SHIFT	17
 #define STARPU_VALUE		 (1<<STARPU_MODE_SHIFT)
 #define STARPU_VALUE		 (1<<STARPU_MODE_SHIFT)
 #define STARPU_CALLBACK		 (2<<STARPU_MODE_SHIFT)
 #define STARPU_CALLBACK		 (2<<STARPU_MODE_SHIFT)
@@ -54,13 +54,14 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_PROLOGUE_CALLBACK_POP   (16<<STARPU_MODE_SHIFT)
 #define STARPU_PROLOGUE_CALLBACK_POP   (16<<STARPU_MODE_SHIFT)
 #define STARPU_PROLOGUE_CALLBACK_POP_ARG (17<<STARPU_MODE_SHIFT)
 #define STARPU_PROLOGUE_CALLBACK_POP_ARG (17<<STARPU_MODE_SHIFT)
 #define STARPU_EXECUTE_ON_WORKER (18<<STARPU_MODE_SHIFT)
 #define STARPU_EXECUTE_ON_WORKER (18<<STARPU_MODE_SHIFT)
-#define STARPU_TAG_ONLY          (19<<STARPU_MODE_SHIFT)
-#define STARPU_POSSIBLY_PARALLEL    (20<<STARPU_MODE_SHIFT)
-#define STARPU_WORKER_ORDER      (21<<STARPU_MODE_SHIFT)
-#define STARPU_NODE_SELECTION_POLICY (22<<STARPU_MODE_SHIFT)
-#define STARPU_NAME		 (23<<STARPU_MODE_SHIFT)
-#define STARPU_CL_ARGS		(24<<STARPU_MODE_SHIFT)
-#define STARPU_SHIFTED_MODE_MAX (25<<STARPU_MODE_SHIFT)
+#define STARPU_EXECUTE_WHERE     (19<<STARPU_MODE_SHIFT)
+#define STARPU_TAG_ONLY          (20<<STARPU_MODE_SHIFT)
+#define STARPU_POSSIBLY_PARALLEL    (21<<STARPU_MODE_SHIFT)
+#define STARPU_WORKER_ORDER      (22<<STARPU_MODE_SHIFT)
+#define STARPU_NODE_SELECTION_POLICY (23<<STARPU_MODE_SHIFT)
+#define STARPU_NAME		 (24<<STARPU_MODE_SHIFT)
+#define STARPU_CL_ARGS		(25<<STARPU_MODE_SHIFT)
+#define STARPU_SHIFTED_MODE_MAX (26<<STARPU_MODE_SHIFT)
 
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 22 - 1
include/starpu_worker.h

@@ -127,7 +127,7 @@ struct starpu_tree* starpu_workers_get_tree(void);
 
 
 unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
 unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
 
 
-unsigned starpu_worker_is_blocked(int workerid);
+unsigned starpu_worker_is_blocked_in_parallel(int workerid);
 
 
 unsigned starpu_worker_is_slave_somewhere(int workerid);
 unsigned starpu_worker_is_slave_somewhere(int workerid);
 
 
@@ -140,6 +140,27 @@ int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int
 int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type);
 int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type);
 
 
 unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid);
 unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid);
+
+int starpu_worker_sched_op_pending(void);
+
+void starpu_worker_relax_on(void);
+
+void starpu_worker_relax_off(void);
+
+int starpu_worker_get_relax_state(void);
+
+void starpu_worker_lock(int workerid);
+
+int starpu_worker_trylock(int workerid);
+
+void starpu_worker_unlock(int workerid);
+
+void starpu_worker_lock_self(void);
+
+void starpu_worker_unlock_self(void);
+
+int starpu_wake_worker_relax(int workerid);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 26 - 3
mpi/examples/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2013, 2015-2016  Université de Bordeaux
+# Copyright (C) 2009-2013, 2015-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2016  Inria
 # Copyright (C) 2016  Inria
 #
 #
@@ -17,6 +17,15 @@
 
 
 include $(top_srcdir)/starpu.mk
 include $(top_srcdir)/starpu.mk
 
 
+if STARPU_SIMGRID
+STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
+STARPU_HOSTNAME=mirage
+MALLOC_PERTURB_=0
+export STARPU_PERF_MODEL_DIR
+export STARPU_HOSTNAME
+export MALLOC_PERTURB_
+endif
+
 CC=$(MPICC)
 CC=$(MPICC)
 CCLD=$(MPICC)
 CCLD=$(MPICC)
 FC=$(MPIFORT)
 FC=$(MPIFORT)
@@ -26,17 +35,23 @@ if STARPU_HAVE_WINDOWS
 LOADER_BIN		=
 LOADER_BIN		=
 else
 else
 loader_CPPFLAGS 	= 	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
 loader_CPPFLAGS 	= 	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+if !STARPU_SIMGRID
 LOADER			=	loader
 LOADER			=	loader
 LOADER_BIN		=	$(abs_top_builddir)/mpi/examples/$(LOADER)
 LOADER_BIN		=	$(abs_top_builddir)/mpi/examples/$(LOADER)
+endif
 loader_SOURCES		=	../../tests/loader.c
 loader_SOURCES		=	../../tests/loader.c
 endif
 endif
 
 
+if STARPU_SIMGRID
+MPI			=	$(abs_top_builddir)/tools/starpu_smpirun -np 4 -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile
+else
 # we always test on 4 processes, the execution time is not that bigger
 # we always test on 4 processes, the execution time is not that bigger
 if STARPU_QUICK_CHECK
 if STARPU_QUICK_CHECK
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 else
 else
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 endif
 endif
+endif
 
 
 if STARPU_HAVE_AM111
 if STARPU_HAVE_AM111
 TESTS_ENVIRONMENT	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
 TESTS_ENVIRONMENT	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
@@ -45,11 +60,9 @@ else
 TESTS_ENVIRONMENT 	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
 TESTS_ENVIRONMENT 	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
 endif
 endif
 
 
-if !STARPU_SIMGRID
 if STARPU_MPI_CHECK
 if STARPU_MPI_CHECK
 TESTS			=	$(starpu_mpi_EXAMPLES)
 TESTS			=	$(starpu_mpi_EXAMPLES)
 endif
 endif
-endif
 
 
 check_PROGRAMS = $(LOADER) $(starpu_mpi_EXAMPLES)
 check_PROGRAMS = $(LOADER) $(starpu_mpi_EXAMPLES)
 starpu_mpi_EXAMPLES =
 starpu_mpi_EXAMPLES =
@@ -248,11 +261,13 @@ matrix_decomposition_mpi_cholesky_distributed_LDADD =	\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	$(STARPU_BLAS_LDFLAGS) -lm
 	$(STARPU_BLAS_LDFLAGS) -lm
 
 
+if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES +=				\
 starpu_mpi_EXAMPLES +=				\
 	matrix_decomposition/mpi_cholesky			\
 	matrix_decomposition/mpi_cholesky			\
 	matrix_decomposition/mpi_cholesky_distributed
 	matrix_decomposition/mpi_cholesky_distributed
 endif
 endif
 endif
 endif
+endif
 
 
 ########################
 ########################
 # MPI Matrix mult example #
 # MPI Matrix mult example #
@@ -269,9 +284,11 @@ matrix_mult_mm_LDADD =			\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	-lm
 	-lm
 
 
+if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES +=				\
 starpu_mpi_EXAMPLES +=				\
 	matrix_mult/mm
 	matrix_mult/mm
 endif
 endif
+endif
 
 
 ##########################################
 ##########################################
 # Native Fortran MPI Matrix mult example #
 # Native Fortran MPI Matrix mult example #
@@ -303,12 +320,14 @@ native_fortran_nf_basic_ring_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la	\
 	-lm
 	-lm
 
 
+if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES +=				\
 starpu_mpi_EXAMPLES +=				\
 	native_fortran/nf_mm			\
 	native_fortran/nf_mm			\
 	native_fortran/nf_basic_ring
 	native_fortran/nf_basic_ring
 endif
 endif
 endif
 endif
 endif
 endif
+endif
 
 
 ###################
 ###################
 # complex example #
 # complex example #
@@ -344,9 +363,11 @@ user_datatype_user_datatype_SOURCES =		\
 user_datatype_user_datatype_LDADD =		\
 user_datatype_user_datatype_LDADD =		\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 
 
+if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES	+=			\
 starpu_mpi_EXAMPLES	+=			\
 	user_datatype/user_datatype
 	user_datatype/user_datatype
 endif
 endif
+endif
 
 
 ###################
 ###################
 # comm example #
 # comm example #
@@ -362,10 +383,12 @@ comm_comm_LDADD =		\
 comm_mix_comm_LDADD =		\
 comm_mix_comm_LDADD =		\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 
 
+if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES	+=			\
 starpu_mpi_EXAMPLES	+=			\
 	comm/comm				\
 	comm/comm				\
 	comm/mix_comm
 	comm/mix_comm
 endif
 endif
+endif
 
 
 if STARPU_HAVE_MPIFORT
 if STARPU_HAVE_MPIFORT
 if BUILD_EXAMPLES
 if BUILD_EXAMPLES

+ 17 - 1
mpi/examples/complex/mpi_complex.c

@@ -26,11 +26,27 @@ void display_foo_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	FPRINTF(stderr, "foo = %d\n", *foo);
 	FPRINTF(stderr, "foo = %d\n", *foo);
 }
 }
 
 
+/* Dumb performance model for simgrid */
+static double display_cost_function(struct starpu_task *task, unsigned nimpl)
+{
+	(void) task;
+	(void) nimpl;
+	return 0.000001;
+}
+
+static struct starpu_perfmodel display_model =
+{
+	.type = STARPU_COMMON,
+	.cost_function = display_cost_function,
+	.symbol = "display"
+};
+
 struct starpu_codelet foo_display =
 struct starpu_codelet foo_display =
 {
 {
 	.cpu_funcs = {display_foo_codelet},
 	.cpu_funcs = {display_foo_codelet},
 	.nbuffers = 1,
 	.nbuffers = 1,
-	.modes = {STARPU_R}
+	.modes = {STARPU_R},
+	.model = &display_model
 };
 };
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)

+ 17 - 1
mpi/examples/stencil/stencil5.c

@@ -37,11 +37,27 @@ void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 //	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
 //	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
 }
 }
 
 
+/* Dumb performance model for simgrid */
+static double stencil5_cost_function(struct starpu_task *task, unsigned nimpl)
+{
+	(void) task;
+	(void) nimpl;
+	return 0.000001;
+}
+
+static struct starpu_perfmodel stencil5_model =
+{
+	.type = STARPU_COMMON,
+	.cost_function = stencil5_cost_function,
+	.symbol = "stencil5"
+};
+
 struct starpu_codelet stencil5_cl =
 struct starpu_codelet stencil5_cl =
 {
 {
 	.cpu_funcs = {stencil5_cpu},
 	.cpu_funcs = {stencil5_cpu},
 	.nbuffers = 5,
 	.nbuffers = 5,
-	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R},
+	.model = &stencil5_model
 };
 };
 
 
 #ifdef STARPU_QUICK_CHECK
 #ifdef STARPU_QUICK_CHECK

+ 6 - 4
mpi/src/starpu_mpi.c

@@ -94,11 +94,10 @@ static int posted_requests = 0, newer_requests, barrier_running = 0;
 #pragma weak smpi_simulated_main_
 #pragma weak smpi_simulated_main_
 extern int smpi_simulated_main_(int argc, char *argv[]);
 extern int smpi_simulated_main_(int argc, char *argv[]);
 
 
-#ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
+#pragma weak smpi_process_set_user_data
 #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA
 #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA
 extern void smpi_process_set_user_data(void *);
 extern void smpi_process_set_user_data(void *);
 #endif
 #endif
-#endif
 
 
 static void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 static void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 {
 {
@@ -1334,12 +1333,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
 		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
 	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
 	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
 	/* And set TSD for us */
 	/* And set TSD for us */
-#ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
 	void **tsd;
 	void **tsd;
 	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
 	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
+	if (!smpi_process_set_user_data)
+	{
+		fprintf(stderr,"Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
+		exit(1);
+	}
 	smpi_process_set_user_data(tsd);
 	smpi_process_set_user_data(tsd);
 #endif
 #endif
-#endif
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 	_starpu_fxt_wait_initialisation();
 	_starpu_fxt_wait_initialisation();

+ 6 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -386,6 +386,12 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
                 {
                 {
                         (void)va_arg(varg_list_copy, void *);
                         (void)va_arg(varg_list_copy, void *);
 		}
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			// the flag is decoded and set later when
+			// calling function _starpu_task_insert_create()
+			(void)va_arg(varg_list_copy, uint32_t);
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
 		{
 			// the flag is decoded and set later when
 			// the flag is decoded and set later when

+ 5 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -241,6 +241,11 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			arg_i++;
 			/* void* */
 			/* void* */
 		}
 		}
+		else if (arg_type==STARPU_EXECUTE_WHERE)
+		{
+			arg_i++;
+			/* int* */
+		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
 		{
 			arg_i++;
 			arg_i++;

+ 20 - 17
mpi/tests/Makefile.am

@@ -107,17 +107,29 @@ starpu_mpi_TESTS +=				\
 	cache					\
 	cache					\
 	cache_disable				\
 	cache_disable				\
 	callback				\
 	callback				\
+	early_request				\
 	insert_task				\
 	insert_task				\
 	insert_task_block			\
 	insert_task_block			\
+	insert_task_dyn_handles			\
+	insert_task_node_choice			\
 	insert_task_owner			\
 	insert_task_owner			\
 	insert_task_owner2			\
 	insert_task_owner2			\
 	insert_task_owner_data			\
 	insert_task_owner_data			\
-	insert_task_node_choice			\
-	matrix
+	matrix					\
+	matrix2					\
+	mpi_detached_tag			\
+	mpi_irecv_detached			\
+	mpi_isend_detached			\
+	mpi_reduction				\
+	mpi_scatter_gather			\
+	policy_register				\
+	policy_register_many			\
+	policy_selection			\
+	policy_selection2			\
+	ring_async_implicit
 
 
 if !STARPU_SIMGRID
 if !STARPU_SIMGRID
 starpu_mpi_TESTS +=				\
 starpu_mpi_TESTS +=				\
-	datatypes				\
 	pingpong				\
 	pingpong				\
 	mpi_test				\
 	mpi_test				\
 	mpi_isend				\
 	mpi_isend				\
@@ -125,15 +137,11 @@ starpu_mpi_TESTS +=				\
 	mpi_earlyrecv2				\
 	mpi_earlyrecv2				\
 	mpi_earlyrecv2_sync			\
 	mpi_earlyrecv2_sync			\
 	mpi_irecv				\
 	mpi_irecv				\
-	mpi_isend_detached			\
-	mpi_irecv_detached			\
-	mpi_detached_tag			\
 	mpi_redux				\
 	mpi_redux				\
 	ring					\
 	ring					\
 	ring_sync				\
 	ring_sync				\
 	ring_sync_detached			\
 	ring_sync_detached			\
 	ring_async				\
 	ring_async				\
-	ring_async_implicit			\
 	block_interface				\
 	block_interface				\
 	block_interface_pinned			\
 	block_interface_pinned			\
 	matrix2					\
 	matrix2					\
@@ -141,24 +149,19 @@ starpu_mpi_TESTS +=				\
 	insert_task_sent_cache			\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
 	insert_task_recv_cache			\
 	insert_task_count			\
 	insert_task_count			\
-	insert_task_dyn_handles			\
 	multiple_send				\
 	multiple_send				\
-	mpi_scatter_gather			\
-	mpi_reduction				\
 	user_defined_datatype			\
 	user_defined_datatype			\
 	tags_checking				\
 	tags_checking				\
 	sync					\
 	sync					\
 	gather					\
 	gather					\
 	gather2					\
 	gather2					\
-	policy_register				\
-	policy_register_many			\
+	load_balancer
+
+# Expected to fail
+starpu_mpi_TESTS +=				\
 	policy_register_toomany			\
 	policy_register_toomany			\
 	policy_unregister			\
 	policy_unregister			\
-	policy_selection			\
-	policy_selection2			\
-	early_request				\
-	starpu_redefine				\
-	load_balancer
+	starpu_redefine
 endif
 endif
 
 
 noinst_PROGRAMS =				\
 noinst_PROGRAMS =				\

+ 9 - 6
mpi/tests/block_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2014, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2014, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -27,16 +27,18 @@
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2)
 	if (size < 2)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -142,7 +144,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/block_interface_pinned.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -27,16 +27,18 @@
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2)
 	if (size < 2)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -44,7 +46,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -146,7 +149,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 6
mpi/tests/datatypes.c

@@ -332,16 +332,18 @@ int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
 	int error=0;
 	int error=0;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2)
 	if (size < 2)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -349,7 +351,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -362,7 +365,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return rank == 0 ? error : 0;
 	return rank == 0 ? error : 0;
 }
 }

+ 10 - 6
mpi/tests/early_request.c

@@ -191,23 +191,26 @@ int main(int argc, char * argv[])
 	/* Init */
 	/* Init */
 	int ret;
 	int ret;
 	int mpi_rank, mpi_size;
 	int mpi_rank, mpi_size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size);
+
 	if (starpu_cpu_worker_get_count() == 0)
 	if (starpu_cpu_worker_get_count() == 0)
 	{
 	{
 		if (mpi_rank == 0)
 		if (mpi_rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -246,7 +249,8 @@ int main(int argc, char * argv[])
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 	FPRINTF(stderr, "No assert until end\n");
 	FPRINTF(stderr, "No assert until end\n");
 	return 0;
 	return 0;
 }
 }

+ 10 - 6
mpi/tests/gather.c

@@ -22,22 +22,25 @@ int main(int argc, char **argv)
 	int ret, rank, size;
 	int ret, rank, size;
 	starpu_data_handle_t handle;
 	starpu_data_handle_t handle;
 	int var;
 	int var;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size<3)
 	if (size<3)
 	{
 	{
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -69,7 +72,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 6
mpi/tests/gather2.c

@@ -20,22 +20,25 @@
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size<3)
 	if (size<3)
 	{
 	{
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -91,7 +94,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 13 - 4
mpi/tests/helper.h

@@ -16,16 +16,17 @@
 
 
 #include <errno.h>
 #include <errno.h>
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
+#include <starpu_config.h>
 #include "../../tests/helper.h"
 #include "../../tests/helper.h"
 
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
 #define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
 			int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \
 			int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \
 			fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
 			fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
-			fflush(ofile); }} while(0);
+			fflush(ofile); }} while(0)
 
 
-#define MPI_INIT_THREAD(argc, argv, required) do {	    \
-		int thread_support;					\
+#define MPI_INIT_THREAD_real(argc, argv, required) do {	\
+		int thread_support;				\
 		if (MPI_Init_thread(argc, argv, required, &thread_support) != MPI_SUCCESS) \
 		if (MPI_Init_thread(argc, argv, required, &thread_support) != MPI_SUCCESS) \
 		{						\
 		{						\
 			fprintf(stderr,"MPI_Init_thread failed\n");	\
 			fprintf(stderr,"MPI_Init_thread failed\n");	\
@@ -34,5 +35,13 @@
 		if (thread_support == MPI_THREAD_FUNNELED)		\
 		if (thread_support == MPI_THREAD_FUNNELED)		\
 			fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); \
 			fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); \
 		if (thread_support < MPI_THREAD_FUNNELED)		\
 		if (thread_support < MPI_THREAD_FUNNELED)		\
-			fprintf(stderr,"Warning: MPI does not have thread support!\n"); } while(0);
+			fprintf(stderr,"Warning: MPI does not have thread support!\n"); } while(0)
+
+#ifdef STARPU_SIMGRID
+#define MPI_INIT_THREAD(argc, argv, required, init) do { *(init) = 1 ; } while(0)
+#else
+#define MPI_INIT_THREAD(argc, argv, required, init) do {	\
+		*(init) = 0;                                    \
+		MPI_INIT_THREAD_real(argc, argv, required); } while(0)
+#endif
 
 

+ 1 - 1
mpi/tests/insert_task_compute.c

@@ -226,7 +226,7 @@ int main(int argc, char **argv)
 	int after_node[2][4] = {{220, 20, 11, 22}, {220, 20, 11, 22}};
 	int after_node[2][4] = {{220, 20, 11, 22}, {220, 20, 11, 22}};
 	int node, insert_task, data_array;
 	int node, insert_task, data_array;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
 
 	global_ret = 0;
 	global_ret = 0;

+ 11 - 7
mpi/tests/insert_task_count.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -63,16 +63,18 @@ int main(int argc, char **argv)
 	int ret, rank, size;
 	int ret, rank, size;
 	int token = 0;
 	int token = 0;
 	starpu_data_handle_t token_handle;
 	starpu_data_handle_t token_handle;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -84,7 +86,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -120,7 +123,8 @@ int main(int argc, char **argv)
 
 
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 	if (rank == 1)
 	if (rank == 1)

+ 9 - 4
mpi/tests/insert_task_dyn_handles.c

@@ -73,15 +73,17 @@ int main(int argc, char **argv)
         starpu_data_handle_t *data_handles;
         starpu_data_handle_t *data_handles;
         starpu_data_handle_t factor_handle;
         starpu_data_handle_t factor_handle;
 	struct starpu_data_descr *descrs;
 	struct starpu_data_descr *descrs;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+
 	if (starpu_cpu_worker_get_count() == 0)
 	if (starpu_cpu_worker_get_count() == 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -142,6 +144,7 @@ enodev:
 	}
 	}
 	else if (rank == 0)
 	else if (rank == 0)
 	{
 	{
+#ifndef STARPU_SIMGRID
 		for(i=0 ; i<STARPU_NMAXBUFS-1 ; i++)
 		for(i=0 ; i<STARPU_NMAXBUFS-1 ; i++)
 		{
 		{
 			if (x[i] != nloops * FFACTOR * 2)
 			if (x[i] != nloops * FFACTOR * 2)
@@ -162,6 +165,7 @@ enodev:
 		{
 		{
 			FPRINTF_MPI(stderr, "[end of loop] all values are correct\n");
 			FPRINTF_MPI(stderr, "[end of loop] all values are correct\n");
 		}
 		}
+#endif
 		free(x);
 		free(x);
 	}
 	}
 	else
 	else
@@ -173,6 +177,7 @@ enodev:
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 	return ret;
 	return ret;
 }
 }

+ 1 - 1
mpi/tests/insert_task_recv_cache.c

@@ -137,7 +137,7 @@ int main(int argc, char **argv)
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_without_cache;
 	size_t *comm_amount_without_cache;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 

+ 1 - 1
mpi/tests/insert_task_sent_cache.c

@@ -143,7 +143,7 @@ int main(int argc, char **argv)
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_without_cache;
 	size_t *comm_amount_without_cache;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 

+ 5 - 3
mpi/tests/load_balancer.c

@@ -46,14 +46,15 @@ int main(int argc, char **argv)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_mpi_lb_conf itf;
 	struct starpu_mpi_lb_conf itf;
+	int mpi_init;
 
 
 	itf.get_neighbors = get_neighbors;
 	itf.get_neighbors = get_neighbors;
 	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
 	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
 	unsetenv("STARPU_MPI_LB");
 	unsetenv("STARPU_MPI_LB");
@@ -65,7 +66,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 6
mpi/tests/matrix2.c

@@ -58,16 +58,18 @@ int main(int argc, char **argv)
 	unsigned X[N];
 	unsigned X[N];
 	starpu_data_handle_t data_A[N];
 	starpu_data_handle_t data_A[N];
 	starpu_data_handle_t data_X[N];
 	starpu_data_handle_t data_X[N];
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
 	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -79,7 +81,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -154,6 +157,7 @@ int main(int argc, char **argv)
 	}
 	}
 #endif
 #endif
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/mpi_detached_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,16 +33,18 @@ starpu_data_handle_t tab_handle;
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -50,7 +52,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -84,7 +87,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/mpi_earlyrecv.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,22 +25,25 @@ int main(int argc, char **argv)
 	starpu_data_handle_t tab_handle[4];
 	starpu_data_handle_t tab_handle[4];
 	int values[4];
 	int values[4];
 	starpu_mpi_req request[2] = {NULL, NULL};
 	starpu_mpi_req request[2] = {NULL, NULL};
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		FPRINTF_MPI(stderr, "We need a even number of processes.\n");
 		FPRINTF_MPI(stderr, "We need a even number of processes.\n");
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -123,7 +126,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return ret;
 	return ret;
 }
 }

+ 11 - 7
mpi/tests/mpi_earlyrecv2.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -207,22 +207,25 @@ int main(int argc, char **argv)
 {
 {
 	int ret=0, global_ret=0;
 	int ret=0, global_ret=0;
 	int rank, size;
 	int rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		FPRINTF(stderr, "We need a even number of processes.\n");
 		FPRINTF(stderr, "We need a even number of processes.\n");
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -247,7 +250,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return global_ret;
 	return global_ret;
 }
 }

+ 1 - 1
mpi/tests/mpi_earlyrecv2_sync.c

@@ -211,7 +211,7 @@ int main(int argc, char **argv)
 	int ret=0, global_ret=0;
 	int ret=0, global_ret=0;
 	int rank, size;
 	int rank, size;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 

+ 11 - 7
mpi/tests/mpi_irecv.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,16 +31,18 @@ starpu_data_handle_t tab_handle;
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -48,7 +50,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -81,7 +84,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/mpi_irecv_detached.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -48,16 +48,18 @@ void callback(void *arg STARPU_ATTRIBUTE_UNUSED)
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -65,7 +67,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -101,7 +104,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/mpi_isend.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,16 +31,18 @@ starpu_data_handle_t tab_handle;
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -48,7 +50,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -82,7 +85,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/mpi_isend_detached.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -47,16 +47,18 @@ int main(int argc, char **argv)
 	int ret, rank, size;
 	int ret, rank, size;
 	float *tab;
 	float *tab;
 	starpu_data_handle_t tab_handle;
 	starpu_data_handle_t tab_handle;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -64,7 +66,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -106,7 +109,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 7
mpi/tests/mpi_redux.c

@@ -36,18 +36,20 @@ int main(int argc, char **argv)
 	int ret, rank, size, sum;
 	int ret, rank, size, sum;
 	int value=0;
 	int value=0;
 	starpu_data_handle_t *handles;
 	starpu_data_handle_t *handles;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
-
-	sum = ((size-1) * (size) / 2);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	sum = ((size-1) * (size) / 2);
+
 	if (rank == 0)
 	if (rank == 0)
 	{
 	{
 		int src;
 		int src;
@@ -99,7 +101,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	STARPU_ASSERT_MSG(sum == value, "Sum of first %d integers is %d, not %d\n", size-1, sum, value);
 	STARPU_ASSERT_MSG(sum == value, "Sum of first %d integers is %d, not %d\n", size-1, sum, value);
 
 

+ 11 - 7
mpi/tests/mpi_test.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,16 +31,18 @@ int main(int argc, char **argv)
 	int ret, rank, size;
 	int ret, rank, size;
 	float *tab;
 	float *tab;
 	starpu_data_handle_t tab_handle;
 	starpu_data_handle_t tab_handle;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -48,7 +50,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -88,7 +91,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/pingpong.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,16 +32,18 @@ starpu_data_handle_t tab_handle;
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size%2 != 0)
 	if (size%2 != 0)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -49,7 +51,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -81,7 +84,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 6
mpi/tests/policy_register.c

@@ -69,16 +69,18 @@ int main(int argc, char **argv)
 	int policy;
 	int policy;
 	struct starpu_task *task;
 	struct starpu_task *task;
 	starpu_data_handle_t handles[2];
 	starpu_data_handle_t handles[2];
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2)
 	if (size < 2)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -86,7 +88,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -140,7 +143,8 @@ int main(int argc, char **argv)
 	starpu_data_unregister(handles[1]);
 	starpu_data_unregister(handles[1]);
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 10 - 5
mpi/tests/policy_selection.c

@@ -56,16 +56,19 @@ int main(int argc, char **argv)
 	int policy = 12;
 	int policy = 12;
 	struct starpu_task *task;
 	struct starpu_task *task;
 	starpu_data_handle_t handles[3];
 	starpu_data_handle_t handles[3];
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	(void)mpi_init;
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 3)
 	if (size < 3)
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -73,7 +76,8 @@ int main(int argc, char **argv)
 
 
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -181,7 +185,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 14 - 5
mpi/tests/policy_selection2.c

@@ -54,16 +54,19 @@ int main(int argc, char **argv)
 	int rank, size;
 	int rank, size;
 	int data[3];
 	int data[3];
 	starpu_data_handle_t handles[3];
 	starpu_data_handle_t handles[3];
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	(void)mpi_init;
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
 	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -75,7 +78,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -97,11 +101,13 @@ int main(int argc, char **argv)
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
 	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
 	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
 	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
 	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+#ifndef STARPU_SIMGRID
 	if (rank == 2)
 	if (rank == 2)
 	{
 	{
 		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
 		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
 				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
 				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
 	}
 	}
+#endif
 
 
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_W);
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_W);
 	for(i=0 ; i<2 ; i++) data[i] = 12;
 	for(i=0 ; i<2 ; i++) data[i] = 12;
@@ -115,17 +121,20 @@ int main(int argc, char **argv)
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
 	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
 	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
 	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
 	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
 	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+#ifndef STARPU_SIMGRID
 	if (rank == 1)
 	if (rank == 1)
 	{
 	{
 		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
 		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
 				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
 				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
 	}
 	}
+#endif
 
 
 	for(i=0 ; i<3 ; i++) starpu_data_unregister(handles[i]);
 	for(i=0 ; i<3 ; i++) starpu_data_unregister(handles[i]);
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 11 - 7
mpi/tests/ring.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -76,16 +76,18 @@ void increment_token(void)
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -97,7 +99,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -142,7 +145,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	if (rank == last_rank)

+ 11 - 7
mpi/tests/ring_async.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -76,16 +76,18 @@ void increment_token(void)
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -97,7 +99,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -147,7 +150,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	if (rank == last_rank)

+ 11 - 7
mpi/tests/ring_sync.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -76,16 +76,18 @@ void increment_token(void)
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size;
 	int ret, rank, size;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -97,7 +99,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -145,7 +148,8 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	if (rank == last_rank)

+ 11 - 7
mpi/tests/ring_sync_detached.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -88,16 +88,18 @@ int main(int argc, char **argv)
 	int ret, rank, size;
 	int ret, rank, size;
 	int token = 42;
 	int token = 42;
 	starpu_data_handle_t token_handle;
 	starpu_data_handle_t token_handle;
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 	{
 		if (rank == 0)
 		if (rank == 0)
@@ -109,7 +111,8 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_mpi_shutdown();
 		starpu_mpi_shutdown();
 		starpu_shutdown();
 		starpu_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
@@ -161,7 +164,8 @@ int main(int argc, char **argv)
 	starpu_shutdown();
 	starpu_shutdown();
 
 
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	if (rank == last_rank)

+ 5 - 3
mpi/tests/starpu_redefine.c

@@ -21,14 +21,15 @@ int main(int argc, char **argv)
 {
 {
 	int ret;
 	int ret;
 	starpu_data_handle_t handle;
 	starpu_data_handle_t handle;
+	int mpi_init;
 
 
 	disable_coredump();
 	disable_coredump();
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
+	ret = starpu_mpi_init(NULL, NULL, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
 	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&ret, 1, sizeof(int));
 	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&ret, 1, sizeof(int));
@@ -37,7 +38,8 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-	MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 
 
 	return 0;
 	return 0;
 }
 }

+ 8 - 5
mpi/tests/sync.c

@@ -23,15 +23,17 @@ int main(int argc, char **argv)
 	int rank, other_rank;
 	int rank, other_rank;
 	int ret;
 	int ret;
 	starpu_data_handle_t data[2];
 	starpu_data_handle_t data[2];
+	int mpi_init;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
-        starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
-        starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+        MPI_Comm_size(MPI_COMM_WORLD, &size);
 
 
         if (size % 2)
         if (size % 2)
         {
         {
 		FPRINTF(stderr, "We need a even number of processes.\n");
 		FPRINTF(stderr, "We need a even number of processes.\n");
-                MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
                 return STARPU_TEST_SKIPPED;
                 return STARPU_TEST_SKIPPED;
         }
         }
 
 
@@ -92,6 +94,7 @@ int main(int argc, char **argv)
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 	starpu_shutdown();
-        MPI_Finalize();
+	if (!mpi_init)
+		MPI_Finalize();
 	return 0;
 	return 0;
 }
 }

+ 1 - 1
mpi/tests/tags_checking.c

@@ -124,7 +124,7 @@ int main(int argc, char **argv)
 	int ret=0;
 	int ret=0;
 	int sdetached, rdetached;
 	int sdetached, rdetached;
 
 
-	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED);
         starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
         starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
         starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
         starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 

+ 0 - 1
sc_hypervisor/src/sc_hypervisor.c

@@ -403,7 +403,6 @@ void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
 	if(npus)
 	if(npus)
 	{
 	{
 		starpu_sched_ctx_set_priority(pus, npus, father, 1);
 		starpu_sched_ctx_set_priority(pus, npus, father, 1);
-		starpu_sched_ctx_set_priority_on_level(pus, npus, father, 1);
 		free(pus);
 		free(pus);
 	}
 	}
 
 

+ 10 - 0
socl/examples/matmul/matmul.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010,2011, 2015, 2017 University of Bordeaux
  * Copyright (C) 2010,2011, 2015, 2017 University of Bordeaux
+ * Copyright (C) 2017 Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -13,6 +14,14 @@
  *
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+int main(int argc, const char** argv) {
+	(void) argv;
+	(void) argv;
+	/* testcase does not seem to support blocking drivers */
+	return 77;
+}
+#else
 
 
 #ifdef __APPLE_CC__
 #ifdef __APPLE_CC__
 #include <OpenCL/opencl.h>
 #include <OpenCL/opencl.h>
@@ -512,3 +521,4 @@ void computeReference(TYPE* C, const TYPE* A, const TYPE* B, unsigned int hA, un
 			C[i * wB + j] = (TYPE)sum;
 			C[i * wB + j] = (TYPE)sum;
 		}
 		}
 }
 }
+#endif /* STARPU_NON_BLOCKING_DRIVERS */

+ 1 - 1
socl/src/cl_createkernel.c

@@ -29,7 +29,7 @@ static void soclCreateKernel_task(void *data) {
       return;
       return;
    }
    }
 
 
-   DEBUG_MSG("[Device %d] Creating kernel...\n", starpu_worker_get_id_check());
+   DEBUG_MSG("[Device %u] Creating kernel...\n", starpu_worker_get_id_check());
    k->cl_kernels[range] = clCreateKernel(k->program->cl_programs[range], k->kernel_name, &err);
    k->cl_kernels[range] = clCreateKernel(k->program->cl_programs[range], k->kernel_name, &err);
    if (err != CL_SUCCESS) {
    if (err != CL_SUCCESS) {
       k->errcodes[range] = err;
       k->errcodes[range] = err;

+ 4 - 4
socl/src/gc.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012 University of Bordeaux
  * Copyright (C) 2010-2012 University of Bordeaux
- * Copyright (C) 2012, 2014 CNRS
+ * Copyright (C) 2012, 2014, 2017 CNRS
  * Copyright (C) 2012 Vincent Danjean <Vincent.Danjean@ens-lyon.org>
  * Copyright (C) 2012 Vincent Danjean <Vincent.Danjean@ens-lyon.org>
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -108,7 +108,7 @@ void gc_stop(void) {
 
 
 int gc_entity_release_ex(entity e, const char * DEBUG_PARAM(caller)) {
 int gc_entity_release_ex(entity e, const char * DEBUG_PARAM(caller)) {
 
 
-  DEBUG_MSG("[%s] Decrementing refcount of %s %p to ", caller, e->name, e);
+  DEBUG_MSG("[%s] Decrementing refcount of %s %p to ", caller, e->name, (void *)e);
 
 
   /* Decrement reference count */
   /* Decrement reference count */
   int refs = __sync_sub_and_fetch(&e->refs, 1);
   int refs = __sync_sub_and_fetch(&e->refs, 1);
@@ -120,7 +120,7 @@ int gc_entity_release_ex(entity e, const char * DEBUG_PARAM(caller)) {
   if (refs != 0)
   if (refs != 0)
     return 0;
     return 0;
 
 
-  DEBUG_MSG("[%s] Releasing %s %p\n", caller, e->name, e);
+  DEBUG_MSG("[%s] Releasing %s %p\n", caller, e->name, (void *)e);
 
 
   GC_LOCK;
   GC_LOCK;
 
 
@@ -209,7 +209,7 @@ void gc_print_remaining_entities(void) {
 
 
    entity e = entities;
    entity e = entities;
    while (e != NULL) {
    while (e != NULL) {
-      DEBUG_MSG("  - %s %p\n", e->name, e);
+      DEBUG_MSG("  - %s %p\n", e->name, (void *)e);
       e = e->next;
       e = e->next;
    }
    }
 
 

+ 1 - 1
socl/src/task.c

@@ -105,7 +105,7 @@ cl_int task_submit_ex(starpu_task task, cl_command cmd) {
   gc_entity_release(ev);
   gc_entity_release(ev);
 
 
   /* Submit task */
   /* Submit task */
-  int ret = (task->cl != NULL && task->cl->where == STARPU_OPENCL ?
+  int ret = (task->cl != NULL && task->where == STARPU_OPENCL ?
         starpu_task_submit_to_ctx(task, cmd->event->cq->context->sched_ctx) :
         starpu_task_submit_to_ctx(task, cmd->event->cq->context->sched_ctx) :
         starpu_task_submit(task));
         starpu_task_submit(task));
 
 

+ 2 - 0
src/Makefile.am

@@ -168,6 +168,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	common/thread.c						\
 	common/thread.c						\
 	common/rbtree.c						\
 	common/rbtree.c						\
 	common/graph.c						\
 	common/graph.c						\
+	common/inlines.c					\
 	core/jobs.c						\
 	core/jobs.c						\
 	core/task.c						\
 	core/task.c						\
 	core/task_bundle.c					\
 	core/task_bundle.c					\
@@ -317,6 +318,7 @@ endif
 endif
 endif
 
 
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
 
 
 if STARPU_USE_OPENCL
 if STARPU_USE_OPENCL
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c

+ 12 - 0
src/common/graph.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2016-2017  Université de Bordeaux
  * Copyright (C) 2016-2017  Université de Bordeaux
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,6 +29,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <core/jobs.h>
 #include <core/jobs.h>
 #include <common/graph.h>
 #include <common/graph.h>
+#include <core/workers.h>
 
 
 /* Protects the whole task graph except the dropped list */
 /* Protects the whole task graph except the dropped list */
 static starpu_pthread_rwlock_t graph_lock;
 static starpu_pthread_rwlock_t graph_lock;
@@ -60,7 +62,9 @@ void _starpu_graph_init(void)
 /* LockWR the graph lock */
 /* LockWR the graph lock */
 void _starpu_graph_wrlock(void)
 void _starpu_graph_wrlock(void)
 {
 {
+	_starpu_worker_relax_on();
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
+	_starpu_worker_relax_off();
 }
 }
 
 
 void _starpu_graph_drop_node(struct _starpu_graph_node *node);
 void _starpu_graph_drop_node(struct _starpu_graph_node *node);
@@ -94,14 +98,18 @@ void _starpu_graph_drop_dropped_nodes(void)
 /* UnlockWR the graph lock */
 /* UnlockWR the graph lock */
 void _starpu_graph_wrunlock(void)
 void _starpu_graph_wrunlock(void)
 {
 {
+	_starpu_worker_relax_on();
 	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
 	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
+	_starpu_worker_relax_off();
 	_starpu_graph_drop_dropped_nodes();
 	_starpu_graph_drop_dropped_nodes();
 }
 }
 
 
 /* LockRD the graph lock */
 /* LockRD the graph lock */
 void _starpu_graph_rdlock(void)
 void _starpu_graph_rdlock(void)
 {
 {
+	_starpu_worker_relax_on();
 	STARPU_PTHREAD_RWLOCK_RDLOCK(&graph_lock);
 	STARPU_PTHREAD_RWLOCK_RDLOCK(&graph_lock);
+	_starpu_worker_relax_off();
 }
 }
 
 
 /* UnlockRD the graph lock */
 /* UnlockRD the graph lock */
@@ -247,12 +255,16 @@ void _starpu_graph_drop_job(struct _starpu_job *job)
 	if (!node)
 	if (!node)
 		return;
 		return;
 
 
+	_starpu_worker_relax_on();
 	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
+	_starpu_worker_relax_off();
 	/* Will not be able to use the job any more */
 	/* Will not be able to use the job any more */
 	node->job = NULL;
 	node->job = NULL;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
 
 
+	_starpu_worker_relax_on();
 	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
 	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
+	_starpu_worker_relax_off();
 	/* Queue for removal when lock becomes available */
 	/* Queue for removal when lock becomes available */
 	_starpu_graph_node_multilist_push_back_dropped(&dropped, node);
 	_starpu_graph_node_multilist_push_back_dropped(&dropped, node);
 	if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0)
 	if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0)

+ 22 - 0
src/common/inlines.c

@@ -0,0 +1,22 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This includes the inline definitions in a .c file so that they can also be
+ * referenced from outside */
+
+#define LIST_INLINE
+#define PRIO_LIST_INLINE
+#include <core/task.h>

+ 42 - 38
src/common/list.h

@@ -126,6 +126,9 @@
  */
  */
 
 
 
 
+#ifndef LIST_INLINE
+#define LIST_INLINE static inline
+#endif
 
 
 /**@hideinitializer
 /**@hideinitializer
  * Generates a new type for list of elements */
  * Generates a new type for list of elements */
@@ -146,75 +149,76 @@
  * The effective type declaration for lists */
  * The effective type declaration for lists */
 #define LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) \
 #define LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) \
   /** @internal */ \
   /** @internal */ \
+ /* NOTE: this must not be greater than the struct defined in include/starpu_task_list.h */ \
   struct ENAME##_list \
   struct ENAME##_list \
   { \
   { \
     struct ENAME *_head; /**< @internal head of the list */ \
     struct ENAME *_head; /**< @internal head of the list */ \
     struct ENAME *_tail; /**< @internal tail of the list */ \
     struct ENAME *_tail; /**< @internal tail of the list */ \
   }; \
   }; \
-  /** @internal */static inline struct ENAME *ENAME##_new(void) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_new(void) \
     { struct ENAME *e; _STARPU_MALLOC(e, sizeof(struct ENAME)); \
     { struct ENAME *e; _STARPU_MALLOC(e, sizeof(struct ENAME)); \
       e->_next = NULL; e->_prev = NULL; return e; } \
       e->_next = NULL; e->_prev = NULL; return e; } \
-  /** @internal */static inline void ENAME##_delete(struct ENAME *e) \
+  /** @internal */LIST_INLINE void ENAME##_delete(struct ENAME *e) \
     { free(e); } \
     { free(e); } \
-  /** @internal */static inline void ENAME##_list_push_front(struct ENAME##_list *l, struct ENAME *e) \
+  /** @internal */LIST_INLINE void ENAME##_list_push_front(struct ENAME##_list *l, struct ENAME *e) \
     { if(l->_tail == NULL) l->_tail = e; else l->_head->_prev = e; \
     { if(l->_tail == NULL) l->_tail = e; else l->_head->_prev = e; \
       e->_prev = NULL; e->_next = l->_head; l->_head = e; } \
       e->_prev = NULL; e->_next = l->_head; l->_head = e; } \
-  /** @internal */static inline void ENAME##_list_push_back(struct ENAME##_list *l, struct ENAME *e) \
+  /** @internal */LIST_INLINE void ENAME##_list_push_back(struct ENAME##_list *l, struct ENAME *e) \
     { if(l->_head == NULL) l->_head = e; else l->_tail->_next = e; \
     { if(l->_head == NULL) l->_head = e; else l->_tail->_next = e; \
       e->_next = NULL; e->_prev = l->_tail; l->_tail = e; } \
       e->_next = NULL; e->_prev = l->_tail; l->_tail = e; } \
-  /** @internal */static inline void ENAME##_list_insert_before(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \
+  /** @internal */LIST_INLINE void ENAME##_list_insert_before(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \
     { struct ENAME *p = o->_prev; if (p) { p->_next = e; e->_prev = p; } else { l->_head = e; e->_prev = NULL; } \
     { struct ENAME *p = o->_prev; if (p) { p->_next = e; e->_prev = p; } else { l->_head = e; e->_prev = NULL; } \
       e->_next = o; o->_prev = e; } \
       e->_next = o; o->_prev = e; } \
-  /** @internal */static inline void ENAME##_list_insert_after(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \
+  /** @internal */LIST_INLINE void ENAME##_list_insert_after(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \
     { struct ENAME *n = o->_next; if (n) { n->_prev = e; e->_next = n; } else { l->_tail = e; e->_next = NULL; } \
     { struct ENAME *n = o->_next; if (n) { n->_prev = e; e->_next = n; } else { l->_tail = e; e->_next = NULL; } \
       e->_prev = o; o->_next = e; } \
       e->_prev = o; o->_next = e; } \
-  /** @internal */static inline void ENAME##_list_push_list_front(struct ENAME##_list *l1, struct ENAME##_list *l2) \
+  /** @internal */LIST_INLINE void ENAME##_list_push_list_front(struct ENAME##_list *l1, struct ENAME##_list *l2) \
     { if (l2->_head == NULL) { l2->_head = l1->_head; l2->_tail = l1->_tail; } \
     { if (l2->_head == NULL) { l2->_head = l1->_head; l2->_tail = l1->_tail; } \
       else if (l1->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l2->_head = l1->_head; } } \
       else if (l1->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l2->_head = l1->_head; } } \
-  /** @internal */static inline void ENAME##_list_push_list_back(struct ENAME##_list *l1, struct ENAME##_list *l2) \
+  /** @internal */LIST_INLINE void ENAME##_list_push_list_back(struct ENAME##_list *l1, struct ENAME##_list *l2) \
     { if(l1->_head == NULL) { l1->_head = l2->_head; l1->_tail = l2->_tail; } \
     { if(l1->_head == NULL) { l1->_head = l2->_head; l1->_tail = l2->_tail; } \
       else if (l2->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l1->_tail = l2->_tail; } } \
       else if (l2->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l1->_tail = l2->_tail; } } \
-  /** @internal */static inline struct ENAME *ENAME##_list_front(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_front(const struct ENAME##_list *l) \
     { return l->_head; } \
     { return l->_head; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \
     { return l->_tail; } \
     { return l->_tail; } \
-  /** @internal */static inline void ENAME##_list_init(struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \
     { l->_head=NULL; l->_tail=l->_head; } \
     { l->_head=NULL; l->_tail=l->_head; } \
-  /** @internal */static inline struct ENAME##_list *ENAME##_list_new(void) \
+  /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \
     { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \
     { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \
       ENAME##_list_init(l); return l; } \
       ENAME##_list_init(l); return l; } \
-  /** @internal */static inline int ENAME##_list_empty(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE int ENAME##_list_empty(const struct ENAME##_list *l) \
     { return (l->_head == NULL); } \
     { return (l->_head == NULL); } \
-  /** @internal */static inline void ENAME##_list_delete(struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE void ENAME##_list_delete(struct ENAME##_list *l) \
     { free(l); } \
     { free(l); } \
-  /** @internal */static inline void ENAME##_list_erase(struct ENAME##_list *l, struct ENAME *c) \
+  /** @internal */LIST_INLINE void ENAME##_list_erase(struct ENAME##_list *l, struct ENAME *c) \
     { struct ENAME *p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \
     { struct ENAME *p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \
       if(c->_next) c->_next->_prev = p; else l->_tail = p; } \
       if(c->_next) c->_next->_prev = p; else l->_tail = p; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_pop_front(struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_pop_front(struct ENAME##_list *l) \
     { struct ENAME *e = ENAME##_list_front(l); \
     { struct ENAME *e = ENAME##_list_front(l); \
       ENAME##_list_erase(l, e); return e; } \
       ENAME##_list_erase(l, e); return e; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_pop_back(struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_pop_back(struct ENAME##_list *l) \
     { struct ENAME *e = ENAME##_list_back(l); \
     { struct ENAME *e = ENAME##_list_back(l); \
       ENAME##_list_erase(l, e); return e; } \
       ENAME##_list_erase(l, e); return e; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_begin(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_begin(const struct ENAME##_list *l) \
     { return l->_head; } \
     { return l->_head; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_end(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_end(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
     { return NULL; } \
     { return NULL; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_next(const struct ENAME *i) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_next(const struct ENAME *i) \
     { return i->_next; } \
     { return i->_next; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_last(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_last(const struct ENAME##_list *l) \
     { return l->_tail; } \
     { return l->_tail; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_alpha(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_alpha(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
     { return NULL; } \
     { return NULL; } \
-  /** @internal */static inline struct ENAME *ENAME##_list_prev(const struct ENAME *i) \
+  /** @internal */LIST_INLINE struct ENAME *ENAME##_list_prev(const struct ENAME *i) \
     { return i->_prev; } \
     { return i->_prev; } \
-  /** @internal */static inline int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \
+  /** @internal */LIST_INLINE int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \
     { struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \
     { struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \
-  /** @internal */static inline int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \
+  /** @internal */LIST_INLINE int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \
     { struct ENAME *i=l->_head; int k=0; while(i!=NULL){if (i == e) return k; k++; i=i->_next; } return -1; } \
     { struct ENAME *i=l->_head; int k=0; while(i!=NULL){if (i == e) return k; k++; i=i->_next; } return -1; } \
-  /** @internal */static inline int ENAME##_list_size(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE int ENAME##_list_size(const struct ENAME##_list *l) \
     { struct ENAME *i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \
     { struct ENAME *i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \
-  /** @internal */static inline int ENAME##_list_check(const struct ENAME##_list *l) \
+  /** @internal */LIST_INLINE int ENAME##_list_check(const struct ENAME##_list *l) \
     { struct ENAME *i=l->_head; while(i) \
     { struct ENAME *i=l->_head; while(i) \
     { if ((i->_next == NULL) && i != l->_tail) return 0; \
     { if ((i->_next == NULL) && i != l->_tail) return 0; \
       if (i->_next == i) return 0; \
       if (i->_next == i) return 0; \
@@ -247,18 +251,18 @@ struct ENAME##_multilist_##MEMBER { \
 /* Create the inlines */
 /* Create the inlines */
 #define MULTILIST_CREATE_INLINES(TYPE, ENAME, MEMBER) \
 #define MULTILIST_CREATE_INLINES(TYPE, ENAME, MEMBER) \
 /* Cast from list element to real type.  */ \
 /* Cast from list element to real type.  */ \
-static inline TYPE *ENAME##_of_multilist_##MEMBER(struct ENAME##_multilist_##MEMBER *elt) { \
+LIST_INLINE TYPE *ENAME##_of_multilist_##MEMBER(struct ENAME##_multilist_##MEMBER *elt) { \
 	return ((TYPE *) ((uintptr_t) (elt) - ((uintptr_t) (&((TYPE *) 0)->MEMBER)))); \
 	return ((TYPE *) ((uintptr_t) (elt) - ((uintptr_t) (&((TYPE *) 0)->MEMBER)))); \
 } \
 } \
 \
 \
 /* Initialize a list head.  */ \
 /* Initialize a list head.  */ \
-static inline void ENAME##_multilist_init_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
+LIST_INLINE void ENAME##_multilist_init_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
 	head->next = head; \
 	head->next = head; \
 	head->prev = head; \
 	head->prev = head; \
 } \
 } \
 \
 \
 /* Push element to head of a list.  */ \
 /* Push element to head of a list.  */ \
-static inline void ENAME##_multilist_push_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \
+LIST_INLINE void ENAME##_multilist_push_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \
 	e->MEMBER.next = head->next; \
 	e->MEMBER.next = head->next; \
@@ -268,7 +272,7 @@ static inline void ENAME##_multilist_push_front_##MEMBER(struct ENAME##_multilis
 } \
 } \
 \
 \
 /* Push element to tail of a list.  */ \
 /* Push element to tail of a list.  */ \
-static inline void ENAME##_multilist_push_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \
+LIST_INLINE void ENAME##_multilist_push_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \
 	e->MEMBER.prev = head->prev; \
 	e->MEMBER.prev = head->prev; \
@@ -278,7 +282,7 @@ static inline void ENAME##_multilist_push_back_##MEMBER(struct ENAME##_multilist
 } \
 } \
 \
 \
 /* Erase element from a list.  */ \
 /* Erase element from a list.  */ \
-static inline void ENAME##_multilist_erase_##MEMBER(struct ENAME##_multilist_##MEMBER *head STARPU_ATTRIBUTE_UNUSED, TYPE *e) { \
+LIST_INLINE void ENAME##_multilist_erase_##MEMBER(struct ENAME##_multilist_##MEMBER *head STARPU_ATTRIBUTE_UNUSED, TYPE *e) { \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next->prev == &e->MEMBER); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.next->prev == &e->MEMBER); \
 	e->MEMBER.next->prev = e->MEMBER.prev; \
 	e->MEMBER.next->prev = e->MEMBER.prev; \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev->next == &e->MEMBER); \
 	STARPU_ASSERT_MULTILIST(e->MEMBER.prev->next == &e->MEMBER); \
@@ -288,30 +292,30 @@ static inline void ENAME##_multilist_erase_##MEMBER(struct ENAME##_multilist_##M
 } \
 } \
 \
 \
 /* Test whether the element was queued on the list.  */ \
 /* Test whether the element was queued on the list.  */ \
-static inline int ENAME##_multilist_queued_##MEMBER(TYPE *e) { \
+LIST_INLINE int ENAME##_multilist_queued_##MEMBER(TYPE *e) { \
 	return ((e)->MEMBER.next != NULL); \
 	return ((e)->MEMBER.next != NULL); \
 } \
 } \
 \
 \
 /* Test whether the list is empty.  */ \
 /* Test whether the list is empty.  */ \
-static inline int ENAME##_multilist_empty_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
+LIST_INLINE int ENAME##_multilist_empty_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
 	return head->next == head; \
 	return head->next == head; \
 } \
 } \
 \
 \
 /* Return the first element of the list.  */ \
 /* Return the first element of the list.  */ \
-static inline TYPE *ENAME##_multilist_begin_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
+LIST_INLINE TYPE *ENAME##_multilist_begin_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
 	return ENAME##_of_multilist_##MEMBER(head->next); \
 	return ENAME##_of_multilist_##MEMBER(head->next); \
 } \
 } \
 /* Return the value to be tested at the end of the list.  */ \
 /* Return the value to be tested at the end of the list.  */ \
-static inline TYPE *ENAME##_multilist_end_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
+LIST_INLINE TYPE *ENAME##_multilist_end_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \
 	return ENAME##_of_multilist_##MEMBER(head); \
 	return ENAME##_of_multilist_##MEMBER(head); \
 } \
 } \
 /* Return the next element of the list.  */ \
 /* Return the next element of the list.  */ \
-static inline TYPE *ENAME##_multilist_next_##MEMBER(TYPE *e) { \
+LIST_INLINE TYPE *ENAME##_multilist_next_##MEMBER(TYPE *e) { \
 	return ENAME##_of_multilist_##MEMBER(e->MEMBER.next); \
 	return ENAME##_of_multilist_##MEMBER(e->MEMBER.next); \
 } \
 } \
 \
 \
  /* Move a list from its head to another head.  */ \
  /* Move a list from its head to another head.  */ \
-static inline void ENAME##_multilist_move_##MEMBER(struct ENAME##_multilist_##MEMBER *head, struct ENAME##_multilist_##MEMBER *newhead) { \
+LIST_INLINE void ENAME##_multilist_move_##MEMBER(struct ENAME##_multilist_##MEMBER *head, struct ENAME##_multilist_##MEMBER *newhead) { \
 	if (ENAME##_multilist_empty_##MEMBER(head)) \
 	if (ENAME##_multilist_empty_##MEMBER(head)) \
 		ENAME##_multilist_init_##MEMBER(newhead); \
 		ENAME##_multilist_init_##MEMBER(newhead); \
 	else { \
 	else { \

+ 46 - 42
src/common/prio_list.h

@@ -37,7 +37,7 @@
  *   * Test that the priority list is empty
  *   * Test that the priority list is empty
  * void FOO_prio_list_empty(struct FOO_prio_list*)
  * void FOO_prio_list_empty(struct FOO_prio_list*)
  *   * Erase element from the priority list
  *   * Erase element from the priority list
- * void FOO_prio_list_empty(struct FOO_prio_list*, struct FOO*)
+ * void FOO_prio_list_erase(struct FOO_prio_list*, struct FOO*)
  *   * Return and erase the first element of the priority list
  *   * Return and erase the first element of the priority list
  * void FOO_prio_list_pop_front(struct FOO_prio_list*)
  * void FOO_prio_list_pop_front(struct FOO_prio_list*)
  *   * Catenate second priority list at ends of the first priority list
  *   * Catenate second priority list at ends of the first priority list
@@ -66,6 +66,10 @@
 
 
 #include <common/rbtree.h>
 #include <common/rbtree.h>
 
 
+#ifndef PRIO_LIST_INLINE
+#define PRIO_LIST_INLINE static inline
+#endif
+
 #define PRIO_LIST_TYPE(ENAME, PRIOFIELD) \
 #define PRIO_LIST_TYPE(ENAME, PRIOFIELD) \
 	PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD)
 	PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD)
 
 
@@ -83,22 +87,22 @@
 		int prio; \
 		int prio; \
 		struct ENAME##_list list; \
 		struct ENAME##_list list; \
 	}; \
 	}; \
-	static inline struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage(struct starpu_rbtree_node *node) \
+	PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage(struct starpu_rbtree_node *node) \
 	{ \
 	{ \
 		/* This assumes node is first member of stage */ \
 		/* This assumes node is first member of stage */ \
 		return (struct ENAME##_prio_list_stage *) node; \
 		return (struct ENAME##_prio_list_stage *) node; \
 	} \
 	} \
-	static inline const struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage_const(const struct starpu_rbtree_node *node) \
+	PRIO_LIST_INLINE const struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage_const(const struct starpu_rbtree_node *node) \
 	{ \
 	{ \
 		/* This assumes node is first member of stage */ \
 		/* This assumes node is first member of stage */ \
 		return (struct ENAME##_prio_list_stage *) node; \
 		return (struct ENAME##_prio_list_stage *) node; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		starpu_rbtree_init(&priolist->tree); \
 		starpu_rbtree_init(&priolist->tree); \
 		priolist->empty = 1; \
 		priolist->empty = 1; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		if (starpu_rbtree_empty(&priolist->tree)) \
 		if (starpu_rbtree_empty(&priolist->tree)) \
 			return; \
 			return; \
@@ -109,13 +113,13 @@
 		starpu_rbtree_remove(&priolist->tree, root); \
 		starpu_rbtree_remove(&priolist->tree, root); \
 		free(stage); \
 		free(stage); \
 	} \
 	} \
-	static inline int ENAME##_prio_list_cmp_fn(int prio, const struct starpu_rbtree_node *node) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_cmp_fn(int prio, const struct starpu_rbtree_node *node) \
 	{ \
 	{ \
 		/* Sort by decreasing order */ \
 		/* Sort by decreasing order */ \
 		const struct ENAME##_prio_list_stage *e2 = ENAME##_node_to_list_stage_const(node); \
 		const struct ENAME##_prio_list_stage *e2 = ENAME##_node_to_list_stage_const(node); \
 		return (e2->prio - prio); \
 		return (e2->prio - prio); \
 	} \
 	} \
-	static inline struct ENAME##_prio_list_stage *ENAME##_prio_list_add(struct ENAME##_prio_list *priolist, int prio) \
+	PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_prio_list_add(struct ENAME##_prio_list *priolist, int prio) \
 	{ \
 	{ \
 		unsigned long slot; \
 		unsigned long slot; \
 		struct starpu_rbtree_node *node; \
 		struct starpu_rbtree_node *node; \
@@ -132,25 +136,25 @@
 		} \
 		} \
 		return stage; \
 		return stage; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ \
 	{ \
 		struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \
 		struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \
 		ENAME##_list_push_back(&stage->list, e); \
 		ENAME##_list_push_back(&stage->list, e); \
 		priolist->empty = 0; \
 		priolist->empty = 0; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ \
 	{ \
 		struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \
 		struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \
 		ENAME##_list_push_front(&stage->list, e); \
 		ENAME##_list_push_front(&stage->list, e); \
 		priolist->empty = 0; \
 		priolist->empty = 0; \
 	} \
 	} \
-	static inline int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		return priolist->empty; \
 		return priolist->empty; \
 	} \
 	} \
 	/* Version of list_empty which does not use the cached empty flag,
 	/* Version of list_empty which does not use the cached empty flag,
 	 * typically used to compute the value of the flag */ \
 	 * typically used to compute the value of the flag */ \
-	static inline int ENAME##_prio_list_empty_slow(const struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_empty_slow(const struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		if (starpu_rbtree_empty(&priolist->tree)) \
 		if (starpu_rbtree_empty(&priolist->tree)) \
 			return 1; \
 			return 1; \
@@ -161,7 +165,7 @@
 			return 1; \
 			return 1; \
 		return 0; \
 		return 0; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
 		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
 		struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \
 		struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \
@@ -176,7 +180,7 @@
 			priolist->empty = ENAME##_prio_list_empty_slow(priolist); \
 			priolist->empty = ENAME##_prio_list_empty_slow(priolist); \
 		} \
 		} \
 	} \
 	} \
-	static inline int ENAME##_prio_list_get_next_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_get_next_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
 	{ \
 	{ \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
 		while(1) { \
 		while(1) { \
@@ -201,12 +205,12 @@
 		*pstage = stage; \
 		*pstage = stage; \
 		return 1; \
 		return 1; \
 	} \
 	} \
-	static inline int ENAME##_prio_list_get_first_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_get_first_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node = starpu_rbtree_first(&priolist->tree); \
 		struct starpu_rbtree_node *node = starpu_rbtree_first(&priolist->tree); \
 		return ENAME##_prio_list_get_next_nonempty_stage(priolist, node, pnode, pstage); \
 		return ENAME##_prio_list_get_next_nonempty_stage(priolist, node, pnode, pstage); \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node; \
 		struct starpu_rbtree_node *node; \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
@@ -225,7 +229,7 @@
 		} \
 		} \
 		return ret; \
 		return ret; \
 	} \
 	} \
-	static inline int ENAME##_prio_list_get_prev_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_get_prev_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
 	{ \
 	{ \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
 		while(1) { \
 		while(1) { \
@@ -250,12 +254,12 @@
 		*pstage = stage; \
 		*pstage = stage; \
 		return 1; \
 		return 1; \
 	} \
 	} \
-	static inline int ENAME##_prio_list_get_last_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_get_last_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node = starpu_rbtree_last(&priolist->tree); \
 		struct starpu_rbtree_node *node = starpu_rbtree_last(&priolist->tree); \
 		return ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, pnode, pstage); \
 		return ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, pnode, pstage); \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node; \
 		struct starpu_rbtree_node *node; \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
@@ -274,7 +278,7 @@
 		} \
 		} \
 		return ret; \
 		return ret; \
 	} \
 	} \
-	static inline void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node_toadd, *tmp; \
 		struct starpu_rbtree_node *node_toadd, *tmp; \
 		starpu_rbtree_for_each_remove(&priolist_toadd->tree, node_toadd, tmp) { \
 		starpu_rbtree_for_each_remove(&priolist_toadd->tree, node_toadd, tmp) { \
@@ -306,7 +310,7 @@
 			} \
 			} \
 		} \
 		} \
 	} \
 	} \
-	static inline int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
 		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
 		if (node) { \
 		if (node) { \
@@ -315,7 +319,7 @@
 		} \
 		} \
 		return 0; \
 		return 0; \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node; \
 		struct starpu_rbtree_node *node; \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
@@ -323,9 +327,9 @@
 			return NULL; \
 			return NULL; \
 		return ENAME##_list_begin(&stage->list); \
 		return ENAME##_list_begin(&stage->list); \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
 	{ return NULL; } \
 	{ return NULL; } \
-	static inline struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
 	{ \
 	{ \
 		struct ENAME *next = ENAME##_list_next(i); \
 		struct ENAME *next = ENAME##_list_next(i); \
 		if (next != ENAME##_list_end(NULL)) \
 		if (next != ENAME##_list_end(NULL)) \
@@ -337,7 +341,7 @@
 			return NULL; \
 			return NULL; \
 		return ENAME##_list_begin(&stage->list); \
 		return ENAME##_list_begin(&stage->list); \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		struct starpu_rbtree_node *node; \
 		struct starpu_rbtree_node *node; \
 		struct ENAME##_prio_list_stage *stage; \
 		struct ENAME##_prio_list_stage *stage; \
@@ -345,9 +349,9 @@
 			return NULL; \
 			return NULL; \
 		return ENAME##_list_last(&stage->list); \
 		return ENAME##_list_last(&stage->list); \
 	} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
 	{ return NULL; } \
 	{ return NULL; } \
-	static inline struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
 	{ \
 	{ \
 		struct ENAME *next = ENAME##_list_prev(i); \
 		struct ENAME *next = ENAME##_list_prev(i); \
 		if (next != ENAME##_list_alpha(NULL)) \
 		if (next != ENAME##_list_alpha(NULL)) \
@@ -365,11 +369,11 @@
 /* gdbinit can't recurse in a tree. Use a mere list in debugging mode.  */
 /* gdbinit can't recurse in a tree. Use a mere list in debugging mode.  */
 #define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \
 #define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \
 	struct ENAME##_prio_list { struct ENAME##_list list; }; \
 	struct ENAME##_prio_list { struct ENAME##_list list; }; \
-	static inline void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
 	{ ENAME##_list_init(&(priolist)->list); } \
 	{ ENAME##_list_init(&(priolist)->list); } \
-	static inline void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
-	static inline void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ \
 	{ \
 		struct ENAME *cur; \
 		struct ENAME *cur; \
 		for (cur  = ENAME##_list_begin(&(priolist)->list); \
 		for (cur  = ENAME##_list_begin(&(priolist)->list); \
@@ -382,7 +386,7 @@
 		else \
 		else \
 			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
 			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
 	} \
 	} \
-	static inline void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ \
 	{ \
 		struct ENAME *cur; \
 		struct ENAME *cur; \
 		for (cur  = ENAME##_list_begin(&(priolist)->list); \
 		for (cur  = ENAME##_list_begin(&(priolist)->list); \
@@ -395,29 +399,29 @@
 		else \
 		else \
 			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
 			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
 	} \
 	} \
-	static inline int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_empty(&(priolist)->list); } \
 	{ return ENAME##_list_empty(&(priolist)->list); } \
-	static inline void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ ENAME##_list_erase(&(priolist)->list, (e)); } \
 	{ ENAME##_list_erase(&(priolist)->list, (e)); } \
-	static inline struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_pop_front(&(priolist)->list); } \
 	{ return ENAME##_list_pop_front(&(priolist)->list); } \
-	static inline struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_pop_back(&(priolist)->list); } \
 	{ return ENAME##_list_pop_back(&(priolist)->list); } \
-	static inline void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
+	PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
 	{ ENAME##_list_push_list_back(&(priolist)->list, &(priolist_toadd)->list); } \
 	{ ENAME##_list_push_list_back(&(priolist)->list, &(priolist_toadd)->list); } \
-	static inline int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \
+	PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \
 	{ return ENAME##_list_ismember(&(priolist)->list, (e)); } \
 	{ return ENAME##_list_ismember(&(priolist)->list, (e)); } \
-	static inline struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_begin(&(priolist)->list); } \
 	{ return ENAME##_list_begin(&(priolist)->list); } \
-	static inline struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_end(&(priolist)->list); } \
 	{ return ENAME##_list_end(&(priolist)->list); } \
-	static inline struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
 	{ return ENAME##_list_next(i); } \
 	{ return ENAME##_list_next(i); } \
-	static inline struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_last(&(priolist)->list); } \
 	{ return ENAME##_list_last(&(priolist)->list); } \
-	static inline struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_alpha(&(priolist)->list); } \
 	{ return ENAME##_list_alpha(&(priolist)->list); } \
-	static inline struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
+	PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
 	{ return ENAME##_list_prev(i); } \
 	{ return ENAME##_list_prev(i); } \
 
 
 #endif
 #endif

+ 22 - 24
src/common/thread.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -234,11 +235,21 @@ int starpu_pthread_key_delete(starpu_pthread_key_t key)
 	return 0;
 	return 0;
 }
 }
 
 
+/* We need it only when using smpi */
+#pragma weak smpi_process_get_user_data
+#if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA
+extern void *smpi_process_get_user_data();
+#endif
+
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 {
 {
 	void **array;
 	void **array;
-#ifdef STARPU_SIMGRID_HAVE_SIMIX_PROCESS_GET_CODE
-	if ((SIMIX_process_get_code() == _starpu_mpi_simgrid_init) || (!strcmp(SIMIX_process_self_get_name(),"wait for mpi transfer")))
+#ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
+	const char *process_name = SIMIX_process_self_get_name();
+	char *end;
+	/* Test whether it is an MPI rank */
+	strtol(process_name, &end, 10);
+	if (!*end || !strcmp(process_name, "wait for mpi transfer"))
 		/* Special-case the SMPI process */
 		/* Special-case the SMPI process */
 		array = smpi_process_get_user_data();
 		array = smpi_process_get_user_data();
 	else
 	else
@@ -251,9 +262,13 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 {
 {
 	void **array;
 	void **array;
-#ifdef STARPU_SIMGRID_HAVE_SIMIX_PROCESS_GET_CODE
-	if ((SIMIX_process_get_code() == _starpu_mpi_simgrid_init) || (!strcmp(SIMIX_process_self_get_name(),"wait for mpi transfer")))
-		/* Special-case the SMPI process */
+#ifdef HAVE_SMPI_PROCESS_SET_USER_DATA
+	const char *process_name = SIMIX_process_self_get_name();
+	char *end;
+	/* Test whether it is an MPI rank */
+	strtol(process_name, &end, 10);
+	if (!*end || !strcmp(process_name, "wait for mpi transfer"))
+		/* Special-case the SMPI processes */
 		array = smpi_process_get_user_data();
 		array = smpi_process_get_user_data();
 	else
 	else
 #endif
 #endif
@@ -720,34 +735,17 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier)
  * macros of course) which record when the mutex is held or not */
  * macros of course) which record when the mutex is held or not */
 int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex)
 {
 {
-	int p_ret = starpu_pthread_mutex_lock(mutex);
-	int workerid = starpu_worker_get_id();
-	if(workerid != -1 && _starpu_worker_mutex_is_sched_mutex(workerid, mutex))
-		_starpu_worker_set_flag_sched_mutex_locked(workerid, 1);
-	return p_ret;
+	return starpu_pthread_mutex_lock(mutex);
 }
 }
 
 
 int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex)
 {
 {
-	int workerid = starpu_worker_get_id();
-	if(workerid != -1 && _starpu_worker_mutex_is_sched_mutex(workerid, mutex))
-		_starpu_worker_set_flag_sched_mutex_locked(workerid, 0);
-
 	return starpu_pthread_mutex_unlock(mutex);
 	return starpu_pthread_mutex_unlock(mutex);
 }
 }
 
 
 int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex)
 int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex)
 {
 {
-	int ret = starpu_pthread_mutex_trylock(mutex);
-
-	if (!ret)
-	{
-		int workerid = starpu_worker_get_id();
-		if(workerid != -1 && _starpu_worker_mutex_is_sched_mutex(workerid, mutex))
-			_starpu_worker_set_flag_sched_mutex_locked(workerid, 1);
-	}
-
-	return ret;
+	return starpu_pthread_mutex_trylock(mutex);
 }
 }
 
 
 #ifdef STARPU_DEBUG
 #ifdef STARPU_DEBUG

+ 4 - 0
src/common/utils.c

@@ -464,6 +464,10 @@ char *_starpu_get_home_path(void)
 		static int warn;
 		static int warn;
 		path = starpu_getenv("TMPDIR");
 		path = starpu_getenv("TMPDIR");
 		if (!path)
 		if (!path)
+			path = starpu_getenv("TEMP");
+		if (!path)
+			path = starpu_getenv("TMP");
+		if (!path)
 			path = "/tmp";
 			path = "/tmp";
 		if (!warn)
 		if (!warn)
 		{
 		{

+ 2 - 1
src/core/combined_workers.c

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2014, 2016  CNRS
  * Copyright (C) 2010, 2011, 2014, 2016  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -160,7 +161,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 #endif
 #endif
 	}
 	}
 
 
-	starpu_sched_ctx_add_workers(&combined_worker_id, 1, STARPU_GLOBAL_SCHED_CTX);
+	starpu_sched_ctx_add_combined_workers(&combined_worker_id, 1, STARPU_GLOBAL_SCHED_CTX);
 
 
 	return new_workerid;
 	return new_workerid;
 }
 }

+ 2 - 3
src/core/dependencies/cg.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010-2012, 2014-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2012 INRIA
  * Copyright (C) 2012 INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -126,8 +126,7 @@ int _starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *su
 		if (n < ndeps)
 		if (n < ndeps)
 		{
 		{
 			struct starpu_task *task = cg->succ.job->task;
 			struct starpu_task *task = cg->succ.job->task;
-			if (task->cl == NULL || task->cl->where == STARPU_NOWHERE
-					|| task->execute_on_a_specific_worker)
+			if (task->cl == NULL || task->where == STARPU_NOWHERE || task->execute_on_a_specific_worker)
 				/* will not be scheduled */
 				/* will not be scheduled */
 				continue;
 				continue;
 			task_array[n] = task;
 			task_array[n] = task;

+ 2 - 3
src/core/errorcheck.h

@@ -2,6 +2,7 @@
  *
  *
  * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
  * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
  * Copyright (C) 2010, 2011  CNRS
  * Copyright (C) 2010, 2011  CNRS
+ * Copyright (C) 2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,9 +38,7 @@ enum _starpu_worker_status
 	/* while executing the scheduler code */
 	/* while executing the scheduler code */
 	STATUS_SCHEDULING,
 	STATUS_SCHEDULING,
 	/* while sleeping because there is nothing to do */
 	/* while sleeping because there is nothing to do */
-	STATUS_SLEEPING,
-	/* while a sleeping worker is about to wake up (to avoid waking twice for the same worker) */
-	STATUS_WAKING_UP
+	STATUS_SLEEPING
 };
 };
 
 
 struct _starpu_worker;
 struct _starpu_worker;

+ 6 - 11
src/core/jobs.c

@@ -1,9 +1,9 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2009-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
- * Copyright (C) 2011, 2014, 2016  INRIA
+ * Copyright (C) 2011, 2014, 2016-2017  INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -272,12 +272,6 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		0
 		0
 #endif
 #endif
 		;
 		;
-	/* Read cl fields before releasing dependencies, for the case of a
-	 * switch_cl which is freed by data_unregister happening as soon as
-	 * the dependencies are released.
-	 */
-	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE;
-
 #ifdef STARPU_DEBUG
 #ifdef STARPU_DEBUG
 	STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex);
 	_starpu_job_multilist_erase_all_submitted(&all_jobs_list, j);
 	_starpu_job_multilist_erase_all_submitted(&all_jobs_list, j);
@@ -345,6 +339,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * scheduler to process it : the task structure doesn't contain any valuable
 	 * data as it's not linked to an actual worker */
 	 * data as it's not linked to an actual worker */
 	/* control task should not execute post_exec_hook */
 	/* control task should not execute post_exec_hook */
+	unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE;
 	if(j->task_size == 1 && !nowhere && !j->internal
 	if(j->task_size == 1 && !nowhere && !j->internal
 #ifdef STARPU_OPENMP
 #ifdef STARPU_OPENMP
 	/* If this is a continuation, we do not execute the post_exec_hook. The
 	/* If this is a continuation, we do not execute the post_exec_hook. The
@@ -711,10 +706,10 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 {
 {
 	/* Check that the worker is able to execute the task ! */
 	/* Check that the worker is able to execute the task ! */
 	STARPU_ASSERT(task && task->cl);
 	STARPU_ASSERT(task && task->cl);
-	if (STARPU_UNLIKELY(!(worker->worker_mask & task->cl->where)))
+	if (STARPU_UNLIKELY(!(worker->worker_mask & task->where)))
 		return -ENODEV;
 		return -ENODEV;
 
 
-	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
+	_starpu_worker_lock(worker->workerid);
 
 
 	if (task->execute_on_a_specific_worker && task->workerorder)
 	if (task->execute_on_a_specific_worker && task->workerorder)
 	{
 	{
@@ -758,7 +753,7 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 
 
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_wake_worker_locked(worker->workerid);
 	starpu_push_task_end(task);
 	starpu_push_task_end(task);
-	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
+	_starpu_worker_unlock(worker->workerid);
 
 
 	return 0;
 	return 0;
 }
 }

+ 6 - 6
src/core/jobs.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2016  Université de Bordeaux
  * Copyright (C) 2009-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2013, 2014, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2013, 2014, 2015, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2014  INRIA
  * Copyright (C) 2014  INRIA
  *
  *
@@ -51,11 +51,11 @@ struct _starpu_worker;
 /* codelet function */
 /* codelet function */
 typedef void (*_starpu_cl_func_t)(void **, void *);
 typedef void (*_starpu_cl_func_t)(void **, void *);
 
 
-#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
-#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
-#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
-#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_MIC)
-#define _STARPU_SCC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SCC)
+#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->where & STARPU_CPU)
+#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->where & STARPU_CUDA)
+#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->where & STARPU_OPENCL)
+#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->where & STARPU_MIC)
+#define _STARPU_SCC_MAY_PERFORM(j)	((j)->task->where & STARPU_SCC)
 
 
 struct _starpu_data_descr
 struct _starpu_data_descr
 {
 {

+ 1 - 2
src/core/perfmodel/multiple_regression.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009, 2010, 2011, 2015-2016  Université de Bordeaux
  * Copyright (C) 2009, 2010, 2011, 2015-2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2016  CNRS
  * Copyright (C) 2010, 2011, 2016  CNRS
- * Copyright (C) 2016  Inria
+ * Copyright (C) 2016-2017  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -265,7 +265,6 @@ int _starpu_multiple_regression(struct starpu_perfmodel_history_list *ptr, doubl
 
 
 	/* Computing number of rows */
 	/* Computing number of rows */
 	long n=find_long_list_size(ptr);
 	long n=find_long_list_size(ptr);
-	STARPU_ASSERT(n);
 
 
         /* Reading old calibrations if necessary */
         /* Reading old calibrations if necessary */
 	FILE *f=NULL;
 	FILE *f=NULL;

+ 20 - 7
src/core/perfmodel/perfmodel.c

@@ -181,6 +181,7 @@ void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model)
 
 
 static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch,  unsigned nimpl)
 static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch,  unsigned nimpl)
 {
 {
+	double exp_perf = 0.0;
 	if (model)
 	if (model)
 	{
 	{
 		_starpu_init_and_load_perfmodel(model);
 		_starpu_init_and_load_perfmodel(model);
@@ -190,24 +191,36 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 		switch (model->type)
 		switch (model->type)
 		{
 		{
 			case STARPU_PER_ARCH:
 			case STARPU_PER_ARCH:
-				return per_arch_task_expected_perf(model, arch, task, nimpl);
+				exp_perf = per_arch_task_expected_perf(model, arch, task, nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			case STARPU_COMMON:
 			case STARPU_COMMON:
-				return common_task_expected_perf(model, arch, task, nimpl);
+				exp_perf = common_task_expected_perf(model, arch, task, nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			case STARPU_HISTORY_BASED:
 			case STARPU_HISTORY_BASED:
-				return _starpu_history_based_job_expected_perf(model, arch, j, nimpl);
+				exp_perf = _starpu_history_based_job_expected_perf(model, arch, j, nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			case STARPU_REGRESSION_BASED:
 			case STARPU_REGRESSION_BASED:
-				return _starpu_regression_based_job_expected_perf(model, arch, j, nimpl);
+				exp_perf = _starpu_regression_based_job_expected_perf(model, arch, j, nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			case STARPU_NL_REGRESSION_BASED:
 			case STARPU_NL_REGRESSION_BASED:
-				return _starpu_non_linear_regression_based_job_expected_perf(model, arch, j,nimpl);
+				exp_perf = _starpu_non_linear_regression_based_job_expected_perf(model, arch, j,nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			case STARPU_MULTIPLE_REGRESSION_BASED:
 			case STARPU_MULTIPLE_REGRESSION_BASED:
-				return _starpu_multiple_regression_based_job_expected_perf(model, arch, j, nimpl);
+				exp_perf = _starpu_multiple_regression_based_job_expected_perf(model, arch, j, nimpl);
+				STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf);
+				break;
 			default:
 			default:
 				STARPU_ABORT();
 				STARPU_ABORT();
 		}
 		}
 	}
 	}
 
 
 	/* no model was found */
 	/* no model was found */
-	return 0.0;
+	return exp_perf;
 }
 }
 
 
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)

+ 153 - 3
src/core/perfmodel/perfmodel_history.c

@@ -242,6 +242,64 @@ static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, s
 }
 }
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
+static void check_reg_model(struct starpu_perfmodel *model, int comb, int impl)
+{
+	struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
+
+	/*
+	 * Linear Regression model
+	 */
+
+	/* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
+	double alpha = nan(""), beta = nan("");
+	if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	{
+		if (reg_model->nsample > 1)
+		{
+			alpha = reg_model->alpha;
+			beta = reg_model->beta;
+		}
+	}
+
+	/* TODO: check:
+	 * reg_model->sumlnx
+	 * reg_model->sumlnx2
+	 * reg_model->sumlny
+	 * reg_model->sumlnxlny
+	 * alpha
+	 * beta
+	 * reg_model->minx
+	 * reg_model->maxx
+	 */
+	(void)alpha;
+	(void)beta;
+
+	/*
+	 * Non-Linear Regression model
+	 */
+
+	double a = nan(""), b = nan(""), c = nan("");
+
+	if (model->type == STARPU_NL_REGRESSION_BASED)
+		_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);
+
+	/* TODO: check:
+	 * a
+	 * b
+	 * c
+	 */
+
+	/*
+	 * Multiple Regression Model
+	 */
+
+	if (model->type == STARPU_MULTIPLE_REGRESSION_BASED)
+	{
+		/* TODO: check: */
+	}
+}
+
 static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
 static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
 {
 {
 	struct starpu_perfmodel_per_arch *per_arch_model;
 	struct starpu_perfmodel_per_arch *per_arch_model;
@@ -416,6 +474,15 @@ static void scan_reg_model(FILE *f, const char *path, struct starpu_perfmodel_re
 
 
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
+static void check_history_entry(struct starpu_perfmodel_history_entry *entry)
+{
+	STARPU_ASSERT_MSG(entry->deviation >= 0, "entry=%p, entry->deviation=%lf\n", entry, entry->deviation);
+	STARPU_ASSERT_MSG(entry->sum >= 0, "entry=%p, entry->sum=%lf\n", entry, entry->sum);
+	STARPU_ASSERT_MSG(entry->sum2 >= 0, "entry=%p, entry->sum2=%lf\n", entry, entry->sum2);
+	STARPU_ASSERT_MSG(entry->mean >= 0, "entry=%p, entry->mean=%lf\n", entry, entry->mean);
+	STARPU_ASSERT_MSG(isnan(entry->flops)||entry->flops >= 0, "entry=%p, entry->flops=%lf\n", entry, entry->flops);
+	STARPU_ASSERT_MSG(entry->duration >= 0, "entry=%p, entry->duration=%lf\n", entry, entry->duration);
+}
 static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
 {
 {
 	fprintf(f, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
 	fprintf(f, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
@@ -458,6 +525,11 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 
 
 	if (entry)
 	if (entry)
 	{
 	{
+		STARPU_ASSERT_MSG(flops >=0, "Negative flops %lf in performance model file %s", flops, path);
+		STARPU_ASSERT_MSG(mean >=0, "Negative mean %lf in performance model file %s", mean, path);
+		STARPU_ASSERT_MSG(deviation >=0, "Negative deviation %lf in performance model file %s", deviation, path);
+		STARPU_ASSERT_MSG(sum >=0, "Negative sum %lf in performance model file %s", sum, path);
+		STARPU_ASSERT_MSG(sum2 >=0, "Negative sum2 %lf in performance model file %s", sum2, path);
 		entry->footprint = footprint;
 		entry->footprint = footprint;
 		entry->size = size;
 		entry->size = size;
 		entry->flops = flops;
 		entry->flops = flops;
@@ -487,7 +559,7 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 		struct starpu_perfmodel_history_entry *entry = NULL;
 		struct starpu_perfmodel_history_entry *entry = NULL;
 		if (scan_history)
 		if (scan_history)
 		{
 		{
-			_STARPU_MALLOC(entry, sizeof(struct starpu_perfmodel_history_entry));
+			_STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry));
 
 
 			/* Tell  helgrind that we do not care about
 			/* Tell  helgrind that we do not care about
 			 * racing access to the sampling, we only want a
 			 * racing access to the sampling, we only want a
@@ -660,6 +732,43 @@ static int parse_model_file(FILE *f, const char *path, struct starpu_perfmodel *
 }
 }
 
 
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
+static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsigned impl)
+{
+	struct starpu_perfmodel_per_arch *per_arch_model;
+
+	per_arch_model = &model->state->per_arch[comb][impl];
+	/* count the number of elements in the lists */
+	struct starpu_perfmodel_history_list *ptr = NULL;
+	unsigned nentries = 0;
+
+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	{
+		/* Dump the list of all entries in the history */
+		ptr = per_arch_model->list;
+		while(ptr)
+		{
+			nentries++;
+			ptr = ptr->next;
+		}
+	}
+
+	/* header */
+	char archname[32];
+	starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  32, impl);
+	STARPU_ASSERT(strlen(archname)>0);
+	check_reg_model(model, comb, impl);
+
+	/* Dump the history into the model file in case it is necessary */
+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	{
+		ptr = per_arch_model->list;
+		while (ptr)
+		{
+			check_history_entry(ptr->entry);
+			ptr = ptr->next;
+		}
+	}
+}
 static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
 static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
 {
 {
 	struct starpu_perfmodel_per_arch *per_arch_model;
 	struct starpu_perfmodel_per_arch *per_arch_model;
@@ -704,6 +813,39 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 	fprintf(f, "\n");
 	fprintf(f, "\n");
 }
 }
 
 
+static void check_model(struct starpu_perfmodel *model)
+{
+	int ncombs = model->state->ncombs;
+	STARPU_ASSERT(ncombs >= 0);
+
+	int i, impl, dev;
+	for(i = 0; i < ncombs; i++)
+	{
+		int comb = model->state->combs[i];
+		STARPU_ASSERT(comb >= 0);
+
+		int ndevices = arch_combs[comb]->ndevices;
+		STARPU_ASSERT(ndevices >= 1);
+
+		for(dev = 0; dev < ndevices; dev++)
+		{
+			STARPU_ASSERT(arch_combs[comb]->devices[dev].type >= 0);
+			STARPU_ASSERT(arch_combs[comb]->devices[dev].type <= 5);
+
+			STARPU_ASSERT(arch_combs[comb]->devices[dev].devid >= 0);
+
+			STARPU_ASSERT(arch_combs[comb]->devices[dev].ncores >= 0);
+		}
+
+		int nimpls = model->state->nimpls[comb];
+		STARPU_ASSERT(nimpls >= 1);
+		for (impl = 0; impl < nimpls; impl++)
+		{
+			check_per_arch_model(model, comb, impl);
+		}
+	}
+}
+
 static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 {
 {
 	fprintf(f, "##################\n");
 	fprintf(f, "##################\n");
@@ -873,6 +1015,7 @@ static void save_history_based_model(struct starpu_perfmodel *model)
 	STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path);
 	STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path);
 
 
 	locked = _starpu_fwrlock(f) == 0;
 	locked = _starpu_fwrlock(f) == 0;
+	check_model(model);
 	_starpu_fftruncate(f, 0);
 	_starpu_fftruncate(f, 0);
 	dump_model_file(f, model);
 	dump_model_file(f, model);
 	if (locked)
 	if (locked)
@@ -1423,6 +1566,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 	history = per_arch_model->history;
 	history = per_arch_model->history;
 	HASH_FIND_UINT32_T(history, &key, elt);
 	HASH_FIND_UINT32_T(history, &key, elt);
 	entry = (elt == NULL) ? NULL : elt->history_entry;
 	entry = (elt == NULL) ? NULL : elt->history_entry;
+	STARPU_ASSERT_MSG(!entry || entry->mean >= 0, "entry=%p, entry->mean=%lf\n", entry, entry?entry->mean:NAN);
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 
 
 	/* Here helgrind would shout that this is unprotected access.
 	/* Here helgrind would shout that this is unprotected access.
@@ -1430,10 +1574,13 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 	 * a good-enough estimation */
 	 * a good-enough estimation */
 
 
 	if (entry && entry->nsample >= _starpu_calibration_minimum)
 	if (entry && entry->nsample >= _starpu_calibration_minimum)
+	{
+		STARPU_ASSERT_MSG(entry->mean >= 0, "entry->mean=%lf\n", entry->mean);
 		/* TODO: report differently if we've scheduled really enough
 		/* TODO: report differently if we've scheduled really enough
 		 * of that task and the scheduler should perhaps put it aside */
 		 * of that task and the scheduler should perhaps put it aside */
 		/* Calibrated enough */
 		/* Calibrated enough */
 		exp = entry->mean;
 		exp = entry->mean;
+	}
 
 
 docal:
 docal:
 	STARPU_HG_DISABLE_CHECKING(model->benchmarking);
 	STARPU_HG_DISABLE_CHECKING(model->benchmarking);
@@ -1447,6 +1594,7 @@ docal:
 		model->benchmarking = 1;
 		model->benchmarking = 1;
 	}
 	}
 
 
+	STARPU_ASSERT_MSG(isnan(exp)||exp >= 0, "exp=%lf\n", exp);
 	return exp;
 	return exp;
 }
 }
 
 
@@ -1470,6 +1618,7 @@ int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch)
 
 
 void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl)
 void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl)
 {
 {
+	STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured);
 	if (model)
 	if (model)
 	{
 	{
 		int c;
 		int c;
@@ -1526,7 +1675,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			if (!entry)
 			if (!entry)
 			{
 			{
 				/* this is the first entry with such a footprint */
 				/* this is the first entry with such a footprint */
-				_STARPU_MALLOC(entry, sizeof(struct starpu_perfmodel_history_entry));
+				_STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry));
 
 
 				/* Tell  helgrind that we do not care about
 				/* Tell  helgrind that we do not care about
 				 * racing access to the sampling, we only want a
 				 * racing access to the sampling, we only want a
@@ -1585,7 +1734,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 
 					unsigned n = entry->nsample;
 					unsigned n = entry->nsample;
 					entry->mean = entry->sum / n;
 					entry->mean = entry->sum / n;
-					entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
+					entry->deviation = sqrt((fabs(entry->sum2 - (entry->sum*entry->sum))/n)/n);
 				}
 				}
 
 
 				if (j->task->flops != 0.)
 				if (j->task->flops != 0.)
@@ -1645,6 +1794,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			_STARPU_MALLOC(entry->parameters, model->nparameters*sizeof(double));
 			_STARPU_MALLOC(entry->parameters, model->nparameters*sizeof(double));
 			model->parameters(j->task, entry->parameters);
 			model->parameters(j->task, entry->parameters);
 			entry->tag = j->task->tag_id;
 			entry->tag = j->task->tag_id;
+			STARPU_ASSERT(measured >= 0);
 			entry->duration = measured;
 			entry->duration = measured;
 
 
 			struct starpu_perfmodel_history_list *link;
 			struct starpu_perfmodel_history_list *link;

+ 14 - 16
src/core/perfmodel/perfmodel_print.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011, 2013-2016  Université de Bordeaux
  * Copyright (C) 2011, 2013-2016  Université de Bordeaux
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 			if (!parameter)
 			if (!parameter)
 			{
 			{
 				/* There isn't a parameter that is explicitely requested, so we display all parameters */
 				/* There isn't a parameter that is explicitely requested, so we display all parameters */
-				printf("%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint,
+				fprintf(output, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint,
 					(unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample);
 					(unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample);
 			}
 			}
 			else
 			else
@@ -47,12 +47,12 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 				/* only display the parameter that was specifically requested */
 				/* only display the parameter that was specifically requested */
 				if (strcmp(parameter, "mean") == 0)
 				if (strcmp(parameter, "mean") == 0)
 				{
 				{
-					printf("%-15e\n", entry->mean);
+					fprintf(output, "%-15e\n", entry->mean);
 				}
 				}
 
 
 				if (strcmp(parameter, "stddev") == 0)
 				if (strcmp(parameter, "stddev") == 0)
 				{
 				{
-					printf("%-15e\n", entry->deviation);
+					fprintf(output, "%-15e\n", entry->deviation);
 					return;
 					return;
 				}
 				}
 			}
 			}
@@ -113,7 +113,7 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 #if 0
 #if 0
 		char debugname[1024];
 		char debugname[1024];
 		starpu_perfmodel_debugfilepath(model, arch, debugname, 1024, nimpl);
 		starpu_perfmodel_debugfilepath(model, arch, debugname, 1024, nimpl);
-		printf("\t debug file path : %s\n", debugname);
+		_STARPU_MSG("\t debug file path : %s\n", debugname);
 #endif
 #endif
 	}
 	}
 	else
 	else
@@ -121,31 +121,31 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 		/* only display the parameter that was specifically requested */
 		/* only display the parameter that was specifically requested */
 		if (strcmp(parameter, "a") == 0)
 		if (strcmp(parameter, "a") == 0)
 		{
 		{
-			printf("%e\n", arch_model->regression.a);
+			fprintf(output, "%e\n", arch_model->regression.a);
 			return;
 			return;
 		}
 		}
 
 
 		if (strcmp(parameter, "b") == 0)
 		if (strcmp(parameter, "b") == 0)
 		{
 		{
-			printf("%e\n", arch_model->regression.b);
+			fprintf(output, "%e\n", arch_model->regression.b);
 			return;
 			return;
 		}
 		}
 
 
 		if (strcmp(parameter, "c") == 0)
 		if (strcmp(parameter, "c") == 0)
 		{
 		{
-			printf("%e\n", arch_model->regression.c);
+			fprintf(output, "%e\n", arch_model->regression.c);
 			return;
 			return;
 		}
 		}
 
 
 		if (strcmp(parameter, "alpha") == 0)
 		if (strcmp(parameter, "alpha") == 0)
 		{
 		{
-			printf("%e\n", arch_model->regression.alpha);
+			fprintf(output, "%e\n", arch_model->regression.alpha);
 			return;
 			return;
 		}
 		}
 
 
 		if (strcmp(parameter, "beta") == 0)
 		if (strcmp(parameter, "beta") == 0)
 		{
 		{
-			printf("%e\n", arch_model->regression.beta);
+			fprintf(output, "%e\n", arch_model->regression.beta);
 			return;
 			return;
 		}
 		}
 
 
@@ -153,7 +153,7 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 		{
 		{
 			char debugname[256];
 			char debugname[256];
 			starpu_perfmodel_debugfilepath(model, arch, debugname, 256, nimpl);
 			starpu_perfmodel_debugfilepath(model, arch, debugname, 256, nimpl);
-			printf("%s\n", debugname);
+			fprintf(output, "%s\n", debugname);
 			return;
 			return;
 		}
 		}
 
 
@@ -165,8 +165,7 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 
 
 		/* TODO display if it's valid ? */
 		/* TODO display if it's valid ? */
 
 
-		fprintf(output, "Unknown parameter requested, aborting.\n");
-		exit(-1);
+		_STARPU_ERROR("Unknown parameter requested, aborting.\n");
 	}
 	}
 }
 }
 
 
@@ -209,8 +208,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			/* For combined CPU workers */
 			/* For combined CPU workers */
 			if ((k < 1) || (k > STARPU_MAXCPUS))
 			if ((k < 1) || (k > STARPU_MAXCPUS))
 			{
 			{
-				fprintf(output, "Invalid CPU size\n");
-				exit(-1);
+				_STARPU_ERROR("Invalid CPU size\n");
 			}
 			}
 
 
 			int implid;
 			int implid;
@@ -282,7 +280,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			return 0;
 			return 0;
 		}
 		}
 
 
-		fprintf(output, "Unknown architecture requested\n");
+		_STARPU_MSG("Unknown architecture requested\n");
 		return -1;
 		return -1;
 	}
 	}
 	return 0;
 	return 0;

Diferenças do arquivo suprimidas por serem muito extensas
+ 827 - 629
src/core/sched_ctx.c


+ 66 - 46
src/core/sched_ctx.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2013  INRIA
+ * Copyright (C) 2011, 2013, 2017  INRIA
  * Copyright (C) 2016  Uppsala University
  * Copyright (C) 2016  Uppsala University
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -74,27 +74,12 @@ struct _starpu_sched_ctx
 	long iterations[2];
 	long iterations[2];
 	int iteration_level;
 	int iteration_level;
 
 
-	/* cond to block push when there are no workers in the ctx */
-	starpu_pthread_cond_t no_workers_cond;
-
-	/* mutex to block push when there are no workers in the ctx */
-	starpu_pthread_mutex_t no_workers_mutex;
-
 	/*ready tasks that couldn't be pushed because the ctx has no workers*/
 	/*ready tasks that couldn't be pushed because the ctx has no workers*/
 	struct starpu_task_list empty_ctx_tasks;
 	struct starpu_task_list empty_ctx_tasks;
 
 
-	/* mutext protecting empty_ctx_tasks list */
-	starpu_pthread_mutex_t empty_ctx_mutex;
-
 	/*ready tasks that couldn't be pushed because the the window of tasks was already full*/
 	/*ready tasks that couldn't be pushed because the the window of tasks was already full*/
 	struct starpu_task_list waiting_tasks;
 	struct starpu_task_list waiting_tasks;
 
 
-	/* mutext protecting waiting_tasks list */
-	starpu_pthread_mutex_t waiting_tasks_mutex;
-
-	/* mutext protecting write to all worker's sched_ctx_list structure for this sched_ctx */
-	starpu_pthread_mutex_t sched_ctx_list_mutex;
-
 	/* min CPUs to execute*/
 	/* min CPUs to execute*/
 	int min_ncpus;
 	int min_ncpus;
 
 
@@ -143,27 +128,6 @@ struct _starpu_sched_ctx
 	   if not master is -1 */
 	   if not master is -1 */
 	int main_master;
 	int main_master;
 
 
-	/* conditions variables used when parallel sections are executed in contexts */
-	starpu_pthread_cond_t parallel_sect_cond[STARPU_NMAXWORKERS];
-	starpu_pthread_mutex_t parallel_sect_mutex[STARPU_NMAXWORKERS];
-	starpu_pthread_cond_t parallel_sect_cond_busy[STARPU_NMAXWORKERS];
-	int busy[STARPU_NMAXWORKERS];
-
-	/* boolean indicating that workers should block in order to allow
-	   parallel sections to be executed on their allocated resources */
-	unsigned parallel_sect[STARPU_NMAXWORKERS];
-
-	/* semaphore that block appl thread until starpu threads are
-	   all blocked and ready to exec the parallel code */
-	starpu_sem_t fall_asleep_sem[STARPU_NMAXWORKERS];
-
-	/* semaphore that block appl thread until starpu threads are 
-	   all woke up and ready continue appl */
-	starpu_sem_t wake_up_sem[STARPU_NMAXWORKERS];
-
-	/* bool indicating if the workers is sleeping in this ctx */
-	unsigned sleeping[STARPU_NMAXWORKERS];
-
 	/* ctx nesting the current ctx */
 	/* ctx nesting the current ctx */
 	unsigned nesting_sched_ctx;
 	unsigned nesting_sched_ctx;
 
 
@@ -191,8 +155,21 @@ struct _starpu_sched_ctx
 	int sms_end_idx;
 	int sms_end_idx;
 
 
 	int stream_worker;
 	int stream_worker;
+
+	starpu_pthread_rwlock_t rwlock;
+	starpu_pthread_t lock_write_owner;
 };
 };
 
 
+/* per-worker list of deferred ctx_change ops */
+LIST_TYPE(_starpu_ctx_change,
+	int sched_ctx_id;
+	int op;
+	int nworkers_to_notify;
+	int *workerids_to_notify;
+	int nworkers_to_change;
+	int *workerids_to_change;
+);
+
 struct _starpu_machine_config;
 struct _starpu_machine_config;
 
 
 /* init sched_ctx_id of all contextes*/
 /* init sched_ctx_id of all contextes*/
@@ -242,19 +219,10 @@ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker
 /* Check if the worker belongs to another sched_ctx */
 /* Check if the worker belongs to another sched_ctx */
 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
 
 
-/* mutex synchronising several simultaneous modifications of a context */
-starpu_pthread_rwlock_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
-
 /* indicates wheather this worker should go to sleep or not 
 /* indicates wheather this worker should go to sleep or not 
    (if it is the last one awake in a context he should better keep awake) */
    (if it is the last one awake in a context he should better keep awake) */
 unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker);
 unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker);
 
 
-/* let the appl know that the worker blocked to execute parallel code */
-void _starpu_sched_ctx_signal_worker_blocked(unsigned sched_ctx_id, int workerid);
-
-/* let the appl know that the worker woke up */
-void _starpu_sched_ctx_signal_worker_woke_up(unsigned sched_ctx_id, int workerid);
-
 /* If starpu_sched_ctx_set_context() has been called, returns the context
 /* If starpu_sched_ctx_set_context() has been called, returns the context
  * id set by its last call, or the id of the initial context */
  * id set by its last call, or the id of the initial context */
 unsigned _starpu_sched_ctx_get_current_context();
 unsigned _starpu_sched_ctx_get_current_context();
@@ -273,10 +241,62 @@ void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task,
 
 
 #endif //STARPU_USE_SC_HYPERVISOR
 #endif //STARPU_USE_SC_HYPERVISOR
 
 
+void starpu_sched_ctx_add_combined_workers(int *combined_workers_to_add, unsigned n_combined_workers_to_add, unsigned sched_ctx_id);
+
 /* if the worker is the master of a parallel context, and the job is meant to be executed on this parallel context, return a pointer to the context */
 /* if the worker is the master of a parallel context, and the job is meant to be executed on this parallel context, return a pointer to the context */
 struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j);
 struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j);
 
 
 #define _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(w,j) \
 #define _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(w,j) \
 	(_starpu_get_nsched_ctxs() <= 1 ? _starpu_get_sched_ctx_struct(0) : __starpu_sched_ctx_get_sched_ctx_for_worker_and_job((w),(j)))
 	(_starpu_get_nsched_ctxs() <= 1 ? _starpu_get_sched_ctx_struct(0) : __starpu_sched_ctx_get_sched_ctx_for_worker_and_job((w),(j)))
 
 
+static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id);
+
+static inline int _starpu_sched_ctx_check_write_locked(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	return starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self());
+}
+#define STARPU_SCHED_CTX_CHECK_LOCK(sched_ctx_id) STARPU_ASSERT(_starpu_sched_ctx_check_write_locked((sched_ctx_id)))
+
+static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
+	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_PTHREAD_RWLOCK_WRLOCK(&sched_ctx->rwlock);
+	sched_ctx->lock_write_owner = starpu_pthread_self();
+}
+
+static inline void _starpu_sched_ctx_unlock_write(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_ASSERT(starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
+	memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner));
+	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
+}
+
+static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
+	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_PTHREAD_RWLOCK_RDLOCK(&sched_ctx->rwlock);
+}
+
+static inline void _starpu_sched_ctx_unlock_read(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
+	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
+}
+
+/* Go through the list of deferred ctx changes of the current worker and apply
+ * any ctx change operation found until the list is empty */
+void _starpu_worker_apply_deferred_ctx_changes(void);
 #endif // __SCHED_CONTEXT_H__
 #endif // __SCHED_CONTEXT_H__

+ 3 - 2
src/core/sched_ctx_list.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2013  INRIA
+ * Copyright (C) 2011, 2013, 2017  INRIA
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -253,7 +253,8 @@ void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list,
 		if (parent->prev == NULL)
 		if (parent->prev == NULL)
 		{
 		{
 			*list = parent->next;
 			*list = parent->next;
-			parent->next->prev = NULL;
+			if (parent->next != NULL)
+				parent->next->prev = NULL;
 		}
 		}
 		else
 		else
 		{
 		{

+ 0 - 0
src/core/sched_policy.c


Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff