Pārlūkot izejas kodu

Merge branch 'master' into fpga

Nathalie Furmento 6 gadi atpakaļ
vecāks
revīzija
1bef44b8f4
100 mainītis faili ar 1312 papildinājumiem un 593 dzēšanām
  1. 2 0
      ChangeLog
  2. 30 17
      configure.ac
  3. 87 2
      doc/doxygen/chapters/380_offline_performance_tools.doxy
  4. 3 2
      doc/doxygen/chapters/470_simgrid.doxy
  5. 22 1
      doc/doxygen/chapters/api/threads.doxy
  6. BIN
      doc/doxygen/chapters/images/starvz_visu.eps
  7. BIN
      doc/doxygen/chapters/images/starvz_visu.png
  8. 1 2
      doc/doxygen_dev/Makefile.am
  9. 2 2
      examples/cholesky/cholesky.sh
  10. 9 9
      examples/heat/heat.sh
  11. 11 11
      examples/lu/lu.sh
  12. 19 3
      examples/mult/xgemm.c
  13. 2 2
      examples/scheduler/schedulers.sh
  14. 2 2
      examples/scheduler/schedulers_context.sh
  15. 5 1
      include/starpu_config.h.in
  16. 8 2
      include/starpu_task.h
  17. 10 4
      include/starpu_thread.h
  18. 53 5
      include/starpu_thread_util.h
  19. 21 1
      m4/libs.m4
  20. 1 5
      mpi/examples/Makefile.am
  21. 3 3
      mpi/examples/perf.sh
  22. 2 0
      mpi/examples/user_datatype/my_interface.h
  23. 70 34
      mpi/src/mpi/starpu_mpi_mpi.c
  24. 8 8
      mpi/src/mpi/starpu_mpi_mpi_backend.c
  25. 8 3
      mpi/src/nmad/starpu_mpi_nmad.c
  26. 4 4
      mpi/src/starpu_mpi.c
  27. 17 17
      mpi/src/starpu_mpi_req.c
  28. 23 23
      mpi/tests/Makefile.am
  29. 4 1
      mpi/tests/driver.c
  30. 4 1
      mpi/tests/mpi_earlyrecv.c
  31. 2 2
      mpi/tests/mpi_earlyrecv2.c
  32. 2 2
      mpi/tests/mpi_earlyrecv2_sync.c
  33. 4 1
      mpi/tests/mpi_test.c
  34. 4 1
      mpi/tests/multiple_send.c
  35. 4 4
      mpi/tests/pingpong.c
  36. 10 1
      mpi/tests/sendrecv_bench.c
  37. 2 1
      src/common/fxt.c
  38. 2 2
      src/common/graph.c
  39. 7 2
      src/common/list.h
  40. 14 4
      src/common/prio_list.h
  41. 21 0
      src/common/rbtree.h
  42. 6 3
      src/common/thread.c
  43. 10 10
      src/core/dependencies/cg.c
  44. 2 2
      src/core/dependencies/cg.h
  45. 44 16
      src/core/dependencies/implicit_data_deps.c
  46. 2 2
      src/core/dependencies/implicit_data_deps.h
  47. 6 6
      src/core/dependencies/tags.c
  48. 5 7
      src/core/jobs.c
  49. 8 8
      src/core/perfmodel/perfmodel_history.c
  50. 36 9
      src/core/simgrid.c
  51. 15 2
      src/core/simgrid.h
  52. 28 9
      src/core/simgrid_cpp.cpp
  53. 27 11
      src/core/task.c
  54. 5 5
      src/core/task_bundle.c
  55. 11 5
      src/core/topology.c
  56. 2 2
      src/core/workers.h
  57. 49 38
      src/datawizard/filters.c
  58. 2 1
      src/datawizard/interfaces/bcsr_filters.c
  59. 53 51
      src/datawizard/interfaces/data_interface.c
  60. 6 6
      src/datawizard/memstats.c
  61. 33 15
      src/datawizard/user_interactions.c
  62. 7 7
      src/drivers/mpi/driver_mpi_source.h
  63. 2 2
      src/profiling/bound.c
  64. 2 2
      src/util/openmp_runtime_support.c
  65. 11 1
      tests/Makefile.am
  66. 2 2
      tests/cholesky/sched.sh
  67. 3 3
      tests/cholesky/sched_one_gpu.sh
  68. 2 1
      tests/cholesky_ctxs/evaluate_expression.sh
  69. 24 24
      tests/coverage/coverage.sh
  70. 51 4
      tests/datawizard/acquire_cb.c
  71. 1 1
      tests/datawizard/interfaces/test_interfaces.sh
  72. 3 3
      tests/datawizard/locality.sh
  73. 5 5
      tests/experiments/bandwidth_cuda/bench_bandwidth.sh
  74. 2 2
      tests/heat/deps.sh
  75. 4 4
      tests/heat/dmda.sh
  76. 2 2
      tests/heat/gflops.sh
  77. 2 2
      tests/heat/gflops_sched.sh
  78. 9 9
      tests/heat/granularity.sh
  79. 5 5
      tests/heat/heat.sh
  80. 2 2
      tests/heat/model_perturbation.sh
  81. 3 3
      tests/heat/sched.sh
  82. 2 2
      tests/heat/speedup.sh
  83. 2 2
      tests/incrementer/speed.sh
  84. 51 13
      tests/loader.c
  85. 2 2
      tests/main/combined_workers/bfs/run.sh
  86. 2 2
      tests/memory/memstress.sh
  87. 2 2
      tests/memory/memstress2.sh
  88. 19 0
      tests/microbenchs/async_tasks_data_overhead.sh
  89. 53 25
      tests/microbenchs/async_tasks_overhead.c
  90. 2 2
      tests/microbenchs/microbench.sh
  91. 2 2
      tests/microbenchs/starpu_check.sh
  92. 19 0
      tests/microbenchs/sync_tasks_data_overhead.sh
  93. 70 15
      tests/microbenchs/sync_tasks_overhead.c
  94. 19 0
      tests/microbenchs/tasks_data_overhead.sh
  95. 63 42
      tests/microbenchs/tasks_overhead.c
  96. 2 2
      tests/microbenchs/tasks_size_overhead.sh
  97. 2 2
      tests/microbenchs/tasks_size_overhead_sched.sh
  98. 3 1
      tests/microbenchs/tasks_size_overhead_scheds.sh
  99. 2 2
      tests/mult/gflops.sh
  100. 0 0
      tests/mult/sched.sh

+ 2 - 0
ChangeLog

@@ -43,6 +43,8 @@ Small features:
   * Move optimized cuda 2d copy from interfaces to new
   * Move optimized cuda 2d copy from interfaces to new
     starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use
     starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use
     them from starpu_interface_copy2d and 3d.
     them from starpu_interface_copy2d and 3d.
+  * New function starpu_task_watchdog_set_hook to specify a function
+    to be called when the watchdog is raised
 
 
 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
 ====================================================================
 ====================================================================

+ 30 - 17
configure.ac

@@ -273,34 +273,38 @@ if test x$enable_simgrid = xyes ; then
 		]
 		]
 	)
 	)
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
+	AC_CHECK_HEADERS([msg/msg.h], [AC_DEFINE([STARPU_HAVE_MSG_MSG_H], [1], [Define to 1 if you have msg.h in msg/.])])
 	AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])])
 	AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])])
+	AC_CHECK_HEADERS([xbt/base.h], [AC_DEFINE([STARPU_HAVE_XBT_BASE_H], [1], [Define to 1 if you have base.h in xbt/.])])
+	AC_CHECK_HEADERS([simgrid/version.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_VERSION_H], [1], [Define to 1 if you have version.h in simgrid/.])], [], [[
+			  #ifdef STARPU_HAVE_XBT_BASE_H
+			  #include <xbt/base.h>
+			  #endif
+			  ]])
 	AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])])
 	AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
+	AC_CHECK_HEADERS([xbt/config.h], [AC_DEFINE([STARPU_HAVE_XBT_CONFIG_H], [1], [Define to 1 if you have config.h in xbt/.])])
+	AC_CHECK_HEADERS([simgrid/actor.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ACTOR_H], [1], [Define to 1 if you have actor.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/engine.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ENGINE_H], [1], [Define to 1 if you have engine.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/semaphore.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SEMAPHORE_H], [1], [Define to 1 if you have semaphore.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/mutex.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MUTEX_H], [1], [Define to 1 if you have mutex.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/cond.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_COND_H], [1], [Define to 1 if you have cond.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/barrier.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_BARRIER_H], [1], [Define to 1 if you have barrier.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/engine.h])
+	AC_CHECK_HEADERS([simgrid/zone.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ZONE_H], [1], [Define to 1 if you have zone.h in simgrid/.])])
 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
 
 
 	# Latest functions
 	# Latest functions
-	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init MSG_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
-	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
+	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init MSG_zone_get_hosts sg_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
+	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
+	AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
+	AC_CHECK_FUNCS([sg_actor_sleep_for sg_actor_self sg_actor_ref sg_host_get_properties sg_host_send_to sg_host_sendto sg_cfg_set_int sg_actor_self_execute simgrid_get_clock])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 
 
 	# Oldies for compatibility with older simgrid
 	# Oldies for compatibility with older simgrid
 	AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed])
 	AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed])
 
 
-	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
-		    		[[
-#ifdef STARPU_HAVE_SIMGRID_MSG_H
-#include <simgrid/msg.h>
-#else
-#include <msg/msg.h>
-#endif
-				 ]],
-				[[msg_host_t foo; ]]
-			    )],
-	                 [],
-	                 [
-			   AC_MSG_ERROR(StarPU needs a version of Simgrid which defines the type msg_host_t (should be any version >= 3.8.1))
-		         ])
 	AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution])
 	AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution])
 	# We won't bind or detect anything
 	# We won't bind or detect anything
 	with_hwloc=no
 	with_hwloc=no
@@ -727,7 +731,7 @@ fi
 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
     if test x$enable_simgrid = xyes ; then
     if test x$enable_simgrid = xyes ; then
         if test x$enable_shared = xyes ; then
         if test x$enable_shared = xyes ; then
-	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this])
+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this, or disable MPI with --disable-mpi])
         else
         else
 	    CFLAGS="$CFLAGS -fPIC"
 	    CFLAGS="$CFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
@@ -920,6 +924,12 @@ if test x$have_pthread_setname_np = xyes; then
 	AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available])
 	AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available])
 fi
 fi
 
 
+if test "x$cross_compiling" = "xno"; then
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER)
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_cond_t, PTHREAD_COND_INITIALIZER)
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_rwlock_t, PTHREAD_RWLOCK_INITIALIZER)
+fi
+
 # There is no posix_memalign on Mac OS X, only memalign
 # There is no posix_memalign on Mac OS X, only memalign
 AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])])
 AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])])
 AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])])
 AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])])
@@ -3564,6 +3574,9 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x doc/doxygen/doxygen_filter.sh
   chmod +x doc/doxygen/doxygen_filter.sh
   chmod +x doc/doxygen_dev/doxygen_filter.sh
   chmod +x doc/doxygen_dev/doxygen_filter.sh
   mkdir -p tests/microbenchs
   mkdir -p tests/microbenchs
+  test -e tests/microbenchs/tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_data_overhead.sh tests/microbenchs/
+  test -e tests/microbenchs/sync_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/sync_tasks_data_overhead.sh tests/microbenchs/
+  test -e tests/microbenchs/async_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/async_tasks_data_overhead.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_sched.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_sched.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_sched.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_sched.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/

+ 87 - 2
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -572,15 +572,37 @@ $ starpu_paje_sort paje.trace
 
 
 \section PapiCounters PAPI counters
 \section PapiCounters PAPI counters
 
 
-Performance counter values can be obtained from the PAPI framework if
+Performance counter values could be obtained from the PAPI framework if
 <c>./configure</c> detected the libpapi. One has to set the \ref STARPU_PROFILING
 <c>./configure</c> detected the libpapi. One has to set the \ref STARPU_PROFILING
-environment variable to 1 and then specify which counters to record with the
+environment variable to 1 and then specify which events to record with the
 \ref STARPU_PROF_PAPI_EVENTS environment variable. For instance:
 \ref STARPU_PROF_PAPI_EVENTS environment variable. For instance:
 
 
 \verbatim
 \verbatim
 export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC"
 export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC"
 \endverbatim
 \endverbatim
 
 
+In the current simple implementation, only CPU tasks have their events measured,
+and this requires CPUs that support the PAPI events. All events that PAPI supports are
+listed in its documentation (https://icl.cs.utk.edu/projects/papi/wiki/PAPIC:Preset_Event_Definitions).
+It is important to note that not all events are available on all systems, and
+general PAPI recommendations should be followed.
+
+The counter values can be accessed using the profiling interface:
+\code{.c}
+task->profiling_info->papi_values
+\endcode
+They can also be accessed and/or saved with tracing when using \ref STARPU_FXT_TRACE. With <c>starpu_fxt_tool</c>,
+the file <c>papi.rec</c> is generated, containing the following triple:
+
+\verbatim
+Task Id
+Event Id
+Value
+\endverbatim
+
+External tools like <c>rec2csv</c> can be used to convert this rec file to a <c>csv</c>, where each
+line represents a value for an event for a task.
+
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 
 
 StarPU can record a trace of what tasks are needed to complete the
 StarPU can record a trace of what tasks are needed to complete the
@@ -645,6 +667,69 @@ the priorities as the StarPU scheduler would, i.e. schedule prioritized
 tasks before less prioritized tasks, to check to which extent this results
 tasks before less prioritized tasks, to check to which extent this results
 in a less optimal solution. This increases computation time even more.
 in a less optimal solution. This increases computation time even more.
 
 
+\section starvz Trace visualization with StarVZ
+
+Creating views with StarVZ (see: https://github.com/schnorr/starvz) is made up of two steps. The initial
+stage consists of a pre-processing of the traces generated by the application.
+The second step consists of the analysis itself and is carried out with the
+aid of R packages. To download and install StarVZ, it is necessary to have R,
+pajeng and the following packages:
+
+\verbatim
+# For pajeng
+apt install -y git cmake build-essential libboost-dev asciidoc flex bison
+git clone git://github.com/schnorr/pajeng.git
+mkdir -p pajeng/b ; cd pajeng/b
+cmake ..
+make
+
+# For R tidyverse
+apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev
+\endverbatim
+
+To install StarVZ, the following commands can be used:
+
+\verbatim
+git clone https://github.com/schnorr/starvz.git
+echo "install.packages(c('tidyverse', 'devtools'), repos = 'https://cloud.r-project.org')" | R --vanilla
+echo "library(devtools); devtools::install_local(path='./starvz/R_package')" | R --vanilla
+\endverbatim
+
+To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE
+and build StarPU with FxT. Then, Step 1 of StarVZ can be used on a folder with
+StarPU FxT traces:
+
+\verbatim
+export PATH=starvz/:$PATH
+export PATH=pajeng/b:$PATH
+export PATH=$STARPU_HOME/bin:$PATH
+
+./starvz/src/phase1-workflow.sh /tmp/ ""
+\endverbatim
+
+Then the second step can be executed directly in R. StarVZ enables a set of
+different plots that can be configured in a .yaml file. A default file,
+<c>full_config.yaml</c>, is provided; the options can also be changed directly in R.
+
+\verbatim
+library(starvz)
+dtrace <- the_fast_reader_function("./")
+
+pajer <- config::get(file = "starvz/full_config.yaml")
+
+pajer$starpu$active = TRUE
+pajer$submitted$active = TRUE
+pajer$st$abe$active = TRUE
+
+plot <- the_master_function(dtrace)
+\endverbatim
+
+An example of visualization follows:
+
+\image html starvz_visu.png
+\image latex starvz_visu.eps "" width=\textwidth
+
+
 \section MemoryFeedback Memory Feedback
 \section MemoryFeedback Memory Feedback
 
 
 It is possible to enable memory statistics. To do so, you need to pass
 It is possible to enable memory statistics. To do so, you need to pass

+ 3 - 2
doc/doxygen/chapters/470_simgrid.doxy

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2020                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,7 +23,8 @@
 /*! \page SimGridSupport SimGrid Support
 /*! \page SimGridSupport SimGrid Support
 
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 StarPU can use Simgrid in order to simulate execution on an arbitrary
-platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to 3.24.
+platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
+3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 all. MPI simulation does not work with version 3.22.
 
 

+ 22 - 1
doc/doxygen/chapters/api/threads.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2017, 2019                          CNRS
  * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2016,2020                 Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -35,6 +35,13 @@ Call starpu_pthread_create() and abort on error.
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_mutex_init() and abort on error.
 Call starpu_pthread_mutex_init() and abort on error.
 
 
+\def STARPU_PTHREAD_MUTEX_INIT0
+\ingroup API_Threads
+Call starpu_pthread_mutex_init() only if the content of
+PTHREAD_MUTEX_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_MUTEX_INIT when it is known that the content of the
+pthread_mutex_t was already zeroed.
+
 \def STARPU_PTHREAD_MUTEX_DESTROY
 \def STARPU_PTHREAD_MUTEX_DESTROY
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_mutex_destroy() and abort on error.
 Call starpu_pthread_mutex_destroy() and abort on error.
@@ -67,6 +74,13 @@ Call starpu_pthread_getspecific() and abort on error.
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_rwlock_init() and abort on error.
 Call starpu_pthread_rwlock_init() and abort on error.
 
 
+\def STARPU_PTHREAD_RWLOCK_INIT0
+\ingroup API_Threads
+Call starpu_pthread_rwlock_init() only if the content of
+PTHREAD_RWLOCK_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_RWLOCK_INIT when it is known that the content of the
+pthread_rwlock_t was already zeroed.
+
 \def STARPU_PTHREAD_RWLOCK_RDLOCK
 \def STARPU_PTHREAD_RWLOCK_RDLOCK
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_rwlock_rdlock() and abort on error.
 Call starpu_pthread_rwlock_rdlock() and abort on error.
@@ -87,6 +101,13 @@ Call starpu_pthread_rwlock_destroy() and abort on error.
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_cond_init() and abort on error.
 Call starpu_pthread_cond_init() and abort on error.
 
 
+\def STARPU_PTHREAD_COND_INIT0
+\ingroup API_Threads
+Call starpu_pthread_cond_init() only if the content of
+PTHREAD_COND_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_COND_INIT when it is known that the content of the
+pthread_cond_t was already zeroed.
+
 \def STARPU_PTHREAD_COND_DESTROY
 \def STARPU_PTHREAD_COND_DESTROY
 \ingroup API_Threads
 \ingroup API_Threads
 Call starpu_pthread_cond_destroy() and abort on error.
 Call starpu_pthread_cond_destroy() and abort on error.

BIN
doc/doxygen/chapters/images/starvz_visu.eps


BIN
doc/doxygen/chapters/images/starvz_visu.png


+ 1 - 2
doc/doxygen_dev/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2010-2018                                CNRS
+# Copyright (C) 2010-2018, 2020                          CNRS
 # Copyright (C) 2013-2018                                Inria
 # Copyright (C) 2013-2018                                Inria
 # Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
 # Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
 #
 #
@@ -126,7 +126,6 @@ $(DOX_TAG): $(dox_inputs)
 
 
 $(DOX_PDF): $(DOX_TAG) refman.tex
 $(DOX_PDF): $(DOX_TAG) refman.tex
 	@cp $(top_srcdir)/doc/doxygen_dev/chapters/version.sty $(DOX_LATEX_DIR)
 	@cp $(top_srcdir)/doc/doxygen_dev/chapters/version.sty $(DOX_LATEX_DIR)
-	@-cp $(top_srcdir)/doc/doxygen_dev/chapters/images/*pdf $(DOX_LATEX_DIR)
 	@echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex
 	@echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex
 	@cd $(DOX_LATEX_DIR) ;\
 	@cd $(DOX_LATEX_DIR) ;\
 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\
 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\

+ 2 - 2
examples/cholesky/cholesky.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2018-2019                                Université de Bordeaux
+# Copyright (C) 2018-2020                                Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -34,7 +34,7 @@ for size in `seq 2 2 30` ; do
 	for STARPU_SCHED in $STARPU_SCHEDS
 	for STARPU_SCHED in $STARPU_SCHEDS
 	do
 	do
 		export STARPU_SCHED
 		export STARPU_SCHED
-		GFLOPS=`${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlops | cut -d '	' -f 3`
+		GFLOPS=`$STARPU_LAUNCH ${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlops | cut -d '	' -f 3`
 		[ -n "$GFLOPS" ] || GFLOPS='""'
 		[ -n "$GFLOPS" ] || GFLOPS='""'
 		echo -n "	$GFLOPS"
 		echo -n "	$GFLOPS"
 	done
 	done

+ 9 - 9
examples/heat/heat.sh

@@ -3,7 +3,7 @@
 #
 #
 # Copyright (C) 2017                                     CNRS
 # Copyright (C) 2017                                     CNRS
 # Copyright (C) 2017                                     Inria
 # Copyright (C) 2017                                     Inria
-# Copyright (C) 2017                                     Université de Bordeaux
+# Copyright (C) 2017, 2020                                     Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -28,23 +28,23 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat
 fi
 fi
 
 
-$PREFIX/heat -shape 0
-$PREFIX/heat -shape 1
+$STARPU_LAUNCH $PREFIX/heat -shape 0
+$STARPU_LAUNCH $PREFIX/heat -shape 1
 # sometimes lead to pivot being 0
 # sometimes lead to pivot being 0
-#$PREFIX/heat -shape 2
+#$STARPU_LAUNCH $PREFIX/heat -shape 2
 
 
-$PREFIX/heat -cg
+$STARPU_LAUNCH $PREFIX/heat -cg
 
 
 # TODO: FIXME
 # TODO: FIXME
 
 
 # segfault
 # segfault
-#$PREFIX/heat -v1
+#$STARPU_LAUNCH $PREFIX/heat -v1
 
 
 # (actually the default...)
 # (actually the default...)
-$PREFIX/heat -v2
+$STARPU_LAUNCH $PREFIX/heat -v2
 
 
 # hang
 # hang
-#$PREFIX/heat -v3
+#$STARPU_LAUNCH $PREFIX/heat -v3
 
 
 # hang
 # hang
-#$PREFIX/heat -v4
+#$STARPU_LAUNCH $PREFIX/heat -v4

+ 11 - 11
examples/lu/lu.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2017                                     CNRS
 # Copyright (C) 2017                                     CNRS
-# Copyright (C) 2017,2019                                Université de Bordeaux
+# Copyright (C) 2017,2019-2020                                Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +27,11 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float
 fi
 fi
 
 
-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride
-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -bound
-$PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
-$PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride
+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -bound
+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
 
 
 if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/lu_example_float
 	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/lu_example_float
@@ -39,8 +39,8 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float
 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float
 fi
 fi
 
 
-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -piv
-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -no-stride
-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -bound
-$PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
-$PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -piv
+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -no-stride
+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -bound
+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio

+ 19 - 3
examples/mult/xgemm.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2017, 2019                                Université de Bordeaux
+ * Copyright (C) 2009-2017,2019-2020                      Université de Bordeaux
  * Copyright (C) 2012,2013                                Inria
  * Copyright (C) 2012,2013                                Inria
  * Copyright (C) 2017                                     Erwan Leria
  * Copyright (C) 2017                                     Erwan Leria
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
@@ -29,6 +29,7 @@
 
 
 #include <limits.h>
 #include <limits.h>
 #include <string.h>
 #include <string.h>
+#include <unistd.h>
 #include <math.h>
 #include <math.h>
 #include <sys/types.h>
 #include <sys/types.h>
 #include <starpu.h>
 #include <starpu.h>
@@ -58,6 +59,7 @@ static unsigned zdim = 960*4;
 #endif
 #endif
 static unsigned check = 0;
 static unsigned check = 0;
 static unsigned bound = 0;
 static unsigned bound = 0;
+static unsigned print_hostname = 0;
 
 
 static TYPE *A, *B, *C;
 static TYPE *A, *B, *C;
 static starpu_data_handle_t A_handle, B_handle, C_handle;
 static starpu_data_handle_t A_handle, B_handle, C_handle;
@@ -304,6 +306,11 @@ static void parse_args(int argc, char **argv)
 			bound = 1;
 			bound = 1;
 		}
 		}
 
 
+		else if (strcmp(argv[i], "-hostname") == 0)
+		{
+			print_hostname = 1;
+		}
+
 		else if (strcmp(argv[i], "-check") == 0)
 		else if (strcmp(argv[i], "-check") == 0)
 		{
 		{
 			check = 1;
 			check = 1;
@@ -316,7 +323,7 @@ static void parse_args(int argc, char **argv)
 
 
 		else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
 		else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
 		{
 		{
-			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-xy n] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd]\n", argv[0]);
+			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-xy n] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname]\n", argv[0]);
 			fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter);
 			fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter);
 			exit(EXIT_SUCCESS);
 			exit(EXIT_SUCCESS);
 		}
 		}
@@ -400,10 +407,19 @@ int main(int argc, char **argv)
 	if (bound)
 	if (bound)
 		starpu_bound_compute(&min, &min_int, 1);
 		starpu_bound_compute(&min, &min_int, 1);
 
 
-	PRINTF("# x\ty\tz\tms\tGFlops");
+	PRINTF("# ");
+	if (print_hostname)
+		PRINTF("node\t");
+	PRINTF("x\ty\tz\tms\tGFlops");
 	if (bound)
 	if (bound)
 		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
 		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
 	PRINTF("\n");
 	PRINTF("\n");
+	if (print_hostname)
+	{
+		char hostname[255];
+		gethostname(hostname, 255);
+		PRINTF("%s\t", hostname);
+	}
 	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
 	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
 	if (bound)
 	if (bound)
 		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);
 		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);

+ 2 - 2
examples/scheduler/schedulers.sh

@@ -3,7 +3,7 @@
 #
 #
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2012-2015,2017,2018                      CNRS
 # Copyright (C) 2012-2015,2017,2018                      CNRS
-# Copyright (C) 2012,2017,2019                           Université de Bordeaux
+# Copyright (C) 2012,2017,2019-2020                      Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -40,6 +40,6 @@ fi
 for sched in $SCHEDULERS
 for sched in $SCHEDULERS
 do
 do
     echo "cholesky.$sched"
     echo "cholesky.$sched"
-    STARPU_SCHED=$sched ./cholesky/cholesky_tag -size $((960*3)) -nblocks 3
+    STARPU_SCHED=$sched $STARPU_LAUNCH ./cholesky/cholesky_tag -size $((960*3)) -nblocks 3
     check_success $?
     check_success $?
 done
 done

+ 2 - 2
examples/scheduler/schedulers_context.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2012,2014,2015,2017,2018                 CNRS
 # Copyright (C) 2012,2014,2015,2017,2018                 CNRS
-# Copyright (C) 2017,2019                                Université de Bordeaux
+# Copyright (C) 2017,2019-2020                           Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -39,6 +39,6 @@ fi
 for sched in $SCHEDULERS
 for sched in $SCHEDULERS
 do
 do
     echo "sched_ctx.$sched"
     echo "sched_ctx.$sched"
-    STARPU_SCHED=$sched ./sched_ctx/sched_ctx
+    STARPU_SCHED=$sched $STARPU_LAUNCH ./sched_ctx/sched_ctx
     check_success $?
     check_success $?
 done
 done

+ 5 - 1
include/starpu_config.h.in

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2010-2017,2019                           CNRS
  * Copyright (C) 2010-2017,2019                           CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -44,6 +44,7 @@
 #undef STARPU_SIMGRID_MC
 #undef STARPU_SIMGRID_MC
 #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
 #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
 #undef STARPU_HAVE_SIMGRID_MSG_H
 #undef STARPU_HAVE_SIMGRID_MSG_H
+#undef STARPU_HAVE_MSG_MSG_H
 #undef STARPU_HAVE_SIMGRID_ACTOR_H
 #undef STARPU_HAVE_SIMGRID_ACTOR_H
 #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H
 #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H
 #undef STARPU_HAVE_SIMGRID_MUTEX_H
 #undef STARPU_HAVE_SIMGRID_MUTEX_H
@@ -166,6 +167,9 @@ typedef ssize_t starpu_ssize_t;
 #undef STARPU_HAVE_PTHREAD_BARRIER
 #undef STARPU_HAVE_PTHREAD_BARRIER
 #undef STARPU_HAVE_PTHREAD_SETNAME_NP
 #undef STARPU_HAVE_PTHREAD_SETNAME_NP
 #undef STARPU_HAVE_STRUCT_TIMESPEC
 #undef STARPU_HAVE_STRUCT_TIMESPEC
+#undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO
+#undef STARPU_PTHREAD_COND_INITIALIZER_ZERO
+#undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO
 
 
 /* This is only for building examples */
 /* This is only for building examples */
 #undef STARPU_HAVE_HELGRIND_H
 #undef STARPU_HAVE_HELGRIND_H

+ 8 - 2
include/starpu_task.h

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011-2017,2019                           Inria
+ * Copyright (C) 2011-2017,2020                           Inria
  * Copyright (C) 2009-2019                                Université de Bordeaux
  * Copyright (C) 2009-2019                                Université de Bordeaux
- * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
+ * Copyright (C) 2010-2015,2017,2018,2019,2020            CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
  *
  *
@@ -1667,6 +1667,12 @@ void starpu_task_ft_failed(struct starpu_task *task);
  */
  */
 void starpu_task_ft_success(struct starpu_task *meta_task);
 void starpu_task_ft_success(struct starpu_task *meta_task);
 
 
+/**
+   Set the function to call when the watchdog detects that StarPU has
+   not finished any task for STARPU_WATCHDOG_TIMEOUT seconds
+*/
+void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg);
+
 /** @} */
 /** @} */
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 10 - 4
include/starpu_thread.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2013,2015-2017                           Inria
  * Copyright (C) 2013,2015-2017                           Inria
  * Copyright (C) 2010-2015,2017,2019                           CNRS
  * Copyright (C) 2010-2015,2017,2019                           CNRS
- * Copyright (C) 2010,2012-2019                           Université de Bordeaux
+ * Copyright (C) 2010,2012-2020                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,7 +25,10 @@
 #include <starpu_util.h>
 #include <starpu_util.h>
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 #include <pthread.h>
 #include <pthread.h>
-#ifdef STARPU_HAVE_XBT_SYNCHRO_H
+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
+#include <simgrid/mutex.h>
+#include <simgrid/cond.h>
+#elif defined(STARPU_HAVE_XBT_SYNCHRO_H)
 #include <xbt/synchro.h>
 #include <xbt/synchro.h>
 #else
 #else
 #include <xbt/synchro_core.h>
 #include <xbt/synchro_core.h>
@@ -45,9 +48,12 @@
 #ifdef STARPU_HAVE_SIMGRID_BARRIER_H
 #ifdef STARPU_HAVE_SIMGRID_BARRIER_H
 #include <simgrid/barrier.h>
 #include <simgrid/barrier.h>
 #endif
 #endif
+#ifdef STARPU_HAVE_SIMGRID_HOST_H
+#include <simgrid/host.h>
+#endif
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>
 #include <simgrid/msg.h>
-#else
+#elif defined(STARPU_HAVE_MSG_MSG_H)
 #include <msg/msg.h>
 #include <msg/msg.h>
 #endif
 #endif
 #elif !defined(_MSC_VER) || defined(BUILDING_STARPU)
 #elif !defined(_MSC_VER) || defined(BUILDING_STARPU)
@@ -81,7 +87,7 @@ typedef msg_host_t starpu_sg_host_t;
 #endif
 #endif
 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
 starpu_pthread_t starpu_pthread_self(void);
 starpu_pthread_t starpu_pthread_self(void);
-int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host);
+int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host);
 int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]);
 starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]);
 int starpu_pthread_join(starpu_pthread_t thread, void **retval);
 int starpu_pthread_join(starpu_pthread_t thread, void **retval);

+ 53 - 5
include/starpu_thread_util.h

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012,2013                                Inria
  * Copyright (C) 2012,2013                                Inria
- * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
- * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
+ * Copyright (C) 2010-2014,2016,2017,2020                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -64,7 +64,7 @@
  * Encapsulation of the starpu_pthread_mutex_* functions.
  * Encapsulation of the starpu_pthread_mutex_* functions.
  */
  */
 
 
-#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                           \
+#define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                           \
 	int p_ret = starpu_pthread_mutex_init((mutex), (attr));                \
 	int p_ret = starpu_pthread_mutex_init((mutex), (attr));                \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 		fprintf(stderr,                                                \
 		fprintf(stderr,                                                \
@@ -74,6 +74,22 @@
 	}                                                                      \
 	}                                                                      \
 } while (0)
 } while (0)
 
 
+/*
+ * STARPU_PTHREAD_MUTEX_INIT(mutex, attr) initializes *mutex.
+ * STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) is the variant that skips the
+ * work entirely when no attribute is given.
+ * NOTE(review): INIT0 presumably targets storage the caller has already
+ * zeroed -- confirm against callers.
+ *
+ * When STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO is defined and attr is null,
+ * INIT zero-fills the object with memset() instead of calling
+ * starpu_pthread_mutex_init(), and INIT0 does nothing.  In every other case
+ * both forward to _STARPU_PTHREAD_MUTEX_INIT().
+ *
+ * Macro arguments are parenthesized so that expression arguments
+ * (e.g. "cond ? &a : NULL" or "p + 1") are evaluated as a whole.
+ */
+#ifdef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO
+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                            \
+	if (!(attr))                                                           \
+		memset((mutex), 0, sizeof(*(mutex)));                          \
+	else                                                                   \
+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
+} while (0)
+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) do {                           \
+	if ((attr))                                                            \
+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
+} while (0)
+#else
+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
+#endif
+
 #define STARPU_PTHREAD_MUTEX_DESTROY(mutex) do {                              \
 #define STARPU_PTHREAD_MUTEX_DESTROY(mutex) do {                              \
 	int p_ret = starpu_pthread_mutex_destroy(mutex);                       \
 	int p_ret = starpu_pthread_mutex_destroy(mutex);                       \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
@@ -199,7 +215,7 @@ int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *fil
 /*
 /*
  * Encapsulation of the starpu_pthread_rwlock_* functions.
  * Encapsulation of the starpu_pthread_rwlock_* functions.
  */
  */
-#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                          \
+#define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                         \
 	int p_ret = starpu_pthread_rwlock_init((rwlock), (attr));              \
 	int p_ret = starpu_pthread_rwlock_init((rwlock), (attr));              \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 		fprintf(stderr,                                                \
 		fprintf(stderr,                                                \
@@ -209,6 +225,22 @@ int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *fil
 	}                                                                      \
 	}                                                                      \
 } while (0)
 } while (0)
 
 
+/*
+ * STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) initializes *rwlock.
+ * STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) is the variant that skips the
+ * work entirely when no attribute is given.
+ * NOTE(review): INIT0 presumably targets storage the caller has already
+ * zeroed -- confirm against callers.
+ *
+ * When STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO is defined and attr is null,
+ * INIT zero-fills the object with memset() instead of calling
+ * starpu_pthread_rwlock_init(), and INIT0 does nothing.  In every other case
+ * both forward to _STARPU_PTHREAD_RWLOCK_INIT().
+ *
+ * Macro arguments are parenthesized so that expression arguments
+ * are evaluated as a whole.
+ */
+#ifdef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO
+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                          \
+	if (!(attr))                                                           \
+		memset((rwlock), 0, sizeof(*(rwlock)));                        \
+	else                                                                   \
+		_STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr);                     \
+} while (0)
+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) do {                         \
+	if ((attr))                                                            \
+		_STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr);                     \
+} while (0)
+#else
+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
+#endif
+
 #define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) do {                              \
 #define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) do {                              \
 	int p_ret = starpu_pthread_rwlock_rdlock(rwlock);                      \
 	int p_ret = starpu_pthread_rwlock_rdlock(rwlock);                      \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
@@ -282,7 +314,7 @@ int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, char *file
 /*
 /*
  * Encapsulation of the starpu_pthread_cond_* functions.
  * Encapsulation of the starpu_pthread_cond_* functions.
  */
  */
-#define STARPU_PTHREAD_COND_INIT(cond, attr) do {                             \
+#define _STARPU_PTHREAD_COND_INIT(cond, attr) do {                             \
 	int p_ret = starpu_pthread_cond_init((cond), (attr));                  \
 	int p_ret = starpu_pthread_cond_init((cond), (attr));                  \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 		fprintf(stderr,                                                \
 		fprintf(stderr,                                                \
@@ -292,6 +324,22 @@ int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, char *file
 	}                                                                      \
 	}                                                                      \
 } while (0)
 } while (0)
 
 
+/*
+ * STARPU_PTHREAD_COND_INIT(cond, attr) initializes *cond.
+ * STARPU_PTHREAD_COND_INIT0(cond, attr) is the variant that skips the
+ * work entirely when no attribute is given.
+ * NOTE(review): INIT0 presumably targets storage the caller has already
+ * zeroed -- confirm against callers.
+ *
+ * When STARPU_PTHREAD_COND_INITIALIZER_ZERO is defined and attr is null,
+ * INIT zero-fills the object with memset() instead of calling
+ * starpu_pthread_cond_init(), and INIT0 does nothing.  In every other case
+ * both forward to _STARPU_PTHREAD_COND_INIT().
+ *
+ * Macro arguments are parenthesized so that expression arguments
+ * are evaluated as a whole.
+ */
+#ifdef STARPU_PTHREAD_COND_INITIALIZER_ZERO
+#define STARPU_PTHREAD_COND_INIT(cond, attr) do {                              \
+	if (!(attr))                                                           \
+		memset((cond), 0, sizeof(*(cond)));                            \
+	else                                                                   \
+		_STARPU_PTHREAD_COND_INIT(cond, attr);                         \
+} while (0)
+#define STARPU_PTHREAD_COND_INIT0(cond, attr) do {                             \
+	if ((attr))                                                            \
+		_STARPU_PTHREAD_COND_INIT(cond, attr);                         \
+} while (0)
+#else
+#define STARPU_PTHREAD_COND_INIT(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
+#define STARPU_PTHREAD_COND_INIT0(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
+#endif
+
 #define STARPU_PTHREAD_COND_DESTROY(cond) do {                                \
 #define STARPU_PTHREAD_COND_DESTROY(cond) do {                                \
 	int p_ret = starpu_pthread_cond_destroy(cond);                         \
 	int p_ret = starpu_pthread_cond_destroy(cond);                         \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \

+ 21 - 1
m4/libs.m4

@@ -2,7 +2,7 @@
 #
 #
 # Copyright (C) 2011                                     Inria
 # Copyright (C) 2011                                     Inria
 # Copyright (C) 2012,2017                                CNRS
 # Copyright (C) 2012,2017                                CNRS
-# Copyright (C) 2011,2014                                Université de Bordeaux
+# Copyright (C) 2011,2014,2020                           Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -53,3 +53,23 @@ AC_DEFUN([STARPU_CHECK_LIB], [dnl
 AC_DEFUN([STARPU_HAVE_LIBRARY], [dnl
 AC_DEFUN([STARPU_HAVE_LIBRARY], [dnl
 STARPU_CHECK_LIB([$1], [$2], main, [$3], [$4], [$5])
 STARPU_CHECK_LIB([$1], [$2], main, [$3], [$4], [$5])
 ])dnl
 ])dnl
+
+# STARPU_INIT_ZERO(INCLUDES, TYPE, INIT_MACRO)
+# Checks whether a variable of type TYPE initialized with INIT_MACRO consists
+# only of zero bytes. If it does, the preprocessor symbol named STARPU_,
+# followed by INIT_MACRO, followed by _ZERO, is defined to 1.
+# The check needs to run a test program. When cross-compiling it cannot be
+# run, so the conservative answer is assumed and the symbol is left undefined.
+AC_DEFUN([STARPU_INIT_ZERO], [dnl
+AC_MSG_CHECKING(whether $3 just zeroes)
+AC_RUN_IFELSE([AC_LANG_PROGRAM(
+		$1,
+		[[$2 var = $3;
+		 char *p;
+		 for (p = (char*) &var; p < (char*) (&var+1); p++)
+		   if (*p != 0)
+		     return 1;
+		 return 0;
+		]],
+		)],
+		[AC_DEFINE([STARPU_$3_ZERO], [1], [Define to 1 if `$3' is just zeroes])
+		 AC_MSG_RESULT(yes)],
+		[AC_MSG_RESULT(no)],
+		[AC_MSG_RESULT([no (cross-compiling)])])
+])dnl

+ 1 - 5
mpi/examples/Makefile.am

@@ -2,7 +2,7 @@
 #
 #
 # Copyright (C) 2012,2014,2016                           Inria
 # Copyright (C) 2012,2014,2016                           Inria
 # Copyright (C) 2010-2017,2019                           CNRS
 # Copyright (C) 2010-2017,2019                           CNRS
-# Copyright (C) 2009-2017,2019                           Université de Bordeaux
+# Copyright (C) 2009-2017,2019-2020                      Université de Bordeaux
 # Copyright (C) 2013                                     Thibaut Lambert
 # Copyright (C) 2013                                     Thibaut Lambert
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -248,13 +248,11 @@ matrix_decomposition_mpi_cholesky_distributed_SOURCES =	\
 matrix_decomposition_mpi_cholesky_distributed_LDADD =	\
 matrix_decomposition_mpi_cholesky_distributed_LDADD =	\
 	$(STARPU_BLAS_LDFLAGS) -lm
 	$(STARPU_BLAS_LDFLAGS) -lm
 
 
-if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES +=				\
 starpu_mpi_EXAMPLES +=				\
 	matrix_decomposition/mpi_cholesky			\
 	matrix_decomposition/mpi_cholesky			\
 	matrix_decomposition/mpi_cholesky_distributed
 	matrix_decomposition/mpi_cholesky_distributed
 endif
 endif
 endif
 endif
-endif
 
 
 ########################
 ########################
 # MPI Matrix mult example #
 # MPI Matrix mult example #
@@ -336,11 +334,9 @@ complex_mpi_complex_SOURCES =		\
 	complex/mpi_complex.c		\
 	complex/mpi_complex.c		\
 	../../examples/interface/complex_interface.c
 	../../examples/interface/complex_interface.c
 
 
-if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES	+=			\
 starpu_mpi_EXAMPLES	+=			\
 	complex/mpi_complex
 	complex/mpi_complex
 endif
 endif
-endif
 
 
 #########################
 #########################
 # user_datatype example #
 # user_datatype example #

+ 3 - 3
mpi/examples/perf.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2010,2011,2014                           Université de Bordeaux
+# Copyright (C) 2010,2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -40,7 +40,7 @@ ncalibrate=0
 for i in `seq 1 $ncalibrate`
 for i in `seq 1 $ncalibrate`
 do
 do
 echo "STARPU_CALIBRATE $i/$ncalibrate"
 echo "STARPU_CALIBRATE $i/$ncalibrate"
-STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
+STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes $STARPU_LAUNCH ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
 done
 done
 
 
 func()
 func()
@@ -57,7 +57,7 @@ echo "*******************************************">> log
 cat log
 cat log
 cat log >> log.all
 cat log >> log.all
 
 
-STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
+STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np $STARPU_LAUNCH ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
 cat log.out > log
 cat log.out > log
 cat log.err >> log
 cat log.err >> log
 cat log
 cat log

+ 2 - 0
mpi/examples/user_datatype/my_interface.h

@@ -59,6 +59,7 @@ static struct starpu_codelet starpu_my_data_display_codelet =
 	.cpu_funcs_name = {"starpu_my_data_display_codelet_cpu"},
 	.cpu_funcs_name = {"starpu_my_data_display_codelet_cpu"},
 	.nbuffers = 1,
 	.nbuffers = 1,
 	.modes = {STARPU_R},
 	.modes = {STARPU_R},
+	.model = &starpu_perfmodel_nop,
 	.name = "starpu_my_data_display_codelet"
 	.name = "starpu_my_data_display_codelet"
 };
 };
 
 
@@ -68,6 +69,7 @@ static struct starpu_codelet starpu_my_data_compare_codelet =
 	.cpu_funcs_name = {"starpu_my_data_compare_codelet_cpu"},
 	.cpu_funcs_name = {"starpu_my_data_compare_codelet_cpu"},
 	.nbuffers = 2,
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_R},
 	.modes = {STARPU_R, STARPU_R},
+	.model = &starpu_perfmodel_nop,
 	.name = "starpu_my_data_compare_codelet"
 	.name = "starpu_my_data_compare_codelet"
 };
 };
 
 

+ 70 - 34
mpi/src/mpi/starpu_mpi_mpi.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2017                                     Guillaume Beauchamp
  * Copyright (C) 2017                                     Guillaume Beauchamp
  *
  *
@@ -19,6 +19,10 @@
 
 
 #include <stdlib.h>
 #include <stdlib.h>
 #include <limits.h>
 #include <limits.h>
+#include <common/config.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_private.h>
 #include <starpu_mpi_private.h>
@@ -33,7 +37,6 @@
 #include <mpi/starpu_mpi_tag.h>
 #include <mpi/starpu_mpi_tag.h>
 #include <mpi/starpu_mpi_comm.h>
 #include <mpi/starpu_mpi_comm.h>
 #include <starpu_mpi_init.h>
 #include <starpu_mpi_init.h>
-#include <common/config.h>
 #include <common/thread.h>
 #include <common/thread.h>
 #include <datawizard/interfaces/data_interface.h>
 #include <datawizard/interfaces/data_interface.h>
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
@@ -325,7 +328,7 @@ static void _starpu_mpi_simgrid_wait_req_func(void* arg)
 	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret));
 	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret));
 
 
 	*(sim_req->done) = 1;
 	*(sim_req->done) = 1;
-	starpu_pthread_queue_signal(sim_req->queue);
+	starpu_pthread_queue_broadcast(sim_req->queue);
 
 
 	free(sim_req);
 	free(sim_req);
 
 
@@ -501,10 +504,10 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 	{
 	{
 		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
 		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
 		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request);
 		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request);
+	}
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-		_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
+	_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 #endif
-	}
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 
 
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
@@ -526,6 +529,7 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 /*                                                      */
 /*                                                      */
 /********************************************************/
 /********************************************************/
 
 
+#ifndef STARPU_SIMGRID
 void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 {
 {
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
@@ -535,10 +539,6 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 	if (req->backend->data_request != MPI_REQUEST_NULL)
 	if (req->backend->data_request != MPI_REQUEST_NULL)
 	{
 	{
-		// TODO: Fix for STARPU_SIMGRID
-#ifdef STARPU_SIMGRID
-		STARPU_MPI_ASSERT_MSG(0, "Implement this in STARPU_SIMGRID");
-#endif
 		req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status);
 		req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 	}
@@ -548,15 +548,36 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 
 
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();
 }
 }
+#endif
 
 
 int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 {
 {
 	int ret;
 	int ret;
 	struct _starpu_mpi_req *req = *public_req;
 	struct _starpu_mpi_req *req = *public_req;
-	struct _starpu_mpi_req *waiting_req;
 
 
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
 
 
+#ifdef STARPU_SIMGRID
+	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
+	starpu_pthread_wait_t wait;
+	starpu_pthread_wait_init(&wait);
+	starpu_pthread_queue_register(&wait, &req->queue);
+	while (1)
+	{
+		starpu_pthread_wait_reset(&wait);
+		if (req->done)
+			break;
+		starpu_pthread_wait_wait(&wait);
+	}
+	starpu_pthread_queue_unregister(&wait, &req->queue);
+	starpu_pthread_wait_destroy(&wait);
+	_STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
+
+	if (status)
+		*status = req->status_store;
+	_starpu_mpi_handle_request_termination(req);
+#else
+	struct _starpu_mpi_req *waiting_req;
 	/* We cannot try to complete a MPI request that was not actually posted
 	/* We cannot try to complete a MPI request that was not actually posted
 	 * to MPI yet. */
 	 * to MPI yet. */
 	STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex));
 	STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex));
@@ -580,16 +601,17 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 		STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex);
 		STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 
 
-	ret = req->ret;
-
 	/* The internal request structure was automatically allocated */
 	/* The internal request structure was automatically allocated */
+	_starpu_mpi_request_destroy(waiting_req);
+#endif
+
 	*public_req = NULL;
 	*public_req = NULL;
 	if (req->backend->internal_req)
 	if (req->backend->internal_req)
 	{
 	{
 		_starpu_mpi_request_destroy(req->backend->internal_req);
 		_starpu_mpi_request_destroy(req->backend->internal_req);
 	}
 	}
+	ret = req->ret;
 	_starpu_mpi_request_destroy(req);
 	_starpu_mpi_request_destroy(req);
-	_starpu_mpi_request_destroy(waiting_req);
 
 
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();
 	return ret;
 	return ret;
@@ -601,6 +623,7 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 /*                                                      */
 /*                                                      */
 /********************************************************/
 /********************************************************/
 
 
+#ifndef STARPU_SIMGRID
 void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 {
 {
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
@@ -613,12 +636,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 
 
 	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 
 
-#ifdef STARPU_SIMGRID
-	req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, testing_req->flag);
-	memcpy(testing_req->status, &req->status_store, sizeof(*testing_req->status));
-#else
 	req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status);
 	req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status);
-#endif
 
 
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 
 
@@ -636,6 +654,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex);
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();
 }
 }
+#endif
 
 
 int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 {
 {
@@ -648,6 +667,15 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
 
 	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
 	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
 
 
+#ifdef STARPU_SIMGRID
+	ret = req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, flag);
+	if (*flag)
+	{
+		if (status)
+			*status = req->status_store;
+		_starpu_mpi_handle_request_termination(req);
+	}
+#else
 	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	unsigned submitted = req->submitted;
 	unsigned submitted = req->submitted;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
@@ -676,25 +704,26 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
 
 		ret = testing_req->ret;
 		ret = testing_req->ret;
 
 
-		if (*(testing_req->flag))
-		{
-			/* The request was completed so we free the internal
-			 * request structure which was automatically allocated
-			 * */
-			*public_req = NULL;
-			if (req->backend->internal_req)
-			{
-				_starpu_mpi_request_destroy(req->backend->internal_req);
-			}
-			_starpu_mpi_request_destroy(req);
-		}
-
 		_starpu_mpi_request_destroy(testing_req);
 		_starpu_mpi_request_destroy(testing_req);
 	}
 	}
 	else
 	else
 	{
 	{
 		*flag = 0;
 		*flag = 0;
 	}
 	}
+#endif
+
+	if (*flag)
+	{
+		/* The request was completed so we free the internal
+		 * request structure which was automatically allocated
+		 * */
+		*public_req = NULL;
+		if (req->backend->internal_req)
+		{
+			_starpu_mpi_request_destroy(req->backend->internal_req);
+		}
+		_starpu_mpi_request_destroy(req);
+	}
 
 
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();
 	return ret;
 	return ret;
@@ -930,6 +959,9 @@ static void _starpu_mpi_early_data_cb(void* arg)
 			args->req->submitted = 1;
 			args->req->submitted = 1;
 			STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond);
 			STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex);
+#ifdef STARPU_SIMGRID
+			args->req->done = 1;
+#endif
 		}
 		}
 	}
 	}
 
 
@@ -1133,7 +1165,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 
 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0)
 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0)
 	{
 	{
-		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
+		char hostname[65];
+		gethostname(hostname, sizeof(hostname));
+		_STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname);
 	}
 	}
 	_starpu_mpi_do_initialize(argc_argv);
 	_starpu_mpi_do_initialize(argc_argv);
 	if (_starpu_mpi_thread_cpuid >= 0)
 	if (_starpu_mpi_thread_cpuid >= 0)
@@ -1150,13 +1184,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	int i;
 	int i;
 	for (i = 0; i < *(argc_argv->argc); i++)
 	for (i = 0; i < *(argc_argv->argc); i++)
 		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
 		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
+	void **tsd;
+	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
 #ifdef HAVE_SG_ACTOR_DATA
 #ifdef HAVE_SG_ACTOR_DATA
 	_starpu_simgrid_actor_create("main", smpi_simulated_main_, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
 	_starpu_simgrid_actor_create("main", smpi_simulated_main_, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
+	/* And set TSD for us */
+	sg_actor_data_set(sg_actor_self(), tsd);
 #else
 #else
 	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
 	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
 	/* And set TSD for us */
 	/* And set TSD for us */
-	void **tsd;
-	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
 	if (!smpi_process_set_user_data)
 	if (!smpi_process_set_user_data)
 	{
 	{
 		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
 		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");

+ 8 - 8
mpi/src/mpi/starpu_mpi_mpi_backend.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
- * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ * Copyright (C) 2009-2014,2017,2018-2020                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -51,21 +51,21 @@ void _starpu_mpi_mpi_backend_request_init(struct _starpu_mpi_req *req)
 {
 {
 	_STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend));
 	_STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend));
 
 
-	req->backend->data_request = 0;
+	//req->backend->data_request = 0;
 
 
 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->req_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->req_mutex, NULL);
 	STARPU_PTHREAD_COND_INIT(&req->backend->req_cond, NULL);
 	STARPU_PTHREAD_COND_INIT(&req->backend->req_cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->posted_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->posted_mutex, NULL);
 	STARPU_PTHREAD_COND_INIT(&req->backend->posted_cond, NULL);
 	STARPU_PTHREAD_COND_INIT(&req->backend->posted_cond, NULL);
 
 
-	req->backend->other_request = NULL;
+	//req->backend->other_request = NULL;
 
 
-	req->backend->size_req = 0;
-	req->backend->internal_req = NULL;
-	req->backend->is_internal_req = 0;
+	//req->backend->size_req = 0;
+	//req->backend->internal_req = NULL;
+	//req->backend->is_internal_req = 0;
 	req->backend->to_destroy = 1;
 	req->backend->to_destroy = 1;
-	req->backend->early_data_handle = NULL;
-	req->backend->envelope = NULL;
+	//req->backend->early_data_handle = NULL;
+	//req->backend->envelope = NULL;
 }
 }
 
 
 void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)
 void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)

+ 8 - 3
mpi/src/nmad/starpu_mpi_nmad.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
- * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ * Copyright (C) 2009-2014,2017,2018-2020                 Université de Bordeaux
  * Copyright (C) 2017                                     Guillaume Beauchamp
  * Copyright (C) 2017                                     Guillaume Beauchamp
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,10 @@
 
 
 #include <stdlib.h>
 #include <stdlib.h>
 #include <limits.h>
 #include <limits.h>
+#include <common/config.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_private.h>
 #include <starpu_mpi_private.h>
@@ -28,7 +32,6 @@
 #include <starpu_mpi_cache.h>
 #include <starpu_mpi_cache.h>
 #include <starpu_mpi_select_node.h>
 #include <starpu_mpi_select_node.h>
 #include <starpu_mpi_init.h>
 #include <starpu_mpi_init.h>
-#include <common/config.h>
 #include <common/thread.h>
 #include <common/thread.h>
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
 #include <core/task.h>
 #include <core/task.h>
@@ -414,7 +417,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 
 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, 0, "MPI") < 0)
 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, 0, "MPI") < 0)
 	{
 	{
-		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
+		char hostname[65];
+		gethostname(hostname, sizeof(hostname));
+		_STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname);
 	}
 	}
 	_starpu_mpi_do_initialize(argc_argv);
 	_starpu_mpi_do_initialize(argc_argv);
 	if (_starpu_mpi_thread_cpuid >= 0)
 	if (_starpu_mpi_thread_cpuid >= 0)

+ 4 - 4
mpi/src/starpu_mpi.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2009-2018,2020                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -46,7 +46,7 @@ static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum sta
 
 
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency)
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 {
-	if (_starpu_mpi_fake_world_size != -1)
+	if (STARPU_UNLIKELY(_starpu_mpi_fake_world_size != -1))
 	{
 	{
 		/* Don't actually do the communication */
 		/* Don't actually do the communication */
 		return NULL;
 		return NULL;
@@ -114,9 +114,9 @@ int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_
 	MPI_Status status;
 	MPI_Status status;
 
 
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_LOG_IN();
-	memset(&status, 0, sizeof(MPI_Status));
-
 	starpu_mpi_isend_prio(data_handle, &req, dest, data_tag, prio, comm);
 	starpu_mpi_isend_prio(data_handle, &req, dest, data_tag, prio, comm);
+
+	memset(&status, 0, sizeof(MPI_Status));
 	starpu_mpi_wait(&req, &status);
 	starpu_mpi_wait(&req, &status);
 
 
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();

+ 17 - 17
mpi/src/starpu_mpi_req.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2017                                     Guillaume Beauchamp
  * Copyright (C) 2017                                     Guillaume Beauchamp
  *
  *
@@ -25,37 +25,37 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
 	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
 
 
 	/* Initialize the request structure */
 	/* Initialize the request structure */
-	(*req)->data_handle = NULL;
-	(*req)->prio = 0;
+	//(*req)->data_handle = NULL;
+	//(*req)->prio = 0;
 
 
-	(*req)->datatype = 0;
-	(*req)->datatype_name = NULL;
-	(*req)->ptr = NULL;
+	//(*req)->datatype = 0;
+	//(*req)->datatype_name = NULL;
+	//(*req)->ptr = NULL;
 	(*req)->count = -1;
 	(*req)->count = -1;
 	(*req)->registered_datatype = -1;
 	(*req)->registered_datatype = -1;
 
 
 	(*req)->node_tag.node.rank = -1;
 	(*req)->node_tag.node.rank = -1;
 	(*req)->node_tag.data_tag = -1;
 	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.node.comm = 0;
+	//(*req)->node_tag.node.comm = 0;
 
 
-	(*req)->func = NULL;
+	//(*req)->func = NULL;
 
 
-	(*req)->status = NULL;
-	(*req)->flag = NULL;
+	//(*req)->status = NULL;
+	//(*req)->flag = NULL;
 	_starpu_mpi_req_multilist_init_coop_sends(*req);
 	_starpu_mpi_req_multilist_init_coop_sends(*req);
 
 
 	(*req)->ret = -1;
 	(*req)->ret = -1;
 
 
 	(*req)->request_type = UNKNOWN_REQ;
 	(*req)->request_type = UNKNOWN_REQ;
 
 
-	(*req)->submitted = 0;
-	(*req)->completed = 0;
-	(*req)->posted = 0;
+	//(*req)->submitted = 0;
+	//(*req)->completed = 0;
+	//(*req)->posted = 0;
 
 
-	(*req)->sync = 0;
+	//(*req)->sync = 0;
 	(*req)->detached = -1;
 	(*req)->detached = -1;
-	(*req)->callback = NULL;
-	(*req)->callback_arg = NULL;
+	//(*req)->callback = NULL;
+	//(*req)->callback_arg = NULL;
 
 
 	(*req)->sequential_consistency = 1;
 	(*req)->sequential_consistency = 1;
 	(*req)->pre_sync_jobid = -1;
 	(*req)->pre_sync_jobid = -1;
@@ -64,7 +64,7 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	starpu_pthread_queue_init(&((*req)->queue));
 	starpu_pthread_queue_init(&((*req)->queue));
 	starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue));
 	starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue));
-	(*req)->done = 0;
+	//(*req)->done = 0;
 #endif
 #endif
 	_mpi_backend._starpu_mpi_backend_request_init(*req);
 	_mpi_backend._starpu_mpi_backend_request_init(*req);
 }
 }

+ 23 - 23
mpi/tests/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2010-2019                                CNRS
 # Copyright (C) 2010-2019                                CNRS
-# Copyright (C) 2009-2018                                Université de Bordeaux
+# Copyright (C) 2009-2018, 2020                                Université de Bordeaux
 # Copyright (C) 2013                                     Thibaut Lambert
 # Copyright (C) 2013                                     Thibaut Lambert
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -95,12 +95,17 @@ if BUILD_TESTS
 starpu_mpi_TESTS =
 starpu_mpi_TESTS =
 
 
 starpu_mpi_TESTS +=				\
 starpu_mpi_TESTS +=				\
+	broadcast				\
 	cache					\
 	cache					\
 	cache_disable				\
 	cache_disable				\
 	callback				\
 	callback				\
+	driver					\
 	early_request				\
 	early_request				\
+	gather					\
+	gather2					\
 	insert_task				\
 	insert_task				\
 	insert_task_block			\
 	insert_task_block			\
+	insert_task_count			\
 	insert_task_dyn_handles			\
 	insert_task_dyn_handles			\
 	insert_task_node_choice			\
 	insert_task_node_choice			\
 	insert_task_owner			\
 	insert_task_owner			\
@@ -108,52 +113,47 @@ starpu_mpi_TESTS +=				\
 	insert_task_owner_data			\
 	insert_task_owner_data			\
 	matrix					\
 	matrix					\
 	matrix2					\
 	matrix2					\
+	mpi_barrier				\
 	mpi_detached_tag			\
 	mpi_detached_tag			\
+	mpi_earlyrecv				\
+	mpi_irecv				\
 	mpi_irecv_detached			\
 	mpi_irecv_detached			\
+	mpi_isend				\
 	mpi_isend_detached			\
 	mpi_isend_detached			\
 	mpi_reduction				\
 	mpi_reduction				\
+	mpi_redux				\
 	mpi_scatter_gather			\
 	mpi_scatter_gather			\
+	mpi_test				\
+	multiple_send				\
+	pingpong				\
 	policy_register				\
 	policy_register				\
 	policy_register_many			\
 	policy_register_many			\
 	policy_selection			\
 	policy_selection			\
 	policy_selection2			\
 	policy_selection2			\
+	ring					\
+	ring_async				\
 	ring_async_implicit			\
 	ring_async_implicit			\
+	ring_sync				\
+	ring_sync_detached			\
 	temporary				\
 	temporary				\
-	early_stuff
+	user_defined_datatype			\
+	early_stuff				\
+	sendrecv_bench
 
 
 if !STARPU_SIMGRID
 if !STARPU_SIMGRID
+# missing support in simgrid
 starpu_mpi_TESTS +=				\
 starpu_mpi_TESTS +=				\
 	attr					\
 	attr					\
-	broadcast				\
-	pingpong				\
-	mpi_test				\
-	mpi_isend				\
-	mpi_earlyrecv				\
 	mpi_earlyrecv2				\
 	mpi_earlyrecv2				\
 	mpi_earlyrecv2_sync			\
 	mpi_earlyrecv2_sync			\
-	mpi_irecv				\
-	mpi_barrier				\
-	mpi_redux				\
-	ring					\
-	ring_sync				\
-	ring_sync_detached			\
-	ring_async				\
 	block_interface				\
 	block_interface				\
 	block_interface_pinned			\
 	block_interface_pinned			\
-	matrix2					\
 	insert_task_compute			\
 	insert_task_compute			\
 	insert_task_sent_cache			\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
 	insert_task_recv_cache			\
-	insert_task_count			\
 	insert_task_seq				\
 	insert_task_seq				\
-	multiple_send				\
-	user_defined_datatype			\
 	tags_checking				\
 	tags_checking				\
-	sync					\
-	gather					\
-	gather2					\
-	driver					\
-	sendrecv_bench
+	sync
 
 
 if STARPU_USE_MPI_MPI
 if STARPU_USE_MPI_MPI
 starpu_mpi_TESTS +=				\
 starpu_mpi_TESTS +=				\

+ 4 - 1
mpi/tests/driver.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2017,2018                                CNRS
  * Copyright (C) 2017,2018                                CNRS
- * Copyright (C) 2018                                     Université de Bordeaux
+ * Copyright (C) 2018,2020                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -100,6 +100,9 @@ int main(int argc, char **argv)
 			}
 			}
 		}
 		}
 		finished = request[0] == NULL && request[1] == NULL;
 		finished = request[0] == NULL && request[1] == NULL;
+#ifdef STARPU_SIMGRID
+		starpu_sleep(0.001);
+#endif
 	}
 	}
 
 
 	if (rank%2 == 0)
 	if (rank%2 == 0)

+ 4 - 1
mpi/tests/mpi_earlyrecv.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2009,2010,2014,2015,2017,2018            Université de Bordeaux
+ * Copyright (C) 2009,2010,2014,2015,2017,2018,2020       Université de Bordeaux
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -98,6 +98,9 @@ int main(int argc, char **argv)
 			}
 			}
 		}
 		}
 		finished = request[0] == NULL && request[1] == NULL;
 		finished = request[0] == NULL && request[1] == NULL;
+#ifdef STARPU_SIMGRID
+		starpu_sleep(0.001);
+#endif
 	}
 	}
 
 
 	if (rank%2 == 0)
 	if (rank%2 == 0)

+ 2 - 2
mpi/tests/mpi_earlyrecv2.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009,2010,2014,2015,2017,2018            Université de Bordeaux
+ * Copyright (C) 2009,2010,2014,2015,2017,2018,2020       Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  *
  *
@@ -73,7 +73,7 @@ int exchange(int rank, starpu_data_handle_t *handles, check_func func, int detac
 		}
 		}
 
 
 		// We sleep to make sure that the data for the tag 9 will be received before the recv is posted
 		// We sleep to make sure that the data for the tag 9 will be received before the recv is posted
-		usleep(2000000);
+		starpu_sleep(2);
 		for(i=1 ; i<NB ; i++)
 		for(i=1 ; i<NB ; i++)
 		{
 		{
 			if (detached)
 			if (detached)

+ 2 - 2
mpi/tests/mpi_earlyrecv2_sync.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009,2010,2015,2018                      Université de Bordeaux
+ * Copyright (C) 2009,2010,2015,2018,2020                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -65,7 +65,7 @@ int exchange(int rank, starpu_data_handle_t *handles, check_func func)
 		STARPU_ASSERT(req[1] != NULL);
 		STARPU_ASSERT(req[1] != NULL);
 
 
 		// We sleep to make sure that the data for the tag 8 and the tag 9 will be received before the recv are posted
 		// We sleep to make sure that the data for the tag 8 and the tag 9 will be received before the recv are posted
-		usleep(2000000);
+		starpu_sleep(2);
 		for(i=2 ; i<NB ; i++)
 		for(i=2 ; i<NB ; i++)
 		{
 		{
 			starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
 			starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);

+ 4 - 1
mpi/tests/mpi_test.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010,2011,2014,2015,2017,2018            Université de Bordeaux
+ * Copyright (C) 2010,2011,2014,2015,2017,2018,2020       Université de Bordeaux
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
@@ -79,6 +79,9 @@ int main(int argc, char **argv)
 		{
 		{
 			MPI_Status status;
 			MPI_Status status;
 			starpu_mpi_test(&req, &finished, &status);
 			starpu_mpi_test(&req, &finished, &status);
+#ifdef STARPU_SIMGRID
+			starpu_sleep(0.001);
+#endif
 		}
 		}
 		while (!finished);
 		while (!finished);
 	}
 	}

+ 4 - 1
mpi/tests/multiple_send.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2011-2013,2015,2017                      CNRS
  * Copyright (C) 2011-2013,2015,2017                      CNRS
- * Copyright (C) 2011,2015,2018                           Université de Bordeaux
+ * Copyright (C) 2011,2015,2018,2020                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -80,6 +80,9 @@ int main(int argc, char **argv)
 					}
 					}
 				}
 				}
 			}
 			}
+#ifdef STARPU_SIMGRID
+			starpu_sleep(0.001);
+#endif
 		}
 		}
 	}
 	}
 	FPRINTF(stderr, "[%d] All requests finished\n", rank);
 	FPRINTF(stderr, "[%d] All requests finished\n", rank);

+ 4 - 4
mpi/tests/pingpong.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011,2014,2015,2017,2018            Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2015,2017,2018,2020       Université de Bordeaux
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
@@ -153,7 +153,7 @@ int main(int argc, char **argv)
 				starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
 				starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
 			}
 			}
 
 
-			usleep(sleep_time * 1000);
+			starpu_sleep(sleep_time / 1000);
 		}
 		}
 	}
 	}
 	else // broadcasts
 	else // broadcasts
@@ -168,7 +168,7 @@ int main(int argc, char **argv)
 					if (r != rank)
 					if (r != rank)
 					{
 					{
 						starpu_mpi_send(tab_handle, r, (r * niter) + loop, MPI_COMM_WORLD);
 						starpu_mpi_send(tab_handle, r, (r * niter) + loop, MPI_COMM_WORLD);
-						usleep(sleep_time * 1000);
+						starpu_sleep(sleep_time / 1000);
 					}
 					}
 				}
 				}
 			}
 			}
@@ -178,7 +178,7 @@ int main(int argc, char **argv)
 				starpu_mpi_recv(tab_handle, sender, (rank * niter) + loop, MPI_COMM_WORLD, &status);
 				starpu_mpi_recv(tab_handle, sender, (rank * niter) + loop, MPI_COMM_WORLD, &status);
 
 
 				for (int r = 0; r < (size-1); r++)
 				for (int r = 0; r < (size-1); r++)
-					usleep(sleep_time * 1000);
+					starpu_sleep(sleep_time / 1000);
 			}
 			}
 		}
 		}
 	}
 	}

+ 10 - 1
mpi/tests/sendrecv_bench.c

@@ -95,7 +95,16 @@ int main(int argc, char **argv)
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
 
 
-	STARPU_ASSERT_MSG(worldsize == 2, "We need two prcesses.");
+	if (worldsize != 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
 
 
 
 
 	if (rank == 0)
 	if (rank == 0)

+ 2 - 1
src/common/fxt.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012,2013,2015                           Inria
  * Copyright (C) 2012,2013,2015                           Inria
- * Copyright (C) 2008-2019                                Université de Bordeaux
+ * Copyright (C) 2008-2020                                Université de Bordeaux
  * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2010-2018                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/utils.h>
+#include <core/simgrid.h>
 #include <starpu_util.h>
 #include <starpu_util.h>
 #include <starpu_profiling.h>
 #include <starpu_profiling.h>
 
 

+ 2 - 2
src/common/graph.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2016,2017                                CNRS
  * Copyright (C) 2016,2017                                CNRS
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
- * Copyright (C) 2016-2018                                Université de Bordeaux
+ * Copyright (C) 2016-2018,2020                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -140,7 +140,7 @@ void _starpu_graph_add_job(struct _starpu_job *job)
 	_STARPU_CALLOC(node, 1, sizeof(*node));
 	_STARPU_CALLOC(node, 1, sizeof(*node));
 	node->job = job;
 	node->job = job;
 	job->graph_node = node;
 	job->graph_node = node;
-	STARPU_PTHREAD_MUTEX_INIT(&node->mutex, NULL);
+	STARPU_PTHREAD_MUTEX_INIT0(&node->mutex, NULL);
 
 
 	_starpu_graph_wrlock();
 	_starpu_graph_wrlock();
 
 

+ 7 - 2
src/common/list.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2008-2018                                Université de Bordeaux
+ * Copyright (C) 2008-2018,2020                           Université de Bordeaux
  * Copyright (C) 2010-2012,2015-2018                      CNRS
  * Copyright (C) 2010-2012,2015-2018                      CNRS
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
@@ -51,6 +51,9 @@
  *   * Initializes a list (initially empty)
  *   * Initializes a list (initially empty)
  *   void		FOO_list_init(struct FOO_list*);
  *   void		FOO_list_init(struct FOO_list*);
  *
  *
+ *   * Initializes a list (initially empty), assuming that the content of FOO_list was already zeroed
+ *   void		FOO_list_init0(struct FOO_list*);
+ *
  *   * Deletes a list
  *   * Deletes a list
  *   void		FOO_list_delete(struct FOO_list*);
  *   void		FOO_list_delete(struct FOO_list*);
  *
  *
@@ -225,7 +228,9 @@
   /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \
   /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \
     { return l->_tail; } \
     { return l->_tail; } \
   /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \
   /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \
-    { l->_head=NULL; l->_tail=l->_head; } \
+    { l->_head=NULL; l->_tail=NULL; } \
+  /** @internal */LIST_INLINE void ENAME##_list_init0(struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
+    { } \
   /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \
   /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \
     { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \
     { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \
       ENAME##_list_init(l); return l; } \
       ENAME##_list_init(l); return l; } \

+ 14 - 4
src/common/prio_list.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2017,2018                                Inria
  * Copyright (C) 2017,2018                                Inria
  * Copyright (C) 2016,2017                                CNRS
  * Copyright (C) 2016,2017                                CNRS
- * Copyright (C) 2015-2017,2019                           Université de Bordeaux
+ * Copyright (C) 2015-2017,2019-2020                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,6 +37,9 @@
  * * Initialize a new priority list
  * * Initialize a new priority list
  * void FOO_prio_list_init(struct FOO_prio_list*)
  * void FOO_prio_list_init(struct FOO_prio_list*)
  *
  *
+ * * Initialize a new priority list, assuming that the content of FOO_prio_list was already zeroed
+ * void FOO_prio_list_init0(struct FOO_prio_list*)
+ *
  * * Free an empty priority list
  * * Free an empty priority list
  * void FOO_prio_list_deinit(struct FOO_prio_list*)
  * void FOO_prio_list_deinit(struct FOO_prio_list*)
  *
  *
@@ -152,6 +155,11 @@
 		starpu_rbtree_init(&priolist->tree); \
 		starpu_rbtree_init(&priolist->tree); \
 		priolist->empty = 1; \
 		priolist->empty = 1; \
 	} \
 	} \
+	PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \
+	{ \
+		starpu_rbtree_init0(&priolist->tree); \
+		priolist->empty = 1; \
+	} \
 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	{ \
 	{ \
 		if (starpu_rbtree_empty(&priolist->tree)) \
 		if (starpu_rbtree_empty(&priolist->tree)) \
@@ -183,10 +191,10 @@
 		if (node) \
 		if (node) \
 			stage = ENAME##_node_to_list_stage(node); \
 			stage = ENAME##_node_to_list_stage(node); \
 		else { \
 		else { \
-			_STARPU_MALLOC(stage, sizeof(*stage));	\
-			starpu_rbtree_node_init(&stage->node); \
+			_STARPU_CALLOC(stage, 1, sizeof(*stage));	\
+			starpu_rbtree_node_init0(&stage->node); \
 			stage->prio = prio; \
 			stage->prio = prio; \
-			ENAME##_list_init(&stage->list); \
+			ENAME##_list_init0(&stage->list); \
 			starpu_rbtree_insert_slot(&priolist->tree, slot, &stage->node); \
 			starpu_rbtree_insert_slot(&priolist->tree, slot, &stage->node); \
 		} \
 		} \
 		return stage; \
 		return stage; \
@@ -469,6 +477,8 @@
 	struct ENAME##_prio_list { struct ENAME##_list list; }; \
 	struct ENAME##_prio_list { struct ENAME##_list list; }; \
 	PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
 	PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
 	{ ENAME##_list_init(&(priolist)->list); } \
 	{ ENAME##_list_init(&(priolist)->list); } \
+	PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \
+	{ ENAME##_list_init0(&(priolist)->list); } \
 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
 	PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \

+ 21 - 0
src/common/rbtree.h

@@ -34,6 +34,8 @@
 #include <stdint.h>
 #include <stdint.h>
 #include <sys/types.h>
 #include <sys/types.h>
 
 
+#include <starpu_util.h>
+
 #define MACRO_BEGIN ({
 #define MACRO_BEGIN ({
 #define MACRO_END })
 #define MACRO_END })
 /*
 /*
@@ -68,6 +70,13 @@ static inline void starpu_rbtree_init(struct starpu_rbtree *tree)
 }
 }
 
 
 /*
 /*
+ * This version assumes that the content of tree was already zeroed
+ */
+static inline void starpu_rbtree_init0(struct starpu_rbtree *tree STARPU_ATTRIBUTE_UNUSED)
+{
+}
+
+/*
  * Initialize a node.
  * Initialize a node.
  *
  *
  * A node is in no tree when its parent points to itself.
  * A node is in no tree when its parent points to itself.
@@ -82,6 +91,18 @@ static inline void starpu_rbtree_node_init(struct starpu_rbtree_node *node)
 }
 }
 
 
 /*
 /*
+ * This version assumes that the content of node was already zeroed
+ */
+static inline void starpu_rbtree_node_init0(struct starpu_rbtree_node *node)
+{
+    assert(starpu_rbtree_check_alignment(node));
+
+    node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED;
+    //node->children[STARPU_RBTREE_LEFT] = NULL;
+    //node->children[STARPU_RBTREE_RIGHT] = NULL;
+}
+
+/*
  * Return true if node is in no tree.
  * Return true if node is in no tree.
  */
  */
 static inline int starpu_rbtree_node_unlinked(const struct starpu_rbtree_node *node)
 static inline int starpu_rbtree_node_unlinked(const struct starpu_rbtree_node *node)

+ 6 - 3
src/common/thread.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2013,2015,2017                           Inria
  * Copyright (C) 2013,2015,2017                           Inria
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2010,2012-2019                           Université de Bordeaux
+ * Copyright (C) 2010,2012-2020                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,7 +29,10 @@
 #include <limits.h>
 #include <limits.h>
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-#ifdef STARPU_HAVE_XBT_SYNCHRO_H
+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
+#include <simgrid/mutex.h>
+#include <simgrid/cond.h>
+#elif defined(STARPU_HAVE_XBT_SYNCHRO_H)
 #include <xbt/synchro.h>
 #include <xbt/synchro.h>
 #else
 #else
 #include <xbt/synchro_core.h>
 #include <xbt/synchro_core.h>
@@ -72,7 +75,7 @@ starpu_pthread_t starpu_pthread_self(void)
 #endif
 #endif
 }
 }
 
 
-int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host)
+int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host)
 {
 {
 	char **_args;
 	char **_args;
 	_STARPU_MALLOC(_args, 3*sizeof(char*));
 	_STARPU_MALLOC(_args, 3*sizeof(char*));

+ 10 - 10
src/core/dependencies/cg.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2010-2012,2014-2018                      Université de Bordeaux
+ * Copyright (C) 2010-2012,2014-2018,2020                 Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,23 +24,23 @@
 #include <core/dependencies/cg.h>
 #include <core/dependencies/cg.h>
 #include <core/dependencies/tags.h>
 #include <core/dependencies/tags.h>
 
 
-void _starpu_cg_list_init(struct _starpu_cg_list *list)
+void _starpu_cg_list_init0(struct _starpu_cg_list *list)
 {
 {
 	_starpu_spin_init(&list->lock);
 	_starpu_spin_init(&list->lock);
-	list->ndeps = 0;
-	list->ndeps_completed = 0;
+	//list->ndeps = 0;
+	//list->ndeps_completed = 0;
 #ifdef STARPU_DEBUG
 #ifdef STARPU_DEBUG
-	list->deps = NULL;
-	list->done = NULL;
+	//list->deps = NULL;
+	//list->done = NULL;
 #endif
 #endif
 
 
-	list->terminated = 0;
+	//list->terminated = 0;
 
 
-	list->nsuccs = 0;
+	//list->nsuccs = 0;
 #ifdef STARPU_DYNAMIC_DEPS_SIZE
 #ifdef STARPU_DYNAMIC_DEPS_SIZE
 	/* this is a small initial default value ... may be changed */
 	/* this is a small initial default value ... may be changed */
-	list->succ_list_size = 0;
-	list->succ = NULL;
+	//list->succ_list_size = 0;
+	//list->succ = NULL;
 #endif
 #endif
 }
 }
 
 

+ 2 - 2
src/core/dependencies/cg.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2018                                Université de Bordeaux
+ * Copyright (C) 2010-2018,2020                           Université de Bordeaux
  * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
  * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -114,7 +114,7 @@ void _starpu_notify_dependencies(struct _starpu_job *j);
 void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch);
 void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch);
 void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
 void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
 
 
-void _starpu_cg_list_init(struct _starpu_cg_list *list);
+void _starpu_cg_list_init0(struct _starpu_cg_list *list);
 void _starpu_cg_list_deinit(struct _starpu_cg_list *list);
 void _starpu_cg_list_deinit(struct _starpu_cg_list *list);
 int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg);
 int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg);
 int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]);
 int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]);

+ 44 - 16
src/core/dependencies/implicit_data_deps.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2011,2012,2016                           Inria
- * Copyright (C) 2010-2019                                Université de Bordeaux
+ * Copyright (C) 2010-2020                                Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -49,8 +49,8 @@ static void _starpu_add_dependency(starpu_data_handle_t handle, struct starpu_ta
 	_starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next);
 	_starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next);
 }
 }
 
 
-/* Add pre_sync_task as new accessor among the existing ones, making it depend on the last synchronization task if any.  */
-static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot)
+/* Add post_sync_task as new accessor among the existing ones, making pre_sync_task depend on the last synchronization task if any.  */
+static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot)
 {
 {
 	/* Add this task to the list of readers */
 	/* Add this task to the list of readers */
 	STARPU_ASSERT(!post_sync_task_dependency_slot->prev);
 	STARPU_ASSERT(!post_sync_task_dependency_slot->prev);
@@ -64,6 +64,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task
 	/* This task depends on the previous synchronization task if any */
 	/* This task depends on the previous synchronization task if any */
 	if (handle->last_sync_task && handle->last_sync_task != post_sync_task)
 	if (handle->last_sync_task && handle->last_sync_task != post_sync_task)
 	{
 	{
+		*submit_pre_sync= 1;
 		struct starpu_task *task_array[1] = {handle->last_sync_task};
 		struct starpu_task *task_array[1] = {handle->last_sync_task};
 		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
 		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
 		_starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task);
 		_starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task);
@@ -93,7 +94,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task
 		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task);
 		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task);
 	}
 	}
 
 
-	if (!pre_sync_task->cl)
+	if (*submit_pre_sync && !pre_sync_task->cl)
 	{
 	{
 		/* Add a reference to be released in _starpu_handle_job_termination */
 		/* Add a reference to be released in _starpu_handle_job_termination */
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
@@ -202,7 +203,14 @@ static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_tas
  * */
  * */
 /* NB : handle->sequential_consistency_mutex must be held by the caller;
 /* NB : handle->sequential_consistency_mutex must be held by the caller;
  * returns a task, to be submitted after releasing that mutex. */
  * returns a task, to be submitted after releasing that mutex. */
-struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
+/* *submit_pre_sync indicates whether the pre_sync_task will be submitted. The
+ * caller should set it to 1 if it intends to submit the task anyway, or to 0
+ * if it may not submit it (because it has no other use for the task than
+ * synchronization). In the latter case,
+ * _starpu_detect_implicit_data_deps_with_handle will set it to 1 in case the
+ * task really needs to be submitted, or leave it at 0 if there is nothing to
+ * wait for anyway. */
+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency)
 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency)
 {
 {
 	struct starpu_task *task = NULL;
 	struct starpu_task *task = NULL;
@@ -228,8 +236,14 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
 
 		/* Skip tasks that are associated to a reduction phase so that
 		/* Skip tasks that are associated to a reduction phase so that
 		 * they do not interfere with the application. */
 		 * they do not interfere with the application. */
-		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
+		if (pre_sync_job->reduction_task) {
+			*submit_pre_sync = 1;
 			return NULL;
 			return NULL;
+		}
+		if (post_sync_job->reduction_task) {
+			*submit_pre_sync = 0;
+			return NULL;
+		}
 
 
 		/* In case we are generating the DAG, we add an implicit
 		/* In case we are generating the DAG, we add an implicit
 		 * dependency between the pre and the post sync tasks in case
 		 * dependency between the pre and the post sync tasks in case
@@ -264,7 +278,9 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 		{
 		{
 			_STARPU_DEP_DEBUG("concurrently\n");
 			_STARPU_DEP_DEBUG("concurrently\n");
 			/* Can access concurrently with current tasks */
 			/* Can access concurrently with current tasks */
-			_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
+			if (handle->last_sync_task != NULL)
+				*submit_pre_sync = 1;
+			_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
 		}
 		}
 		else
 		else
 		{
 		{
@@ -277,6 +293,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 					|| (l != &handle->last_submitted_accessors && handle->last_submitted_ghost_accessors_id))
 					|| (l != &handle->last_submitted_accessors && handle->last_submitted_ghost_accessors_id))
 			{
 			{
 				/* Several previous accessors */
 				/* Several previous accessors */
+				*submit_pre_sync = 1;
 
 
 				if (mode == STARPU_W)
 				if (mode == STARPU_W)
 				{
 				{
@@ -308,7 +325,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 					/* Make this task wait for the previous ones */
 					/* Make this task wait for the previous ones */
 					_starpu_add_sync_task(handle, sync_task, sync_task, post_sync_task);
 					_starpu_add_sync_task(handle, sync_task, sync_task, post_sync_task);
 					/* And the requested task wait for this one */
 					/* And the requested task wait for this one */
-					_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
+					_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
 
 
 					task = sync_task;
 					task = sync_task;
 				}
 				}
@@ -321,6 +338,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 				{
 				{
 					/* One accessor, make it the sync task,
 					/* One accessor, make it the sync task,
 					 * and start depending on it. */
 					 * and start depending on it. */
+					*submit_pre_sync = 1;
 					_STARPU_DEP_DEBUG("One previous accessor, depending on it\n");
 					_STARPU_DEP_DEBUG("One previous accessor, depending on it\n");
 					handle->last_sync_task = l->task;
 					handle->last_sync_task = l->task;
 					l->next = NULL;
 					l->next = NULL;
@@ -343,10 +361,12 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 				{
 				{
 					_STARPU_DEP_DEBUG("No previous accessor, no dependency\n");
 					_STARPU_DEP_DEBUG("No previous accessor, no dependency\n");
 				}
 				}
-				_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
+				_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
 			}
 			}
 		}
 		}
 		handle->last_submitted_mode = mode;
 		handle->last_submitted_mode = mode;
+	} else {
+		*submit_pre_sync = 0;
 	}
 	}
         _STARPU_LOG_OUT();
         _STARPU_LOG_OUT();
 	return task;
 	return task;
@@ -423,9 +443,10 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 		STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
 		unsigned index = descrs[buffer].index;
 		unsigned index = descrs[buffer].index;
 		unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? task->handles_sequential_consistency[index] : handle->sequential_consistency;
 		unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? task->handles_sequential_consistency[index] : handle->sequential_consistency;
+		int submit_pre_sync = 1;
 		if (!task_handle_sequential_consistency)
 		if (!task_handle_sequential_consistency)
 			j->sequential_consistency = 0;
 			j->sequential_consistency = 0;
-		new_task = _starpu_detect_implicit_data_deps_with_handle(task, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency);
+		new_task = _starpu_detect_implicit_data_deps_with_handle(task, &submit_pre_sync, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		if (new_task)
 		if (new_task)
 		{
 		{
@@ -631,6 +652,7 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 	if (sequential_consistency)
 	if (sequential_consistency)
 	{
 	{
 		struct starpu_task *sync_task, *new_task;
 		struct starpu_task *sync_task, *new_task;
+		int submit_pre_sync = 0;
 		sync_task = starpu_task_create();
 		sync_task = starpu_task_create();
 		sync_task->name = sync_name;
 		sync_task->name = sync_name;
 		sync_task->detach = 0;
 		sync_task->detach = 0;
@@ -639,7 +661,7 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 
 
 		/* It is not really a RW access, but we want to make sure that
 		/* It is not really a RW access, but we want to make sure that
 		 * all previous accesses are done */
 		 * all previous accesses are done */
-		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
+		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, &submit_pre_sync, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 
 
 		if (new_task)
 		if (new_task)
@@ -648,11 +670,17 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);
 		}
 		}
 
 
-		/* TODO detect if this is superflous */
-		int ret = _starpu_task_submit_internally(sync_task);
-		STARPU_ASSERT(!ret);
-		ret = starpu_task_wait(sync_task);
-		STARPU_ASSERT(ret == 0);
+		if (submit_pre_sync)
+		{
+			int ret = _starpu_task_submit_internally(sync_task);
+			STARPU_ASSERT(!ret);
+			ret = starpu_task_wait(sync_task);
+			STARPU_ASSERT(ret == 0);
+		}
+		else
+		{
+			starpu_task_destroy(sync_task);
+		}
 	}
 	}
 	else
 	else
 	{
 	{

+ 2 - 2
src/core/dependencies/implicit_data_deps.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012,2014,2015,2017,2018            Université de Bordeaux
+ * Copyright (C) 2010-2012,2014,2015,2017,2018,2020       Université de Bordeaux
  * Copyright (C) 2010,2011,2013,2015,2017,2018            CNRS
  * Copyright (C) 2010,2011,2013,2015,2017,2018            CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,7 +21,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <common/config.h>
 #include <common/config.h>
 
 
-struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency);
 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency);
 int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
 int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
 void _starpu_detect_implicit_data_deps(struct starpu_task *task);
 void _starpu_detect_implicit_data_deps(struct starpu_task *task);

+ 6 - 6
src/core/dependencies/tags.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2008-2014,2016-2018                      Université de Bordeaux
+ * Copyright (C) 2008-2014,2016-2018,2020                 Université de Bordeaux
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2010-2013,2015-2017,2019                 CNRS
  * Copyright (C) 2010-2013,2015-2017,2019                 CNRS
  *
  *
@@ -76,16 +76,16 @@ static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
 static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id)
 static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id)
 {
 {
 	struct _starpu_tag *tag;
 	struct _starpu_tag *tag;
-	_STARPU_MALLOC(tag, sizeof(struct _starpu_tag));
+	_STARPU_CALLOC(tag, 1, sizeof(struct _starpu_tag));
 
 
-	tag->job = NULL;
-	tag->is_assigned = 0;
-	tag->is_submitted = 0;
+	//tag->job = NULL;
+	//tag->is_assigned = 0;
+	//tag->is_submitted = 0;
 
 
 	tag->id = id;
 	tag->id = id;
 	tag->state = STARPU_INVALID_STATE;
 	tag->state = STARPU_INVALID_STATE;
 
 
-	_starpu_cg_list_init(&tag->tag_successors);
+	_starpu_cg_list_init0(&tag->tag_successors);
 
 
 	_starpu_spin_init(&tag->lock);
 	_starpu_spin_init(&tag->lock);
 
 

+ 5 - 7
src/core/jobs.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2017                                Inria
  * Copyright (C) 2011-2017                                Inria
- * Copyright (C) 2008-2019                                Université de Bordeaux
+ * Copyright (C) 2008-2020                                Université de Bordeaux
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -70,11 +70,9 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 	struct _starpu_job *job;
 	struct _starpu_job *job;
         _STARPU_LOG_IN();
         _STARPU_LOG_IN();
 
 
-	_STARPU_MALLOC(job, sizeof(*job));
-
 	/* As most of the fields must be initialized at NULL, let's put 0
 	/* As most of the fields must be initialized at NULL, let's put 0
 	 * everywhere */
 	 * everywhere */
-	memset(job, 0, sizeof(*job));
+	_STARPU_CALLOC(job, 1, sizeof(*job));
 
 
 	if (task->dyn_handles)
 	if (task->dyn_handles)
 	{
 	{
@@ -99,10 +97,10 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 			maxnjobs = jobs;
 			maxnjobs = jobs;
 	}
 	}
 
 
-	_starpu_cg_list_init(&job->job_successors);
+	_starpu_cg_list_init0(&job->job_successors);
 
 
-	STARPU_PTHREAD_MUTEX_INIT(&job->sync_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&job->sync_cond, NULL);
+	STARPU_PTHREAD_MUTEX_INIT0(&job->sync_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT0(&job->sync_cond, NULL);
 
 
 	/* By default we have sequential tasks */
 	/* By default we have sequential tasks */
 	job->task_size = 1;
 	job->task_size = 1;

+ 8 - 8
src/core/perfmodel/perfmodel_history.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2014,2016,2017                      Inria
  * Copyright (C) 2011-2014,2016,2017                      Inria
- * Copyright (C) 2008-2019                                Université de Bordeaux
+ * Copyright (C) 2008-2020                                Université de Bordeaux
  * Copyright (C) 2010-2017, 2019                          CNRS
  * Copyright (C) 2010-2017, 2019                          CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -582,7 +582,7 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 			 * good-enough estimation */
 			 * good-enough estimation */
 			STARPU_HG_DISABLE_CHECKING(entry->nsample);
 			STARPU_HG_DISABLE_CHECKING(entry->nsample);
 			STARPU_HG_DISABLE_CHECKING(entry->mean);
 			STARPU_HG_DISABLE_CHECKING(entry->mean);
-			entry->nerror = 0;
+			//entry->nerror = 0;
 		}
 		}
 
 
 		scan_history_entry(f, path, entry);
 		scan_history_entry(f, path, entry);
@@ -1886,18 +1886,18 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 
 				/* Do not take the first measurement into account, it is very often quite bogus */
 				/* Do not take the first measurement into account, it is very often quite bogus */
 				/* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */
 				/* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */
-				entry->mean = 0;
-				entry->sum = 0;
+				//entry->mean = 0;
+				//entry->sum = 0;
 
 
-				entry->deviation = 0.0;
-				entry->sum2 = 0;
+				//entry->deviation = 0.0;
+				//entry->sum2 = 0;
 
 
 				entry->size = _starpu_job_get_data_size(model, arch, impl, j);
 				entry->size = _starpu_job_get_data_size(model, arch, impl, j);
 				entry->flops = j->task->flops;
 				entry->flops = j->task->flops;
 
 
 				entry->footprint = key;
 				entry->footprint = key;
-				entry->nsample = 0;
-				entry->nerror = 0;
+				//entry->nsample = 0;
+				//entry->nerror = 0;
 
 
 				insert_history_entry(entry, list, &per_arch_model->history);
 				insert_history_entry(entry, list, &per_arch_model->history);
 			}
 			}

+ 36 - 9
src/core/simgrid.c

@@ -38,6 +38,12 @@
 #ifdef STARPU_HAVE_SIMGRID_HOST_H
 #ifdef STARPU_HAVE_SIMGRID_HOST_H
 #include <simgrid/host.h>
 #include <simgrid/host.h>
 #endif
 #endif
+#ifdef STARPU_HAVE_SIMGRID_ENGINE_H
+#include <simgrid/engine.h>
+#endif
+#ifdef STARPU_HAVE_XBT_CONFIG_H
+#include <xbt/config.h>
+#endif
 #include <smpi/smpi.h>
 #include <smpi/smpi.h>
 
 
 #pragma weak starpu_main
 #pragma weak starpu_main
@@ -137,9 +143,9 @@ int _starpu_simgrid_get_nbhosts(const char *prefix)
 		char name[32];
 		char name[32];
 		STARPU_ASSERT(starpu_mpi_world_rank);
 		STARPU_ASSERT(starpu_mpi_world_rank);
 		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank());
 		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank());
-#if defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts)
+#if defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(HAVE_SG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts) || defined(sg_zone_get_hosts)
 		hosts = xbt_dynar_new(sizeof(sg_host_t), NULL);
 		hosts = xbt_dynar_new(sizeof(sg_host_t), NULL);
-#  if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name)
+#  if defined(HAVE_SG_ZONE_GET_HOSTS) || defined(sg_zone_get_hosts)
 		sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
 		sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
 #  else
 #  else
 		MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
 		MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
@@ -280,7 +286,11 @@ void _starpu_start_simgrid(int *argc, char **argv)
 
 
 	simgrid_started = 1;
 	simgrid_started = 1;
 
 
+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
+	simgrid_init(argc, argv);
+#else
 	MSG_init(argc, argv);
 	MSG_init(argc, argv);
+#endif
 	/* Simgrid uses tiny stacks by default.  This comes unexpected to our users.  */
 	/* Simgrid uses tiny stacks by default.  This comes unexpected to our users.  */
 	unsigned stack_size = 8192;
 	unsigned stack_size = 8192;
 #ifdef HAVE_GETRLIMIT
 #ifdef HAVE_GETRLIMIT
@@ -304,7 +314,11 @@ void _starpu_start_simgrid(int *argc, char **argv)
 #else
 #else
 	_starpu_simgrid_get_platform_path(4, path, sizeof(path));
 	_starpu_simgrid_get_platform_path(4, path, sizeof(path));
 #endif
 #endif
+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
+	simgrid_load_platform(path);
+#else
 	MSG_create_environment(path);
 	MSG_create_environment(path);
+#endif
 
 
 	simgrid_transfer_cost = starpu_get_env_number_default("STARPU_SIMGRID_TRANSFER_COST", 1);
 	simgrid_transfer_cost = starpu_get_env_number_default("STARPU_SIMGRID_TRANSFER_COST", 1);
 }
 }
@@ -378,14 +392,22 @@ int main(int argc, char **argv)
 	_starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy);
 	_starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy);
 
 
 	/* And run maestro in the main thread */
 	/* And run maestro in the main thread */
+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
+	simgrid_run();
+#else
 	MSG_main();
 	MSG_main();
+#endif
 	return main_ret;
 	return main_ret;
 }
 }
 
 
-#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach)
+#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)
 static void maestro(void *data STARPU_ATTRIBUTE_UNUSED)
 static void maestro(void *data STARPU_ATTRIBUTE_UNUSED)
 {
 {
+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
+	simgrid_run();
+#else
 	MSG_main();
 	MSG_main();
+#endif
 }
 }
 #endif
 #endif
 
 
@@ -721,7 +743,7 @@ void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct star
 
 
 /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers.  */
 /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers.  */
 LIST_TYPE(transfer,
 LIST_TYPE(transfer,
-#ifdef HAVE_SG_HOST_SEND_TO
+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
 	size_t size;
 	size_t size;
 #else
 #else
 	msg_task_t task;
 	msg_task_t task;
@@ -854,15 +876,20 @@ static void *transfer_execute(void *arg)
 		if (t->last_transfer == transfer)
 		if (t->last_transfer == transfer)
 			t->last_transfer = NULL;
 			t->last_transfer = NULL;
 
 
-#ifdef HAVE_SG_HOST_SEND_TO
+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
 		if (transfer->size)
 		if (transfer->size)
 #else
 #else
 		if (transfer->task)
 		if (transfer->task)
 #endif
 #endif
 		{
 		{
 			_STARPU_DEBUG("transfer %p started\n", transfer);
 			_STARPU_DEBUG("transfer %p started\n", transfer);
-#ifdef HAVE_SG_HOST_SEND_TO
-			sg_host_send_to(_starpu_simgrid_memory_node_get_host(transfer->src_node),
+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
+#ifdef HAVE_SG_HOST_SENDTO
+			sg_host_sendto
+#else
+			sg_host_send_to
+#endif
+				(_starpu_simgrid_memory_node_get_host(transfer->src_node),
 					_starpu_simgrid_memory_node_get_host(transfer->dst_node),
 					_starpu_simgrid_memory_node_get_host(transfer->dst_node),
 					transfer->size);
 					transfer->size);
 #else
 #else
@@ -963,7 +990,7 @@ static void _starpu_simgrid_wait_transfers(void)
 	struct transfer *sync = transfer_new();
 	struct transfer *sync = transfer_new();
 	struct transfer *cur;
 	struct transfer *cur;
 
 
-#ifdef HAVE_SG_HOST_SEND_TO
+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
 	sync->size = 0;
 	sync->size = 0;
 #else
 #else
 	sync->task = NULL;
 	sync->task = NULL;
@@ -1031,7 +1058,7 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 
 
 	_STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size);
 	_STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size);
 
 
-#ifdef HAVE_SG_HOST_SEND_TO
+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
 	transfer->size = size;
 	transfer->size = size;
 #else
 #else
 	msg_task_t task;
 	msg_task_t task;

+ 15 - 2
src/core/simgrid.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2013,2017                                CNRS
  * Copyright (C) 2013,2017                                CNRS
- * Copyright (C) 2012-2019                                Université de Bordeaux
+ * Copyright (C) 2012-2020                                Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -27,10 +27,23 @@ extern "C"
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #ifdef STARPU_HAVE_SIMGRID_MSG_H
 #include <simgrid/msg.h>
 #include <simgrid/msg.h>
-#else
+#elif defined(STARPU_HAVE_MSG_MSG_H)
 #include <msg/msg.h>
 #include <msg/msg.h>
 #endif
 #endif
 
 
+#ifdef STARPU_HAVE_XBT_BASE_H
+#include <xbt/base.h>
+#endif
+#ifdef STARPU_HAVE_SIMGRID_VERSION_H
+#include <simgrid/version.h>
+#endif
+#ifdef STARPU_HAVE_SIMGRID_ZONE_H
+#include <simgrid/zone.h>
+#endif
+#ifdef STARPU_HAVE_SIMGRID_HOST_H
+#include <simgrid/host.h>
+#endif
+
 #include <xbt/xbt_os_time.h>
 #include <xbt/xbt_os_time.h>
 
 
 struct _starpu_pthread_args
 struct _starpu_pthread_args

+ 28 - 9
src/core/simgrid_cpp.cpp

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2016,2017                                CNRS
  * Copyright (C) 2016,2017                                CNRS
- * Copyright (C) 2012-2019                                Université de Bordeaux
+ * Copyright (C) 2012-2020                                Université de Bordeaux
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2016,2017                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,19 +21,12 @@
 #include <common/config.h>
 #include <common/config.h>
 
 
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
-#ifdef STARPU_HAVE_SIMGRID_MSG_H
-#include <simgrid/msg.h>
-#else
-#include <msg/msg.h>
-#endif
 #if SIMGRID_VERSION >= 32190
 #if SIMGRID_VERSION >= 32190
 #include <simgrid/simix.hpp>
 #include <simgrid/simix.hpp>
 #else
 #else
 #include <simgrid/simix.h>
 #include <simgrid/simix.h>
 #endif
 #endif
-#ifdef STARPU_HAVE_SIMGRID_HOST_H
-#include <simgrid/host.h>
-#endif
+#include <smpi/smpi.h>
 
 
 /* thread_create function which implements inheritence of MPI privatization */
 /* thread_create function which implements inheritence of MPI privatization */
 /* See https://github.com/simgrid/simgrid/issues/139 */
 /* See https://github.com/simgrid/simgrid/issues/139 */
@@ -42,9 +35,26 @@ typedef struct
 {
 {
 	void_f_pvoid_t code;
 	void_f_pvoid_t code;
 	void *userparam;
 	void *userparam;
+#if SIMGRID_VERSION < 32501
 	void *father_data;
 	void *father_data;
+#endif
 } thread_data_t;
 } thread_data_t;
 
 
+#if SIMGRID_VERSION >= 32501
+static void *_starpu_simgrid_xbt_thread_create_wrapper(void *arg)
+{
+	thread_data_t *t = (thread_data_t *) arg;
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	starpu_sleep(0.000001);
+#ifdef HAVE_SMPI_THREAD_CREATE
+	/* Make this actor inherit SMPI data from father actor */
+	SMPI_thread_create();
+#endif
+	t->code(t->userparam);
+	free(t);
+	return NULL;
+}
+#else
 #if SIMGRID_VERSION >= 32190
 #if SIMGRID_VERSION >= 32190
 static void _starpu_simgrid_xbt_thread_create_wrapper(void)
 static void _starpu_simgrid_xbt_thread_create_wrapper(void)
 #else
 #else
@@ -74,9 +84,17 @@ static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_U
 	return 0;
 	return 0;
 #endif
 #endif
 }
 }
+#endif
 
 
 void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
 void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
 {
 {
+#if SIMGRID_VERSION >= 32501
+	starpu_pthread_t t;
+	thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t));
+	res->userparam = param;
+	res->code = code;
+	starpu_pthread_create_on(name, &t, NULL, _starpu_simgrid_xbt_thread_create_wrapper, res, sg_host_self());
+#else
 #if SIMGRID_VERSION >= 32190 || defined(HAVE_SIMCALL_PROCESS_CREATE) || defined(simcall_process_create)
 #if SIMGRID_VERSION >= 32190 || defined(HAVE_SIMCALL_PROCESS_CREATE) || defined(simcall_process_create)
 #ifdef HAVE_SMX_ACTOR_T
 #ifdef HAVE_SMX_ACTOR_T
 	smx_actor_t process STARPU_ATTRIBUTE_UNUSED;
 	smx_actor_t process STARPU_ATTRIBUTE_UNUSED;
@@ -122,6 +140,7 @@ void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, vo
 #else
 #else
 	STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry.");
 	STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry.");
 #endif
 #endif
+#endif
 }
 }
 
 
 #endif
 #endif

+ 27 - 11
src/core/task.c

@@ -1,9 +1,9 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2019                                Inria
  * Copyright (C) 2011-2019                                Inria
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2017                                     Erwan Leria
  * Copyright (C) 2017                                     Erwan Leria
- * Copyright (C) 2010-2019                                CNRS
+ * Copyright (C) 2010-2020                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
@@ -245,6 +245,12 @@ static int limit_max_submitted_tasks;
 static int watchdog_crash;
 static int watchdog_crash;
 static int watchdog_delay;
 static int watchdog_delay;
 
 
+/*
+ * Function to call when watchdog detects that no task has finished for more than STARPU_WATCHDOG_TIMEOUT seconds
+ */
+static void (*watchdog_hook)(void *) = NULL;
+static void * watchdog_hook_arg = NULL;
+
 #define _STARPU_TASK_MAGIC 42
 #define _STARPU_TASK_MAGIC 42
 
 
 /* Called once at starpu_init */
 /* Called once at starpu_init */
@@ -788,7 +794,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 					  "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.",
 					  "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.",
 					  task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);
 					  task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);
 
 
-		if (task->dyn_handles)
+		if (STARPU_UNLIKELY(task->dyn_handles))
 		{
 		{
 			_STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *));
 			_STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *));
 		}
 		}
@@ -821,7 +827,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 		}
 		}
 
 
 		/* Check the type of worker(s) required by the task exist */
 		/* Check the type of worker(s) required by the task exist */
-		if (!_starpu_worker_exists(task))
+		if (STARPU_UNLIKELY(!_starpu_worker_exists(task)))
 		{
 		{
 			_STARPU_LOG_OUT_TAG("ENODEV");
 			_STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
 			return -ENODEV;
@@ -830,7 +836,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 		/* In case we require that a task should be explicitely
 		/* In case we require that a task should be explicitely
 		 * executed on a specific worker, we make sure that the worker
 		 * executed on a specific worker, we make sure that the worker
 		 * is able to execute this task.  */
 		 * is able to execute this task.  */
-		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
+		if (STARPU_UNLIKELY(task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0)))
 		{
 		{
 			_STARPU_LOG_OUT_TAG("ENODEV");
 			_STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
 			return -ENODEV;
@@ -932,7 +938,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 			_starpu_detect_implicit_data_deps(task);
 			_starpu_detect_implicit_data_deps(task);
 	}
 	}
 
 
-	if (bundle)
+	if (STARPU_UNLIKELY(bundle))
 	{
 	{
 		/* We need to make sure that models for other tasks of the
 		/* We need to make sure that models for other tasks of the
 		 * bundle are also loaded, so the scheduler can estimate the
 		 * bundle are also loaded, so the scheduler can estimate the
@@ -967,7 +973,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 	 * dependency. */
 	 * dependency. */
 	task->status = STARPU_TASK_BLOCKED;
 	task->status = STARPU_TASK_BLOCKED;
 
 
-	if (profiling)
+	if (STARPU_UNLIKELY(profiling))
 		_starpu_clock_gettime(&info->submit_time);
 		_starpu_clock_gettime(&info->submit_time);
 
 
 	ret = _starpu_submit_job(j, nodeps);
 	ret = _starpu_submit_job(j, nodeps);
@@ -1563,14 +1569,18 @@ static void *watchdog_func(void *arg)
 		if (!config->watchdog_ok && last_nsubmitted
 		if (!config->watchdog_ok && last_nsubmitted
 				&& last_nsubmitted == starpu_task_nsubmitted())
 				&& last_nsubmitted == starpu_task_nsubmitted())
 		{
 		{
-			_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
-				    timeout);
+			if (watchdog_hook == NULL)
+				_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
+									timeout);
+			else
+				watchdog_hook(watchdog_hook_arg);
+
 			if (watchdog_crash)
 			if (watchdog_crash)
 			{
 			{
 				_STARPU_MSG("Crashing the process\n");
 				_STARPU_MSG("Crashing the process\n");
 				raise(SIGABRT);
 				raise(SIGABRT);
 			}
 			}
-			else
+			else if (watchdog_hook == NULL)
 				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
 				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
 		}
 		}
 		/* Only shout again after another period */
 		/* Only shout again after another period */
@@ -1580,7 +1590,13 @@ static void *watchdog_func(void *arg)
 	return NULL;
 	return NULL;
 }
 }
 
 
-void _starpu_watchdog_init(void)
+void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg)
+{
+	watchdog_hook = hook;
+	watchdog_hook_arg = hook_arg;
+}
+
+void _starpu_watchdog_init()
 {
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT");
 	char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT");

+ 5 - 5
src/core/task_bundle.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011-2014                                Université de Bordeaux
+ * Copyright (C) 2011-2014, 2020                                Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2013,2015-2017                      CNRS
  * Copyright (C) 2011,2013,2015-2017                      CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -29,15 +29,15 @@
 /* Initialize a task bundle */
 /* Initialize a task bundle */
 void starpu_task_bundle_create(starpu_task_bundle_t *bundle)
 void starpu_task_bundle_create(starpu_task_bundle_t *bundle)
 {
 {
-	_STARPU_MALLOC(*bundle, sizeof(struct _starpu_task_bundle));
+	_STARPU_CALLOC(*bundle, 1, sizeof(struct _starpu_task_bundle));
 
 
-	STARPU_PTHREAD_MUTEX_INIT(&(*bundle)->mutex, NULL);
+	STARPU_PTHREAD_MUTEX_INIT0(&(*bundle)->mutex, NULL);
 	/* Of course at the beginning a bundle is open,
 	/* Of course at the beginning a bundle is open,
 	 * user can insert and remove tasks from it */
 	 * user can insert and remove tasks from it */
-	(*bundle)->closed = 0;
+	//(*bundle)->closed = 0;
 
 
 	/* Start with an empty list */
 	/* Start with an empty list */
-	(*bundle)->list = NULL;
+	//(*bundle)->list = NULL;
 
 
 }
 }
 
 

+ 11 - 5
src/core/topology.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2017                                Inria
  * Copyright (C) 2011-2017                                Inria
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2010-2017, 2019                                CNRS
  * Copyright (C) 2010-2017, 2019                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
@@ -21,6 +21,9 @@
 #include <stdlib.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <common/config.h>
 #include <common/config.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #include <core/workers.h>
 #include <core/workers.h>
 #include <core/debug.h>
 #include <core/debug.h>
 #include <core/topology.h>
 #include <core/topology.h>
@@ -2030,12 +2033,15 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 			 (previous >= 0 && previous == workerid) ||
 			 (previous >= 0 && previous == workerid) ||
 			 (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])) ) )
 			 (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])) ) )
 		{
 		{
+			char hostname[65];
+			gethostname(hostname, sizeof(hostname));
+
 			if (previous == STARPU_ACTIVETHREAD)
 			if (previous == STARPU_ACTIVETHREAD)
-				_STARPU_DISP("Warning: active thread %s was already bound to PU %d\n", cpu_name[cpuid], cpuid);
+				_STARPU_DISP("[%s] Warning: active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
 			else if (previous == STARPU_NONACTIVETHREAD)
 			else if (previous == STARPU_NONACTIVETHREAD)
-				_STARPU_DISP("Warning: non-active thread %s was already bound to PU %d\n", cpu_name[cpuid], cpuid);
+				_STARPU_DISP("[%s] Warning: non-active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
 			else
 			else
-				_STARPU_DISP("Warning: worker %d was already bound to PU %d\n", previous, cpuid);
+				_STARPU_DISP("[%s] Warning: worker %d was already bound to PU %d\n", hostname, previous, cpuid);
 
 
 			if (workerid == STARPU_ACTIVETHREAD)
 			if (workerid == STARPU_ACTIVETHREAD)
 				_STARPU_DISP("and we were told to also bind active thread %s to it.\n", name);
 				_STARPU_DISP("and we were told to also bind active thread %s to it.\n", name);
@@ -2048,7 +2054,7 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
 
 			if (workerid >= 0)
 			if (workerid >= 0)
 				/* This shouldn't happen for workers */
 				/* This shouldn't happen for workers */
-				_STARPU_DISP("Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", config->topology.nhwcpus, config->topology.nhwpus);
+				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwcpus, config->topology.nhwpus);
 			ret = -1;
 			ret = -1;
 		}
 		}
 		else
 		else

+ 2 - 2
src/core/workers.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2017,2019                           Inria
  * Copyright (C) 2011-2017,2019                           Inria
- * Copyright (C) 2008-2019                                Université de Bordeaux
+ * Copyright (C) 2008-2020                                Université de Bordeaux
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
@@ -1202,7 +1202,7 @@ int _starpu_get_catch_signals(void);
 static inline int _starpu_perf_counter_paused(void) 
 static inline int _starpu_perf_counter_paused(void) 
 {
 {
 	STARPU_RMB();
 	STARPU_RMB();
-	return _starpu_config.perf_counter_pause_depth > 0;
+	return STARPU_UNLIKELY(_starpu_config.perf_counter_pause_depth > 0);
 }
 }
 
 
 /* @}*/
 /* @}*/

+ 49 - 38
src/datawizard/filters.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011,2012,2016,2017                      Inria
  * Copyright (C) 2011,2012,2016,2017                      Inria
- * Copyright (C) 2008-2019                                Université de Bordeaux
+ * Copyright (C) 2008-2020                                Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010-2013,2015-2019                      CNRS
  * Copyright (C) 2010-2013,2015-2019                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
@@ -228,24 +228,29 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		else
 		else
 			ops = initial_handle->ops;
 			ops = initial_handle->ops;
 
 
+		/* As most of the fields must be initialized at NULL, let's put
+		 * 0 everywhere */
+		memset(child, 0, sizeof(*child));
 		_starpu_data_handle_init(child, ops, initial_handle->mf_node);
 		_starpu_data_handle_init(child, ops, initial_handle->mf_node);
 
 
-		child->nchildren = 0;
-		child->nplans = 0;
-		child->switch_cl = NULL;
-		child->partitioned = 0;
-		child->readonly = 0;
+		//child->nchildren = 0;
+		//child->nplans = 0;
+		//child->switch_cl = NULL;
+		//child->partitioned = 0;
+		//child->readonly = 0;
 		child->active = inherit_state;
 		child->active = inherit_state;
-		child->active_ro = 0;
-                child->mpi_data = NULL;
+		//child->active_ro = 0;
+                //child->mpi_data = NULL;
 		child->root_handle = initial_handle->root_handle;
 		child->root_handle = initial_handle->root_handle;
 		child->father_handle = initial_handle;
 		child->father_handle = initial_handle;
-		child->active_children = NULL;
-		child->active_readonly_children = NULL;
-		child->nactive_readonly_children = 0;
+		//child->active_children = NULL;
+		//child->active_readonly_children = NULL;
+		//child->nactive_readonly_children = 0;
 		child->nsiblings = nparts;
 		child->nsiblings = nparts;
 		if (inherit_state)
 		if (inherit_state)
-			child->siblings = NULL;
+		{
+			//child->siblings = NULL;
+		}
 		else
 		else
 			child->siblings = childrenp;
 			child->siblings = childrenp;
 		child->sibling_index = i;
 		child->sibling_index = i;
@@ -258,31 +263,31 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		/* initialize the chunk lock */
 		/* initialize the chunk lock */
 		_starpu_data_requester_prio_list_init(&child->req_list);
 		_starpu_data_requester_prio_list_init(&child->req_list);
 		_starpu_data_requester_prio_list_init(&child->reduction_req_list);
 		_starpu_data_requester_prio_list_init(&child->reduction_req_list);
-		child->reduction_tmp_handles = NULL;
-		child->write_invalidation_req = NULL;
-		child->refcnt = 0;
-		child->unlocking_reqs = 0;
-		child->busy_count = 0;
-		child->busy_waiting = 0;
-		STARPU_PTHREAD_MUTEX_INIT(&child->busy_mutex, NULL);
-		STARPU_PTHREAD_COND_INIT(&child->busy_cond, NULL);
-		child->reduction_refcnt = 0;
+		//child->reduction_tmp_handles = NULL;
+		//child->write_invalidation_req = NULL;
+		//child->refcnt = 0;
+		//child->unlocking_reqs = 0;
+		//child->busy_count = 0;
+		//child->busy_waiting = 0;
+		STARPU_PTHREAD_MUTEX_INIT0(&child->busy_mutex, NULL);
+		STARPU_PTHREAD_COND_INIT0(&child->busy_cond, NULL);
+		//child->reduction_refcnt = 0;
 		_starpu_spin_init(&child->header_lock);
 		_starpu_spin_init(&child->header_lock);
 
 
 		child->sequential_consistency = initial_handle->sequential_consistency;
 		child->sequential_consistency = initial_handle->sequential_consistency;
 		child->initialized = initial_handle->initialized;
 		child->initialized = initial_handle->initialized;
 		child->ooc = initial_handle->ooc;
 		child->ooc = initial_handle->ooc;
 
 
-		STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
+		//STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
 		child->last_submitted_mode = STARPU_R;
 		child->last_submitted_mode = STARPU_R;
-		child->last_sync_task = NULL;
-		child->last_submitted_accessors.task = NULL;
+		//child->last_sync_task = NULL;
+		//child->last_submitted_accessors.task = NULL;
 		child->last_submitted_accessors.next = &child->last_submitted_accessors;
 		child->last_submitted_accessors.next = &child->last_submitted_accessors;
 		child->last_submitted_accessors.prev = &child->last_submitted_accessors;
 		child->last_submitted_accessors.prev = &child->last_submitted_accessors;
-		child->post_sync_tasks = NULL;
+		//child->post_sync_tasks = NULL;
 		/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
 		/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
 		STARPU_HG_DISABLE_CHECKING(child->post_sync_tasks_cnt);
 		STARPU_HG_DISABLE_CHECKING(child->post_sync_tasks_cnt);
-		child->post_sync_tasks_cnt = 0;
+		//child->post_sync_tasks_cnt = 0;
 
 
 		/* The methods used for reduction are propagated to the
 		/* The methods used for reduction are propagated to the
 		 * children. */
 		 * children. */
@@ -290,17 +295,19 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		child->init_cl = initial_handle->init_cl;
 		child->init_cl = initial_handle->init_cl;
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-		child->last_submitted_ghost_sync_id_is_valid = 0;
-		child->last_submitted_ghost_sync_id = 0;
-		child->last_submitted_ghost_accessors_id = NULL;
+		//child->last_submitted_ghost_sync_id_is_valid = 0;
+		//child->last_submitted_ghost_sync_id = 0;
+		//child->last_submitted_ghost_accessors_id = NULL;
 #endif
 #endif
 
 
 		if (_starpu_global_arbiter)
 		if (_starpu_global_arbiter)
 			/* Just for testing purpose */
 			/* Just for testing purpose */
 			starpu_data_assign_arbiter(child, _starpu_global_arbiter);
 			starpu_data_assign_arbiter(child, _starpu_global_arbiter);
 		else
 		else
-			child->arbiter = NULL;
-		_starpu_data_requester_prio_list_init(&child->arbitered_req_list);
+		{
+			//child->arbiter = NULL;
+		}
+		_starpu_data_requester_prio_list_init0(&child->arbitered_req_list);
 
 
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 		{
@@ -317,16 +324,20 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 			if (inherit_state || !initial_replicate->automatically_allocated)
 			if (inherit_state || !initial_replicate->automatically_allocated)
 				child_replicate->allocated = initial_replicate->allocated;
 				child_replicate->allocated = initial_replicate->allocated;
 			else
 			else
-				child_replicate->allocated = 0;
+			{
+				//child_replicate->allocated = 0;
+			}
 			/* Do not allow memory reclaiming within the child for parent bits */
 			/* Do not allow memory reclaiming within the child for parent bits */
-			child_replicate->automatically_allocated = 0;
-			child_replicate->refcnt = 0;
+			//child_replicate->automatically_allocated = 0;
+			//child_replicate->refcnt = 0;
 			child_replicate->memory_node = node;
 			child_replicate->memory_node = node;
-			child_replicate->relaxed_coherency = 0;
+			//child_replicate->relaxed_coherency = 0;
 			if (inherit_state)
 			if (inherit_state)
 				child_replicate->initialized = initial_replicate->initialized;
 				child_replicate->initialized = initial_replicate->initialized;
 			else
 			else
-				child_replicate->initialized = 0;
+			{
+				//child_replicate->initialized = 0;
+			}
 
 
 			/* update the interface */
 			/* update the interface */
 			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
 			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
@@ -336,8 +347,8 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 			f->filter_func(initial_interface, child_interface, f, i, nparts);
 			f->filter_func(initial_interface, child_interface, f, i, nparts);
 		}
 		}
 
 
-		child->per_worker = NULL;
-		child->user_data = NULL;
+		//child->per_worker = NULL;
+		//child->user_data = NULL;
 
 
 		/* We compute the size and the footprint of the child once and
 		/* We compute the size and the footprint of the child once and
 		 * store it in the handle */
 		 * store it in the handle */

+ 2 - 1
src/datawizard/interfaces/bcsr_filters.c

@@ -87,7 +87,8 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 	if (bcsr_father->nzval)
 	if (bcsr_father->nzval)
 	{
 	{
 		uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
 		uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
-		matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
+		matrix_child->dev_handle = matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
+		matrix_child->offset = 0;
 	}
 	}
 }
 }
 
 

+ 53 - 51
src/datawizard/interfaces/data_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2017                                Inria
  * Copyright (C) 2011-2017                                Inria
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -262,69 +262,69 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 
 
 	/* initialize the new lock */
 	/* initialize the new lock */
-	_starpu_data_requester_prio_list_init(&handle->req_list);
-	handle->refcnt = 0;
-	handle->unlocking_reqs = 0;
-	handle->busy_count = 0;
-	handle->busy_waiting = 0;
-	STARPU_PTHREAD_MUTEX_INIT(&handle->busy_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&handle->busy_cond, NULL);
+	_starpu_data_requester_prio_list_init0(&handle->req_list);
+	//handle->refcnt = 0;
+	//handle->unlocking_reqs = 0;
+	//handle->busy_count = 0;
+	//handle->busy_waiting = 0;
+	STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL);
 	_starpu_spin_init(&handle->header_lock);
 	_starpu_spin_init(&handle->header_lock);
 
 
 	/* first take care to properly lock the data */
 	/* first take care to properly lock the data */
 	_starpu_spin_lock(&handle->header_lock);
 	_starpu_spin_lock(&handle->header_lock);
 
 
 	/* there is no hierarchy yet */
 	/* there is no hierarchy yet */
-	handle->nchildren = 0;
-	handle->nplans = 0;
-	handle->switch_cl = NULL;
-	handle->partitioned = 0;
-	handle->readonly = 0;
+	//handle->nchildren = 0;
+	//handle->nplans = 0;
+	//handle->switch_cl = NULL;
+	//handle->partitioned = 0;
+	//handle->readonly = 0;
 	handle->active = 1;
 	handle->active = 1;
-	handle->active_ro = 0;
+	//handle->active_ro = 0;
 	handle->root_handle = handle;
 	handle->root_handle = handle;
-	handle->father_handle = NULL;
-	handle->active_children = NULL;
-	handle->active_readonly_children = NULL;
-	handle->nactive_readonly_children = 0;
-	handle->nsiblings = 0;
-	handle->siblings = NULL;
-	handle->sibling_index = 0; /* could be anything for the root */
+	//handle->father_handle = NULL;
+	//handle->active_children = NULL;
+	//handle->active_readonly_children = NULL;
+	//handle->nactive_readonly_children = 0;
+	//handle->nsiblings = 0;
+	//handle->siblings = NULL;
+	//handle->sibling_index = 0; /* could be anything for the root */
 	handle->depth = 1; /* the tree is just a node yet */
 	handle->depth = 1; /* the tree is just a node yet */
-        handle->mpi_data = NULL; /* invalid until set */
+        //handle->mpi_data = NULL; /* invalid until set */
 
 
-	handle->is_not_important = 0;
+	//handle->is_not_important = 0;
 
 
 	handle->sequential_consistency =
 	handle->sequential_consistency =
 		starpu_data_get_default_sequential_consistency_flag();
 		starpu_data_get_default_sequential_consistency_flag();
 	handle->initialized = home_node != -1;
 	handle->initialized = home_node != -1;
 	handle->ooc = 1;
 	handle->ooc = 1;
 
 
-	STARPU_PTHREAD_MUTEX_INIT(&handle->sequential_consistency_mutex, NULL);
+	STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL);
 	handle->last_submitted_mode = STARPU_R;
 	handle->last_submitted_mode = STARPU_R;
-	handle->last_sync_task = NULL;
-	handle->last_submitted_accessors.task = NULL;
+	//handle->last_sync_task = NULL;
+	//handle->last_submitted_accessors.task = NULL;
 	handle->last_submitted_accessors.next = &handle->last_submitted_accessors;
 	handle->last_submitted_accessors.next = &handle->last_submitted_accessors;
 	handle->last_submitted_accessors.prev = &handle->last_submitted_accessors;
 	handle->last_submitted_accessors.prev = &handle->last_submitted_accessors;
-	handle->post_sync_tasks = NULL;
+	//handle->post_sync_tasks = NULL;
 
 
 	/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
 	/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
 	STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt);
 	STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt);
-	handle->post_sync_tasks_cnt = 0;
+	//handle->post_sync_tasks_cnt = 0;
 
 
 	/* By default, there are no methods available to perform a reduction */
 	/* By default, there are no methods available to perform a reduction */
-	handle->redux_cl = NULL;
-	handle->init_cl = NULL;
+	//handle->redux_cl = NULL;
+	//handle->init_cl = NULL;
 
 
-	handle->reduction_refcnt = 0;
-	_starpu_data_requester_prio_list_init(&handle->reduction_req_list);
-	handle->reduction_tmp_handles = NULL;
-	handle->write_invalidation_req = NULL;
+	//handle->reduction_refcnt = 0;
+	_starpu_data_requester_prio_list_init0(&handle->reduction_req_list);
+	//handle->reduction_tmp_handles = NULL;
+	//handle->write_invalidation_req = NULL;
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-	handle->last_submitted_ghost_sync_id_is_valid = 0;
-	handle->last_submitted_ghost_sync_id = 0;
-	handle->last_submitted_ghost_accessors_id = NULL;
+	//handle->last_submitted_ghost_sync_id_is_valid = 0;
+	//handle->last_submitted_ghost_sync_id = 0;
+	//handle->last_submitted_ghost_accessors_id = NULL;
 #endif
 #endif
 
 
 	handle->wt_mask = wt_mask;
 	handle->wt_mask = wt_mask;
@@ -339,8 +339,10 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 		/* Just for testing purpose */
 		/* Just for testing purpose */
 		starpu_data_assign_arbiter(handle, _starpu_global_arbiter);
 		starpu_data_assign_arbiter(handle, _starpu_global_arbiter);
 	else
 	else
-		handle->arbiter = NULL;
-	_starpu_data_requester_prio_list_init(&handle->arbitered_req_list);
+	{
+		//handle->arbiter = NULL;
+	}
+	_starpu_data_requester_prio_list_init0(&handle->arbitered_req_list);
 	handle->last_locality = -1;
 	handle->last_locality = -1;
 
 
 	/* that new data is invalid from all nodes perpective except for the
 	/* that new data is invalid from all nodes perpective except for the
@@ -352,28 +354,28 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 		replicate = &handle->per_node[node];
 		replicate = &handle->per_node[node];
 
 
 		replicate->memory_node = node;
 		replicate->memory_node = node;
-		replicate->relaxed_coherency = 0;
-		replicate->refcnt = 0;
+		//replicate->relaxed_coherency = 0;
+		//replicate->refcnt = 0;
 
 
 		if ((int) node == home_node)
 		if ((int) node == home_node)
 		{
 		{
 			/* this is the home node with the only valid copy */
 			/* this is the home node with the only valid copy */
 			replicate->state = STARPU_OWNER;
 			replicate->state = STARPU_OWNER;
 			replicate->allocated = 1;
 			replicate->allocated = 1;
-			replicate->automatically_allocated = 0;
+			//replicate->automatically_allocated = 0;
 			replicate->initialized = 1;
 			replicate->initialized = 1;
 		}
 		}
 		else
 		else
 		{
 		{
 			/* the value is not available here yet */
 			/* the value is not available here yet */
 			replicate->state = STARPU_INVALID;
 			replicate->state = STARPU_INVALID;
-			replicate->allocated = 0;
-			replicate->initialized = 0;
+			//replicate->allocated = 0;
+			//replicate->initialized = 0;
 		}
 		}
 	}
 	}
 
 
-	handle->per_worker = NULL;
-	handle->user_data = NULL;
+	//handle->per_worker = NULL;
+	//handle->user_data = NULL;
 
 
 	/* now the data is available ! */
 	/* now the data is available ! */
 	_starpu_spin_unlock(&handle->header_lock);
 	_starpu_spin_unlock(&handle->header_lock);
@@ -451,8 +453,8 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 	handle->magic = 42;
 	handle->magic = 42;
 	handle->ops = interface_ops;
 	handle->ops = interface_ops;
 	handle->mf_node = mf_node;
 	handle->mf_node = mf_node;
-	handle->mpi_data = NULL;
-	handle->partition_automatic_disabled = 0;
+	//handle->mpi_data = NULL;
+	//handle->partition_automatic_disabled = 0;
 
 
 	size_t interfacesize = interface_ops->interface_size;
 	size_t interfacesize = interface_ops->interface_size;
 
 
@@ -761,12 +763,12 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 		int home_node = handle->home_node;
 		int home_node = handle->home_node;
 		if (home_node >= 0)
 		if (home_node >= 0)
 		{
 		{
-			struct _starpu_unregister_callback_arg arg;
+			struct _starpu_unregister_callback_arg arg = { 0 };
 			arg.handle = handle;
 			arg.handle = handle;
 			arg.memory_node = (unsigned)home_node;
 			arg.memory_node = (unsigned)home_node;
 			arg.terminated = 0;
 			arg.terminated = 0;
-			STARPU_PTHREAD_MUTEX_INIT(&arg.mutex, NULL);
-			STARPU_PTHREAD_COND_INIT(&arg.cond, NULL);
+			STARPU_PTHREAD_MUTEX_INIT0(&arg.mutex, NULL);
+			STARPU_PTHREAD_COND_INIT0(&arg.cond, NULL);
 
 
 			if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R,
 			if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R,
 					_starpu_data_unregister_fetch_data_callback, &arg))
 					_starpu_data_unregister_fetch_data_callback, &arg))

+ 6 - 6
src/datawizard/memstats.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012,2015-2017                      CNRS
  * Copyright (C) 2010-2012,2015-2017                      CNRS
- * Copyright (C) 2009,2010,2012,2014                      Université de Bordeaux
+ * Copyright (C) 2009,2010,2012,2014,2020                 Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -31,11 +31,11 @@ void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle STARPU_ATTRI
 {
 {
 #ifdef STARPU_MEMORY_STATS
 #ifdef STARPU_MEMORY_STATS
 	/* Stats initilization */
 	/* Stats initilization */
-	handle->memory_stats->direct_access[node]=0;
-	handle->memory_stats->loaded_shared[node]=0;
-	handle->memory_stats->shared_to_owner[node]=0;
-	handle->memory_stats->loaded_owner[node]=0;
-	handle->memory_stats->invalidated[node]=0;
+	//handle->memory_stats->direct_access[node]=0;
+	//handle->memory_stats->loaded_shared[node]=0;
+	//handle->memory_stats->shared_to_owner[node]=0;
+	//handle->memory_stats->loaded_owner[node]=0;
+	//handle->memory_stats->invalidated[node]=0;
 #endif
 #endif
 }
 }
 
 

+ 33 - 15
src/datawizard/user_interactions.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011-2013,2017                           Inria
  * Copyright (C) 2011-2013,2017                           Inria
- * Copyright (C) 2009-2019                                Université de Bordeaux
+ * Copyright (C) 2009-2020                                Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -85,9 +85,9 @@ static inline void _starpu_data_acquire_wrapper_init(struct user_interaction_wra
 	wrapper->handle = handle;
 	wrapper->handle = handle;
 	wrapper->node = node;
 	wrapper->node = node;
 	wrapper->mode = mode;
 	wrapper->mode = mode;
-	wrapper->finished = 0;
-	STARPU_PTHREAD_COND_INIT(&wrapper->cond, NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
+	//wrapper->finished = 0;
+	STARPU_PTHREAD_COND_INIT0(&wrapper->cond, NULL);
+	STARPU_PTHREAD_MUTEX_INIT0(&wrapper->lock, NULL);
 }
 }
 
 
 /* Called to signal completion of asynchronous data acquisition */
 /* Called to signal completion of asynchronous data acquisition */
@@ -216,6 +216,7 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 	{
 	{
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
 		struct _starpu_job *pre_sync_job, *post_sync_job;
 		struct _starpu_job *pre_sync_job, *post_sync_job;
+		int submit_pre_sync = 0;
 		wrapper->pre_sync_task = starpu_task_create();
 		wrapper->pre_sync_task = starpu_task_create();
 		wrapper->pre_sync_task->name = "_starpu_data_acquire_cb_pre";
 		wrapper->pre_sync_task->name = "_starpu_data_acquire_cb_pre";
 		wrapper->pre_sync_task->detach = 1;
 		wrapper->pre_sync_task->detach = 1;
@@ -237,18 +238,26 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 		if (quick)
 		if (quick)
 			pre_sync_job->quick_next = post_sync_job;
 			pre_sync_job->quick_next = post_sync_job;
 
 
-		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
+		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, &submit_pre_sync, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 
 
-		if (new_task)
+		if (STARPU_UNLIKELY(new_task))
 		{
 		{
 			int ret = _starpu_task_submit_internally(new_task);
 			int ret = _starpu_task_submit_internally(new_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);
 		}
 		}
 
 
-		/* TODO detect if this is superflous */
-		int ret = _starpu_task_submit_internally(wrapper->pre_sync_task);
-		STARPU_ASSERT(!ret);
+		if (submit_pre_sync)
+		{
+			int ret = _starpu_task_submit_internally(wrapper->pre_sync_task);
+			STARPU_ASSERT(!ret);
+		}
+		else
+		{
+			wrapper->pre_sync_task->detach = 0;
+			starpu_task_destroy(wrapper->pre_sync_task);
+			starpu_data_acquire_cb_pre_sync_callback(wrapper);
+		}
 	}
 	}
 	else
 	else
 	{
 	{
@@ -360,6 +369,7 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 	if (sequential_consistency)
 	if (sequential_consistency)
 	{
 	{
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
+		int submit_pre_sync = 0;
 		wrapper.pre_sync_task = starpu_task_create();
 		wrapper.pre_sync_task = starpu_task_create();
 		wrapper.pre_sync_task->name = "_starpu_data_acquire_pre";
 		wrapper.pre_sync_task->name = "_starpu_data_acquire_pre";
 		wrapper.pre_sync_task->detach = 0;
 		wrapper.pre_sync_task->detach = 0;
@@ -370,18 +380,26 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 		wrapper.post_sync_task->detach = 1;
 		wrapper.post_sync_task->detach = 1;
 		wrapper.post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
 		wrapper.post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
 
 
-		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
+		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, &submit_pre_sync, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
-		if (new_task)
+
+		if (STARPU_UNLIKELY(new_task))
 		{
 		{
 			int ret = _starpu_task_submit_internally(new_task);
 			int ret = _starpu_task_submit_internally(new_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);
 		}
 		}
 
 
-		/* TODO detect if this is superflous */
-		wrapper.pre_sync_task->synchronous = 1;
-		int ret = _starpu_task_submit_internally(wrapper.pre_sync_task);
-		STARPU_ASSERT(!ret);
+		if (submit_pre_sync)
+		{
+			wrapper.pre_sync_task->synchronous = 1;
+			int ret = _starpu_task_submit_internally(wrapper.pre_sync_task);
+			STARPU_ASSERT(!ret);
+		}
+		else
+		{
+			wrapper.pre_sync_task->detach = 0;
+			starpu_task_destroy(wrapper.pre_sync_task);
+		}
 	}
 	}
 	else
 	else
 	{
 	{

+ 7 - 7
src/drivers/mpi/driver_mpi_source.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2017,2019                                CNRS
  * Copyright (C) 2017,2019                                CNRS
- * Copyright (C) 2017                                     Université de Bordeaux
+ * Copyright (C) 2017,2020                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -50,13 +50,13 @@ int _starpu_mpi_copy_mpi_to_ram_async(void *src, unsigned src_node, void *dst, u
 int _starpu_mpi_copy_ram_to_mpi_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size, void * event);
 int _starpu_mpi_copy_ram_to_mpi_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size, void * event);
 int _starpu_mpi_copy_sink_to_sink_async(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size, void * event);
 int _starpu_mpi_copy_sink_to_sink_async(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size, void * event);
 
 
-int _starpu_mpi_copy_data_from_mpi_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
-int _starpu_mpi_copy_data_from_mpi_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
-int _starpu_mpi_copy_data_from_cpu_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
+int _starpu_mpi_copy_interface_from_mpi_to_cpu(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
+int _starpu_mpi_copy_interface_from_mpi_to_mpi(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
+int _starpu_mpi_copy_interface_from_cpu_to_mpi(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
 
 
-int _starpu_mpi_copy_interface_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
-int _starpu_mpi_copy_interface_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
-int _starpu_mpi_copy_interface_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
+int _starpu_mpi_copy_data_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
+int _starpu_mpi_copy_data_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
+int _starpu_mpi_copy_data_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
 
 
 int _starpu_mpi_is_direct_access_supported(unsigned node, unsigned handling_node);
 int _starpu_mpi_is_direct_access_supported(unsigned node, unsigned handling_node);
 uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags);
 uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags);

+ 2 - 2
src/profiling/bound.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011,2012,2014                           Inria
  * Copyright (C) 2011,2012,2014                           Inria
- * Copyright (C) 2010-2017,2019                           Université de Bordeaux
+ * Copyright (C) 2010-2017,2019-2020                      Université de Bordeaux
  * Copyright (C) 2010-2017,2019                           CNRS
  * Copyright (C) 2010-2017,2019                           CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -257,7 +257,7 @@ static void new_task(struct _starpu_job *j)
 /* A new task was submitted, record it */
 /* A new task was submitted, record it */
 void _starpu_bound_record(struct _starpu_job *j)
 void _starpu_bound_record(struct _starpu_job *j)
 {
 {
-	if (!_starpu_bound_recording)
+	if (STARPU_LIKELY(!_starpu_bound_recording))
 		return;
 		return;
 
 
 	if (!good_job(j))
 	if (!good_job(j))

+ 2 - 2
src/util/openmp_runtime_support.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2014-2018                                Inria
  * Copyright (C) 2014-2018                                Inria
  * Copyright (C) 2014-2017,2019                           CNRS
  * Copyright (C) 2014-2017,2019                           CNRS
- * Copyright (C) 2015,2017,2019                           Université de Bordeaux
+ * Copyright (C) 2015,2017,2019-2020                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -319,7 +319,7 @@ static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_regi
 	_STARPU_CALLOC(region, 1, sizeof(*region));
 	_STARPU_CALLOC(region, 1, sizeof(*region));
 	region->parent_region = parent_region;
 	region->parent_region = parent_region;
 	region->owner_device = owner_device;
 	region->owner_device = owner_device;
-	starpu_omp_thread_list_init(&region->thread_list);
+	starpu_omp_thread_list_init0(&region->thread_list);
 
 
 	_starpu_spin_init(&region->lock);
 	_starpu_spin_init(&region->lock);
 	_starpu_spin_init(&region->registered_handles_lock);
 	_starpu_spin_init(&region->registered_handles_lock);

+ 11 - 1
tests/Makefile.am

@@ -32,6 +32,9 @@ EXTRA_DIST =					\
 	regression/profiles.in			\
 	regression/profiles.in			\
 	regression/regression.sh.in		\
 	regression/regression.sh.in		\
 	regression/profiles.build.only.in	\
 	regression/profiles.build.only.in	\
+	microbenchs/tasks_data_overhead.sh	\
+	microbenchs/sync_tasks_data_overhead.sh	\
+	microbenchs/async_tasks_data_overhead.sh	\
 	microbenchs/tasks_size_overhead.sh	\
 	microbenchs/tasks_size_overhead.sh	\
 	microbenchs/tasks_size_overhead_sched.sh	\
 	microbenchs/tasks_size_overhead_sched.sh	\
 	microbenchs/tasks_size_overhead_scheds.sh	\
 	microbenchs/tasks_size_overhead_scheds.sh	\
@@ -396,11 +399,18 @@ examplebin_PROGRAMS = \
 	microbenchs/tasks_size_overhead		\
 	microbenchs/tasks_size_overhead		\
 	microbenchs/local_pingpong
 	microbenchs/local_pingpong
 examplebin_SCRIPTS = \
 examplebin_SCRIPTS = \
+	microbenchs/tasks_data_overhead.sh \
+	microbenchs/sync_tasks_data_overhead.sh \
+	microbenchs/async_tasks_data_overhead.sh \
 	microbenchs/tasks_size_overhead.gp \
 	microbenchs/tasks_size_overhead.gp \
 	microbenchs/tasks_size_overhead.sh
 	microbenchs/tasks_size_overhead.sh
 if !STARPU_SIMGRID
 if !STARPU_SIMGRID
 if !STARPU_USE_MPI_MASTER_SLAVE
 if !STARPU_USE_MPI_MASTER_SLAVE
-TESTS += microbenchs/tasks_size_overhead_scheds.sh
+TESTS += \
+	microbenchs/tasks_data_overhead.sh \
+	microbenchs/sync_tasks_data_overhead.sh \
+	microbenchs/async_tasks_data_overhead.sh \
+	microbenchs/tasks_size_overhead_scheds.sh
 endif
 endif
 endif
 endif
 
 

+ 2 - 2
tests/cholesky/sched.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014,2019                      Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@ trace_sched()
 		do
 		do
 			echo "$iter / $maxiter"
 			echo "$iter / $maxiter"
 			 echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
 			 echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
-			 val=`$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
 			 echo "$val" >> $filename
 			 echo "$val" >> $filename
 		done
 		done
 	done
 	done

+ 3 - 3
tests/cholesky/sched_one_gpu.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2011,2014,2019                      Université de Bordeaux
+# Copyright (C) 2009-2011,2014,2019-2020                 Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -50,7 +50,7 @@ trace_sched()
 		do
 		do
 			echo "$iter / $maxiter"
 			echo "$iter / $maxiter"
 			echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
 			echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
-			val=`$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
+			val=`$STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
 			echo "$val" >> $filename
 			echo "$val" >> $filename
 			echo "$val"
 			echo "$val"
 		done
 		done
@@ -67,7 +67,7 @@ mkdir -p $TIMINGDIR
 # calibrate
 # calibrate
 for i in `seq 1 5` 
 for i in `seq 1 5` 
 do
 do
-STARPU_SCHED="dm" $ROOTDIR/examples/cholesky/dw_cholesky -nblocks 16 -size 16384 2> /dev/null
+STARPU_SCHED="dm" $STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky -nblocks 16 -size 16384 2> /dev/null
 done
 done
 
 
 for sched in $schedlist
 for sched in $schedlist

+ 2 - 1
tests/cholesky_ctxs/evaluate_expression.sh

@@ -3,6 +3,7 @@
 #
 #
 # Copyright (C) 2011                                     Inria
 # Copyright (C) 2011                                     Inria
 # Copyright (C) 2012,2017                                CNRS
 # Copyright (C) 2012,2017                                CNRS
+# Copyright (C) 2020                                     Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +34,7 @@ for s in `seq 1 $nsamples`
 do
 do
     echo "$ROOTDIR/examples/$BENCH_NAME $OPTIONS"
     echo "$ROOTDIR/examples/$BENCH_NAME $OPTIONS"
     
     
-    val=`$ROOTDIR/examples/$BENCH_NAME $OPTIONS`
+    val=`$STARPU_LAUNCH $ROOTDIR/examples/$BENCH_NAME $OPTIONS`
     
     
     echo "$val"
     echo "$val"
     
     

+ 24 - 24
tests/coverage/coverage.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2010,2011,2014,2017                      Université de Bordeaux
+# Copyright (C) 2010,2011,2014,2017,2020                 Université de Bordeaux
 # Copyright (C) 2010,2011,2015,2017                      CNRS
 # Copyright (C) 2010,2011,2015,2017                      CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -29,109 +29,109 @@ apps()
 {
 {
     if [ -f $exampledir/basic_examples/block ] ; then
     if [ -f $exampledir/basic_examples/block ] ; then
 	echo "block opencl"
 	echo "block opencl"
-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/block
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/block
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/basic_examples/variable ] ; then
     if [ -f $exampledir/basic_examples/variable ] ; then
 	echo "variable opencl"
 	echo "variable opencl"
-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable 100
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/variable 100
 	check_success $?
 	check_success $?
 
 
 	echo "variable no worker"
 	echo "variable no worker"
-	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable
+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/variable
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/incrementer/incrementer ] ; then
     if [ -f $exampledir/incrementer/incrementer ] ; then
 	echo "incrementer opencl"
 	echo "incrementer opencl"
-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer 10
+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/incrementer/incrementer 10
 	check_success $?
 	check_success $?
 
 
 	echo "incrementer no worker"
 	echo "incrementer no worker"
-	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer
+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/incrementer/incrementer
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/tag_example/tag_example ] ; then
     if [ -f $exampledir/tag_example/tag_example ] ; then
 	echo "tag_example"
 	echo "tag_example"
-	$exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
+	$STARPU_LAUNCH $exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/tag_example/tag_example2 ] ; then
     if [ -f $exampledir/tag_example/tag_example2 ] ; then
 	echo "tag_example2"
 	echo "tag_example2"
-	$exampledir/tag_example/tag_example2 -iter 64 -i 128
+	$STARPU_LAUNCH $exampledir/tag_example/tag_example2 -iter 64 -i 128
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/cholesky/dw_cholesky ] ; then
     if [ -f $exampledir/cholesky/dw_cholesky ] ; then
 	echo "chol.dm"
 	echo "chol.dm"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
 	check_success $?
 	check_success $?
 
 
 	echo "chol.dmda"
 	echo "chol.dmda"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $exampledir/cholesky/dw_cholesky -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
 	check_success $?
 	check_success $?
 
 
 	echo "chol.cpu"
 	echo "chol.cpu"
-	STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
+	STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
 	check_success $?
 	check_success $?
 
 
 	echo "chol.gpu"
 	echo "chol.gpu"
-	STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
+	STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/heat/heat ] ; then
     if [ -f $exampledir/heat/heat ] ; then
 	echo "heat.dm.4k.calibrate.v2"
 	echo "heat.dm.4k.calibrate.v2"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin
 	check_success $?
 	check_success $?
 
 
 	echo "heat.dm.8k.calibrate.v2"
 	echo "heat.dm.8k.calibrate.v2"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin
 	check_success $?
 	check_success $?
 
 
 	echo "heat.dm.8k.no.pin.v2"
 	echo "heat.dm.8k.no.pin.v2"
-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2
+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2
 	check_success $?
 	check_success $?
 
 
 #	echo "heat.dm.8k.v2.no.prio"
 #	echo "heat.dm.8k.v2.no.prio"
-#	STARPU_SCHED="no-prio" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
+#	STARPU_SCHED="no-prio" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
 #	check_success $?
 #	check_success $?
 
 
 	echo "heat.dm.8k.v2.random"
 	echo "heat.dm.8k.v2.random"
-	STARPU_SCHED="random" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
+	STARPU_SCHED="random" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
 	check_success $?
 	check_success $?
 
 
 	echo "heat.dm.8k.v2"
 	echo "heat.dm.8k.v2"
-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
 	check_success $?
 	check_success $?
 
 
 	echo "heat.greedy.8k.v2"
 	echo "heat.greedy.8k.v2"
-	STARPU_SCHED="greedy" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
+	STARPU_SCHED="greedy" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
 	check_success $?
 	check_success $?
 
 
 	echo "heat.8k.cg"
 	echo "heat.8k.cg"
-	$exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
+	$STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
 	check_success $?
 	check_success $?
 
 
 	echo "heat.dm.8k.cg"
 	echo "heat.dm.8k.cg"
-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
 	check_success $?
 	check_success $?
     fi
     fi
 
 
     if [ -f $exampledir/mult/dw_mult_no_stride ] ; then
     if [ -f $exampledir/mult/dw_mult_no_stride ] ; then
 	echo "mult.dm.common"
 	echo "mult.dm.common"
-	STARPU_SCHED="dm" $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model
+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model
 	check_success $?
 	check_success $?
 
 
 	echo "mult.dm"
 	echo "mult.dm"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
 	check_success $?
 	check_success $?
 
 
 	echo "mult.dmda"
 	echo "mult.dmda"
-	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
+	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
 	check_success $?
 	check_success $?
     fi
     fi
 }
 }

+ 51 - 4
tests/datawizard/acquire_cb.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011,2013,2014,2016                      Université de Bordeaux
+ * Copyright (C) 2011,2013,2014,2016, 2020                      Université de Bordeaux
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2011-2013,2017                           CNRS
  * Copyright (C) 2011-2013,2017                           CNRS
  *
  *
@@ -20,20 +20,27 @@
 #include "../helper.h"
 #include "../helper.h"
 
 
 /*
 /*
- * Test that when using starpu_data_acquire_cb, the callback is properly called
+ * Test that when using starpu_data_acquire_cb, the callback_w is properly called
  */
  */
 
 
 unsigned token = 0;
 unsigned token = 0;
 starpu_data_handle_t token_handle;
 starpu_data_handle_t token_handle;
 
 
 static
 static
-void callback(void *arg)
+void callback_w(void *arg)
 {
 {
 	(void)arg;
 	(void)arg;
 	token = 42;
 	token = 42;
         starpu_data_release(token_handle);
         starpu_data_release(token_handle);
 }
 }
 
 
+static
+void callback_r(void *arg)
+{
+	(void)arg;
+        starpu_data_release(token_handle);
+}
+
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret;
 	int ret;
@@ -42,8 +49,48 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_unregister(token_handle);
+	STARPU_ASSERT(token == 42);
+
+	token = 0;
+
+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
+	starpu_data_acquire(token_handle, STARPU_W);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_release(token_handle);
+	starpu_data_unregister(token_handle);
+
+	token = 0;
+
 	starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned));
 	starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned));
-        starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
+	/* These are getting executed immediately */
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+
+	starpu_data_acquire(token_handle, STARPU_W);
+	/* These will wait for our release */
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
+	starpu_data_release(token_handle);
 
 
 	starpu_data_unregister(token_handle);
 	starpu_data_unregister(token_handle);
 
 

+ 1 - 1
tests/datawizard/interfaces/test_interfaces.sh

@@ -17,7 +17,7 @@
 
 
 for i in bcsr block coo csr matrix multiformat variable vector void
 for i in bcsr block coo csr matrix multiformat variable vector void
 do
 do
-    ./tests/datawizard/interfaces/$i/${i}_interface
+    $STARPU_LAUNCH ./tests/datawizard/interfaces/$i/${i}_interface
     ret=$?
     ret=$?
     if test "$ret" = "0"
     if test "$ret" = "0"
     then
     then

+ 3 - 3
tests/datawizard/locality.sh

@@ -4,7 +4,7 @@
 # Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
 # Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
 # Copyright (C) 2017                                     CNRS
 # Copyright (C) 2017                                     CNRS
 # Copyright (C) 2017                                     Inria
 # Copyright (C) 2017                                     Inria
-# Copyright (C) 2017,2018-2019                           Université de Bordeaux
+# Copyright (C) 2017,2018-2020                           Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -33,8 +33,8 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 fi
 fi
 
 
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
-STARPU_SCHED=modular-eager STARPU_FXT_PREFIX=$PREFIX/ $PREFIX/locality
-$PREFIX/../../tools/starpu_fxt_tool -memory-states -label-deps -i $PREFIX/prof_file_${USER}_0
+STARPU_SCHED=modular-eager STARPU_FXT_PREFIX=$PREFIX/ $STARPU_LAUNCH $PREFIX/locality
+$STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -memory-states -label-deps -i $PREFIX/prof_file_${USER}_0
 
 
 # Check that they are approved by Grenoble :)
 # Check that they are approved by Grenoble :)
 
 

+ 5 - 5
tests/experiments/bandwidth_cuda/bench_bandwidth.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2011,2014                           Université de Bordeaux
+# Copyright (C) 2009-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -25,7 +25,7 @@ echo "H -> D"
 for log in `seq 1 13`
 for log in `seq 1 13`
 do
 do
 	size=$((2**$log))
 	size=$((2**$log))
-	echo "$size	`./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/htod-pin.data 
+	echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/htod-pin.data 
 done
 done
 
 
 echo "D -> H"
 echo "D -> H"
@@ -33,7 +33,7 @@ echo "D -> H"
 for log in `seq 1 13`
 for log in `seq 1 13`
 do
 do
 	size=$((2**$log))
 	size=$((2**$log))
-	echo "$size	`./cuda_bandwidth -pin -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/dtoh-pin.data 
+	echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/dtoh-pin.data 
 done
 done
 
 
 ./bench_bandwidth.gp
 ./bench_bandwidth.gp
@@ -48,7 +48,7 @@ do
 	for log in `seq 1 $stridelog`
 	for log in `seq 1 $stridelog`
 	do
 	do
 		size=$((2**$log))
 		size=$((2**$log))
-		echo "$size	`./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/htod-pin.$stridesize.data 
+		echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/htod-pin.$stridesize.data 
 	done
 	done
 done
 done
 
 
@@ -62,6 +62,6 @@ do
 	for log in `seq 1 $stridelog`
 	for log in `seq 1 $stridelog`
 	do
 	do
 		size=$((2**$log))
 		size=$((2**$log))
-		echo "$size	`./cuda_bandwidth -pin -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/dtoh-pin.$stridesize.data 
+		echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/dtoh-pin.$stridesize.data 
 	done
 	done
 done
 done

+ 2 - 2
tests/heat/deps.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -40,7 +40,7 @@ trace_deps()
 		for iter in `seq 1 $maxiter`
 		for iter in `seq 1 $maxiter`
 		do
 		do
 			echo "$iter / $maxiter"
 			echo "$iter / $maxiter"
-			 val=`$ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
 			 echo "$val" >> $filename
 			 echo "$val" >> $filename
 		done
 		done
 	done
 	done

+ 4 - 4
tests/heat/dmda.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2011,2014, 2019                           Université de Bordeaux
+# Copyright (C) 2009-2011,2014,2019-2020                 Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -38,7 +38,7 @@ calibrate_point()
 		export STARPU_SCHED=$strat
 		export STARPU_SCHED=$strat
 		export STARPU_CALIBRATE=1
 		export STARPU_CALIBRATE=1
 		export STARPU_PREFETCH=$prefetch
 		export STARPU_PREFETCH=$prefetch
-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
 		echo "$val"
 		echo "$val"
 	done
 	done
 
 
@@ -76,14 +76,14 @@ do
 	export STARPU_SCHED="dm"
 	export STARPU_SCHED="dm"
 	export STARPU_CALIBRATE=1
 	export STARPU_CALIBRATE=1
 	export STARPU_PREFETCH=1
 	export STARPU_PREFETCH=1
-	valdm=$($ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdm)
+	valdm=$($STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdm)
 
 
 	calibrate_point "dmda" $nblocks 1
 	calibrate_point "dmda" $nblocks 1
 
 
 	export STARPU_SCHED="dmda"
 	export STARPU_SCHED="dmda"
 	export STARPU_CALIBRATE=1
 	export STARPU_CALIBRATE=1
 	export STARPU_PREFETCH=1
 	export STARPU_PREFETCH=1
-	valdmda=$($ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdmda)
+	valdmda=$($STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdmda)
 	
 	
 	dmmiss=`grep "TOTAL MSI" logdm|sed -e "s/.*miss.*[1-9]* (\(.*\) %)/\1/"`
 	dmmiss=`grep "TOTAL MSI" logdm|sed -e "s/.*miss.*[1-9]* (\(.*\) %)/\1/"`
 	dmtotal=`grep "TOTAL transfers" logdm|sed -e "s/TOTAL transfers \(.*\) MB/\1/"`
 	dmtotal=`grep "TOTAL transfers" logdm|sed -e "s/TOTAL transfers \(.*\) MB/\1/"`

+ 2 - 2
tests/heat/gflops.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -49,7 +49,7 @@ measure_heat()
 	for i in `seq 1 $nsample`
 	for i in `seq 1 $nsample`
 	do
 	do
 		echo "iter $i/$nsample"
 		echo "iter $i/$nsample"
-		val=`$ROOTDIR/examples/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
 		total=`echo "$val + $total" |bc -l`
 		total=`echo "$val + $total" |bc -l`
 	done
 	done
 
 

+ 2 - 2
tests/heat/gflops_sched.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -51,7 +51,7 @@ measure_heat()
 	for i in `seq 1 $nsample`
 	for i in `seq 1 $nsample`
 	do
 	do
 		echo "iter $i/$nsample"
 		echo "iter $i/$nsample"
-		val=`STARPU_SCHED=$policy $ROOTDIR/examples/heat/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
+		val=`STARPU_SCHED=$policy $STARPU_LAUNCH $ROOTDIR/examples/heat/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
 		total=`echo "$val + $total" |bc -l`
 		total=`echo "$val + $total" |bc -l`
 	done
 	done
 
 

+ 9 - 9
tests/heat/granularity.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -62,7 +62,7 @@ trace_granularity()
 		for iter in `seq 1 $maxiter`
 		for iter in `seq 1 $maxiter`
 		do
 		do
 			echo "$iter / $maxiter"
 			echo "$iter / $maxiter"
-			 val=`STARPU_NCPUS=8 STARPU_NCUDA=3 STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
+			 val=`STARPU_NCPUS=8 STARPU_NCUDA=3 STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
 			 echo "$val"
 			 echo "$val"
 			 echo "$val" >> $filename
 			 echo "$val" >> $filename
 		done
 		done
@@ -103,7 +103,7 @@ trace_granularity_hybrid()
 		for iter in `seq 1 $maxiter`
 		for iter in `seq 1 $maxiter`
 		do
 		do
 			echo "$iter / $maxiter"
 			echo "$iter / $maxiter"
-			 val=`STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
+			 val=`STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
 			 echo "$val"
 			 echo "$val"
 			 echo "$val" >> $filename
 			 echo "$val" >> $filename
 		done
 		done
@@ -123,12 +123,12 @@ calibrate_grain()
 
 
 	OPTIONS="-pin -nblocks $blocks -size $size -v3"
 	OPTIONS="-pin -nblocks $blocks -size $size -v3"
 
 
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_SCHED="dm" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
 }
 }
 
 
 mkdir -p $TIMINGDIR
 mkdir -p $TIMINGDIR

+ 5 - 5
tests/heat/heat.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -47,7 +47,7 @@ do
 			export STARPU_NCUDA=$cublas
 			export STARPU_NCUDA=$cublas
 
 
 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
-			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
+			$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
 		done
 		done
 	done
 	done
 
 
@@ -63,7 +63,7 @@ do
 			export STARPU_NCUDA=$cublas
 			export STARPU_NCUDA=$cublas
 
 
 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
-			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
+			$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
 		done
 		done
 	done
 	done
 done
 done
@@ -90,14 +90,14 @@ do
 
 
 		echo "size $size cpus 4 cublas 0 blocks $blocks"
 		echo "size $size cpus 4 cublas 0 blocks $blocks"
 		filename=$TIMINGDIR/timing.4.0.$size.$blocks
 		filename=$TIMINGDIR/timing.4.0.$size.$blocks
-		$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
+		$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
 
 
 		export STARPU_NCPUS=3
 		export STARPU_NCPUS=3
 		export STARPU_NCUDA=1
 		export STARPU_NCUDA=1
 
 
 		echo "size $size cpus 3 cublas 1 blocks $blocks"
 		echo "size $size cpus 3 cublas 1 blocks $blocks"
 		filename=$TIMINGDIR/timing.3.1.$size.$blocks
 		filename=$TIMINGDIR/timing.3.1.$size.$blocks
-		$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
+		$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
 	done
 	done
 done
 done
 
 

+ 2 - 2
tests/heat/model_perturbation.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -57,7 +57,7 @@ trace_perturbation()
 		for iter in `seq 1 $nsamples`
 		for iter in `seq 1 $nsamples`
 		do
 		do
 			echo "$iter / $nsamples"
 			echo "$iter / $nsamples"
-			 val=`$ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
 			 echo "$val" >> $filename
 			 echo "$val" >> $filename
 		done
 		done
 	done
 	done

+ 3 - 3
tests/heat/sched.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014, 2019-2020                           Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -38,7 +38,7 @@ calibrate_point()
 		export STARPU_SCHED=$strat
 		export STARPU_SCHED=$strat
 		export STARPU_CALIBRATE=1
 		export STARPU_CALIBRATE=1
 		export STARPU_PREFETCH=$prefetch
 		export STARPU_PREFETCH=$prefetch
-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
 		echo "$val"
 		echo "$val"
 	done
 	done
 
 
@@ -68,7 +68,7 @@ trace_point()
 		export STARPU_SCHED=$strat
 		export STARPU_SCHED=$strat
 		export STARPU_CALIBRATE=$docalibrate
 		export STARPU_CALIBRATE=$docalibrate
 		export STARPU_PREFETCH=$prefetch
 		export STARPU_PREFETCH=$prefetch
-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3  2> /dev/null`
+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3  2> /dev/null`
 		echo "$val"
 		echo "$val"
 		echo "$val" >> $filename
 		echo "$val" >> $filename
 	done
 	done

+ 2 - 2
tests/heat/speedup.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -41,7 +41,7 @@ do
 		echo "ncpus $cpus size $size"
 		echo "ncpus $cpus size $size"
 
 
 		filename=$TIMINGDIR/timing.$cpus.$size
 		filename=$TIMINGDIR/timing.$cpus.$size
-		$ROOTDIR/examples/heat -v2 -pin -nthick 34 -ntheta $(($theta+2)) -nblocks 16 2>/dev/null| tee $filename
+		$STARPU_LAUNCH $ROOTDIR/examples/heat -v2 -pin -nthick 34 -ntheta $(($theta+2)) -nblocks 16 2>/dev/null| tee $filename
 
 
 		echo "$cpus	`cat $TIMINGDIR/timing.$cpus.$size`	`cat  $TIMINGDIR/timing.1.$size`" >> $TIMINGDIR/speedup.$size
 		echo "$cpus	`cat $TIMINGDIR/timing.$cpus.$size`	`cat  $TIMINGDIR/timing.1.$size`" >> $TIMINGDIR/speedup.$size
 	done
 	done

+ 2 - 2
tests/incrementer/speed.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2010,2011,2014                           Université de Bordeaux
+# Copyright (C) 2010,2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -27,7 +27,7 @@ max=20
 
 
 for logi in `seq 0 $max`
 for logi in `seq 0 $max`
 do
 do
-	$examplebindir/incrementer $i 2> .tmpperf
+	$STARPU_LAUNCH $examplebindir/incrementer $i 2> .tmpperf
 
 
 	grep "ms" .tmpperf
 	grep "ms" .tmpperf
 	grep "ms" .tmpperf | sed -e "s/^\(.*\) elems took \(.*\) ms$/\1	\2/" >> .perftable 
 	grep "ms" .tmpperf | sed -e "s/^\(.*\) elems took \(.*\) ms$/\1	\2/" >> .perftable 

+ 51 - 13
tests/loader.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011,2012,2017                           Inria
  * Copyright (C) 2011,2012,2017                           Inria
  * Copyright (C) 2011-2020                                CNRS
  * Copyright (C) 2011-2020                                CNRS
- * Copyright (C) 2010,2014-2018                           Université de Bordeaux
+ * Copyright (C) 2010,2014-2018,2020                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -219,6 +219,8 @@ int main(int argc, char *argv[])
 	char *test_args;
 	char *test_args;
 	char *launcher;
 	char *launcher;
 	char *launcher_args;
 	char *launcher_args;
+	char *libtool;
+	const char *top_builddir = getenv ("top_builddir");
 	struct sigaction sa;
 	struct sigaction sa;
 	int   ret;
 	int   ret;
 	struct timeval start;
 	struct timeval start;
@@ -291,6 +293,54 @@ int main(int argc, char *argv[])
 	if (launcher_args)
 	if (launcher_args)
 		launcher_args=strdup(launcher_args);
 		launcher_args=strdup(launcher_args);
 
 
+	if (top_builddir == NULL)
+	{
+		fprintf(stderr,
+			"warning: $top_builddir undefined, "
+			"so $STARPU_CHECK_LAUNCHER ignored\n");
+		launcher = NULL;
+		launcher_args = NULL;
+		libtool = NULL;
+	}
+	else
+	{
+		libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1);
+		strcpy(libtool, top_builddir);
+		strcat(libtool, "/libtool");
+	}
+
+	if (launcher)
+	{
+		const char *top_srcdir = getenv("top_srcdir");
+		decode(&launcher, "@top_srcdir@", top_srcdir);
+		decode(&launcher_args, "@top_srcdir@", top_srcdir);
+	}
+
+	size_t len = strlen(test_name);
+	if (launcher && len >= 3 &&
+	    test_name[len-3] == '.' &&
+	    test_name[len-2] == 's' &&
+	    test_name[len-1] == 'h')
+	{
+		/* This is a shell script, don't run the check on bash, but pass
+		 * the script the decoded variables */
+		setenv("STARPU_CHECK_LAUNCHER", launcher, 1);
+		if (launcher_args)
+			setenv("STARPU_CHECK_LAUNCHER_ARGS", launcher_args, 1);
+		else
+			launcher_args = "";
+
+		/* And give a convenience macro */
+		size_t len_launch = strlen(libtool) + 1 + strlen("--mode=execute") + 1
+				  + strlen(launcher) + 1 + strlen(launcher_args) + 1;
+		char *launch = malloc(len_launch);
+		snprintf(launch, len_launch, "%s --mode=execute %s %s", libtool, launcher, launcher_args);
+		setenv("STARPU_LAUNCH", launch, 1);
+
+		launcher = NULL;
+		launcher_args = NULL;
+	}
+
 	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
 	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
 
 
 	/* set SIGALARM handler */
 	/* set SIGALARM handler */
@@ -308,19 +358,10 @@ int main(int argc, char *argv[])
 			/* "Launchers" such as Valgrind need to be inserted
 			/* "Launchers" such as Valgrind need to be inserted
 			 * after the Libtool-generated wrapper scripts, hence
 			 * after the Libtool-generated wrapper scripts, hence
 			 * this special-case.  */
 			 * this special-case.  */
-			const char *top_builddir = getenv ("top_builddir");
-			const char *top_srcdir = getenv("top_srcdir");
 			if (top_builddir != NULL)
 			if (top_builddir != NULL)
 			{
 			{
 				char *launcher_argv[100];
 				char *launcher_argv[100];
 				int i=3;
 				int i=3;
-				char libtool[strlen(top_builddir)
-					     + sizeof("libtool") + 1];
-				strcpy(libtool, top_builddir);
-				strcat(libtool, "/libtool");
-
-				decode(&launcher, "@top_srcdir@", top_srcdir);
-				decode(&launcher_args, "@top_srcdir@", top_srcdir);
 
 
 				launcher_argv[0] = libtool;
 				launcher_argv[0] = libtool;
 				launcher_argv[1] = "--mode=execute";
 				launcher_argv[1] = "--mode=execute";
@@ -341,9 +382,6 @@ int main(int argc, char *argv[])
 			}
 			}
 			else
 			else
 			{
 			{
-				fprintf(stderr,
-					"warning: $top_builddir undefined, "
-					"so $STARPU_CHECK_LAUNCHER ignored\n");
 				execl(test_name, test_name, test_args, NULL);
 				execl(test_name, test_name, test_args, NULL);
 			}
 			}
 		}
 		}

+ 2 - 2
tests/main/combined_workers/bfs/run.sh

@@ -3,7 +3,7 @@
 #
 #
 # Copyright (C) 2012,2016,2017                           CNRS
 # Copyright (C) 2012,2016,2017                           CNRS
 # Copyright (C) 2012                                     Inria
 # Copyright (C) 2012                                     Inria
-# Copyright (C) 2014,2019                                Université de Bordeaux
+# Copyright (C) 2014,2019-2020                           Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -31,4 +31,4 @@ export STARPU_NCUDA=0
 export STARPU_NOPENCL=0
 export STARPU_NOPENCL=0
 export STARPU_WORKER_STATS=1
 export STARPU_WORKER_STATS=1
 export STARPU_CALIBRATE=1
 export STARPU_CALIBRATE=1
-./bfs data/graph65536.txt
+$STARPU_LAUNCH ./bfs data/graph65536.txt

+ 2 - 2
tests/memory/memstress.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -41,7 +41,7 @@ trace_stress()
 		echo "Computing size $size with $memstress MB of memory LESS"
 		echo "Computing size $size with $memstress MB of memory LESS"
 		
 		
 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
-		timing=`$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
+		timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
 	
 	
 		echo "size : $size memstress $memstress => $timing us"
 		echo "size : $size memstress $memstress => $timing us"
 
 

+ 2 - 2
tests/memory/memstress2.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -40,7 +40,7 @@ trace_stress()
 
 
 		
 		
 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
-		timing=`$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
+		timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
 	
 	
 		echo "size : $size memstress $stress => $timing us"
 		echo "size : $size memstress $stress => $timing us"
 
 

+ 19 - 0
tests/microbenchs/async_tasks_data_overhead.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020                                     Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+ROOT=${0%.sh}
+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
+exec $STARPU_LAUNCH $ROOT -b 1 "$@"

+ 53 - 25
tests/microbenchs/async_tasks_overhead.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2014,2016                           Université de Bordeaux
+ * Copyright (C) 2009-2014,2016,2020                      Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,7 +24,17 @@
  * Measure the cost of submitting asynchronous tasks
  * Measure the cost of submitting asynchronous tasks
  */
  */
 
 
+starpu_data_handle_t data_handles[8];
+float *buffers[8];
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 128;
+#else
 static unsigned ntasks = 65536;
 static unsigned ntasks = 65536;
+#endif
+static unsigned nbuffers = 0;
+
+#define BUFFERSIZE 16
 
 
 //static unsigned finished = 0;
 //static unsigned finished = 0;
 
 
@@ -45,36 +55,29 @@ static struct starpu_codelet dummy_codelet =
         .opencl_funcs = {dummy_func},
         .opencl_funcs = {dummy_func},
 	.cpu_funcs_name = {"dummy_func"},
 	.cpu_funcs_name = {"dummy_func"},
 	.model = NULL,
 	.model = NULL,
-	.nbuffers = 0
+	.nbuffers = 0,
+	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
 };
 };
 
 
-//static void inject_one_task(void)
-//{
-//	struct starpu_task *task = starpu_task_create();
-//
-//	task->cl = &dummy_codelet;
-//	task->cl_arg = NULL;
-//	task->detach = 0;
-//
-//	int ret = starpu_task_submit(task);
-//	STARPU_ASSERT(!ret);
-//}
-
 static void usage(char **argv)
 static void usage(char **argv)
 {
 {
-	fprintf(stderr, "%s [-i ntasks] [-p sched_policy] [-h]\n", argv[0]);
-	exit(-1);
+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
+	exit(EXIT_FAILURE);
 }
 }
 
 
 static void parse_args(int argc, char **argv, struct starpu_conf *conf)
 static void parse_args(int argc, char **argv, struct starpu_conf *conf)
 {
 {
 	int c;
 	int c;
-	while ((c = getopt(argc, argv, "i:p:h")) != -1)
+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
 	switch(c)
 	switch(c)
 	{
 	{
 		case 'i':
 		case 'i':
 			ntasks = atoi(optarg);
 			ntasks = atoi(optarg);
 			break;
 			break;
+		case 'b':
+			nbuffers = atoi(optarg);
+			dummy_codelet.nbuffers = nbuffers;
+			break;
 		case 'p':
 		case 'p':
 			conf->sched_policy_name = optarg;
 			conf->sched_policy_name = optarg;
 			break;
 			break;
@@ -96,19 +99,22 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 	conf.ncpus = 2;
 	conf.ncpus = 2;
 
 
-#ifdef STARPU_QUICK_CHECK
-	ntasks = 128;
-#endif
-
 	parse_args(argc, argv, &conf);
 	parse_args(argc, argv, &conf);
 
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
+	unsigned buffer;
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
+	}
+
 	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
 	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
 
 
-	fprintf(stderr, "#tasks : %u\n", ntasks);
+	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
 
 
 	/* Create an array of tasks */
 	/* Create an array of tasks */
 	struct starpu_task **tasks = (struct starpu_task **) malloc(ntasks*sizeof(struct starpu_task *));
 	struct starpu_task **tasks = (struct starpu_task **) malloc(ntasks*sizeof(struct starpu_task *));
@@ -117,8 +123,14 @@ int main(int argc, char **argv)
 	{
 	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
 		task->cl = &dummy_codelet;
 		task->cl = &dummy_codelet;
-		task->cl_arg = NULL;
 		task->detach = 0;
 		task->detach = 0;
+
+		/* we have 8 buffers at most */
+		for (buffer = 0; buffer < nbuffers; buffer++)
+		{
+			task->handles[buffer] = data_handles[buffer];
+		}
+
 		tasks[i] = task;
 		tasks[i] = task;
 	}
 	}
 
 
@@ -165,21 +177,37 @@ int main(int argc, char **argv)
 
 
                 if (output_dir && bench_id)
                 if (output_dir && bench_id)
 		{
 		{
+                        char number[1+sizeof(nbuffers)*3+1];
+                        const char *numberp;
                         char file[1024];
                         char file[1024];
                         FILE *f;
                         FILE *f;
 
 
-                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_total.dat", output_dir);
+                        if (nbuffers)
+                        {
+                                snprintf(number, sizeof(number), "_%u", nbuffers);
+                                numberp = number;
+                        }
+                        else
+                                numberp = "";
+
+                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_total%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
                         fclose(f);
                         fclose(f);
                 }
                 }
         }
         }
 
 
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		starpu_data_unregister(data_handles[buffer]);
+		starpu_free((void*)buffers[buffer]);
+	}
+
 	starpu_shutdown();
 	starpu_shutdown();
 	free(tasks);
 	free(tasks);
 
 

+ 2 - 2
tests/microbenchs/microbench.sh

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2016,2017                                CNRS
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017,2019                           Université de Bordeaux
+# Copyright (C) 2016,2017,2019-2020                      Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -50,7 +50,7 @@ test_scheds()
 	for sched in $SCHEDS;
 	for sched in $SCHEDS;
 	do
 	do
 	    	set +e
 	    	set +e
-		STARPU_SCHED=$sched $(dirname $0)/$TEST "$@"
+		STARPU_SCHED=$sched $STARPU_LAUNCH $(dirname $0)/$TEST "$@"
 		ret=$?
 		ret=$?
 	    	set -e
 	    	set -e
 		if test $ret = 0
 		if test $ret = 0

+ 2 - 2
tests/microbenchs/starpu_check.sh

@@ -1,7 +1,7 @@
 #!/bin/sh
 #!/bin/sh
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2011,2014                           Université de Bordeaux
+# Copyright (C) 2009-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2011,2015,2017                      CNRS
 # Copyright (C) 2010,2011,2015,2017                      CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -40,7 +40,7 @@ test_with_timeout()
 
 
 	echo "$application"
 	echo "$application"
 
 
-	$application > /dev/null 2> /dev/null & _pid_appli=$!;
+	$STARPU_LAUNCH $application > /dev/null 2> /dev/null & _pid_appli=$!;
 	(sleep $timeout ; kill -9 $_pid_appli 2> /dev/null) & _pid_killer=$!
 	(sleep $timeout ; kill -9 $_pid_appli 2> /dev/null) & _pid_killer=$!
 	wait $_pid_appli
 	wait $_pid_appli
 	ret=$?
 	ret=$?

+ 19 - 0
tests/microbenchs/sync_tasks_data_overhead.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020                                     Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+ROOT=${0%.sh}
+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
+exec $STARPU_LAUNCH $ROOT -b 1 "$@"

+ 70 - 15
tests/microbenchs/sync_tasks_overhead.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2014,2016                           Université de Bordeaux
+ * Copyright (C) 2009-2014,2016,2020                      Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -25,7 +25,17 @@
  * Measure the cost of submitting synchronous tasks
  * Measure the cost of submitting synchronous tasks
  */
  */
 
 
+starpu_data_handle_t data_handles[8];
+float *buffers[8];
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 128;
+#else
 static unsigned ntasks = 65536;
 static unsigned ntasks = 65536;
+#endif
+static unsigned nbuffers = 0;
+
+#define BUFFERSIZE 16
 
 
 void dummy_func(void *descr[], void *arg)
 void dummy_func(void *descr[], void *arg)
 {
 {
@@ -40,11 +50,11 @@ static struct starpu_codelet dummy_codelet =
         .opencl_funcs = {dummy_func},
         .opencl_funcs = {dummy_func},
 	.cpu_funcs_name = {"dummy_func"},
 	.cpu_funcs_name = {"dummy_func"},
 	.model = NULL,
 	.model = NULL,
-	.nbuffers = 0
+	.nbuffers = 0,
+	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
 };
 };
 
 
-static
-int inject_one_task(void)
+static int inject_one_task(void)
 {
 {
 	int ret;
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
@@ -59,15 +69,31 @@ int inject_one_task(void)
 
 
 }
 }
 
 
-static void parse_args(int argc, char **argv)
+static void usage(char **argv)
+{
+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
+	exit(EXIT_FAILURE);
+}
+
+static void parse_args(int argc, char **argv, struct starpu_conf *conf)
 {
 {
 	int c;
 	int c;
-	while ((c = getopt(argc, argv, "i:")) != -1)
+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
 	switch(c)
 	switch(c)
 	{
 	{
 		case 'i':
 		case 'i':
 			ntasks = atoi(optarg);
 			ntasks = atoi(optarg);
 			break;
 			break;
+		case 'b':
+			nbuffers = atoi(optarg);
+			dummy_codelet.nbuffers = nbuffers;
+			break;
+		case 'p':
+			conf->sched_policy_name = optarg;
+			break;
+		case 'h':
+			usage(argv);
+			break;
 	}
 	}
 }
 }
 
 
@@ -82,22 +108,35 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 	conf.ncpus = 2;
 	conf.ncpus = 2;
 
 
-#ifdef STARPU_QUICK_CHECK
-	ntasks = 128;
-#endif
-
-	parse_args(argc, argv);
+	parse_args(argc, argv, &conf);
 
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-	fprintf(stderr, "#tasks : %u\n", ntasks);
+	unsigned buffer;
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
+	}
+
+	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
 
 
 	start = starpu_timing_now();
 	start = starpu_timing_now();
 	for (i = 0; i < ntasks; i++)
 	for (i = 0; i < ntasks; i++)
 	{
 	{
-		ret = inject_one_task();
+		struct starpu_task *task = starpu_task_create();
+		task->cl = &dummy_codelet;
+		task->synchronous = 1;
+
+		/* we have 8 buffers at most */
+		for (buffer = 0; buffer < nbuffers; buffer++)
+		{
+			task->handles[buffer] = data_handles[buffer];
+		}
+
+		ret = starpu_task_submit(task);
 		if (ret == -ENODEV) goto enodev;
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 	}
@@ -114,21 +153,37 @@ int main(int argc, char **argv)
 
 
                 if (output_dir && bench_id)
                 if (output_dir && bench_id)
 		{
 		{
+                        char number[1+sizeof(nbuffers)*3+1];
+                        const char *numberp;
                         char file[1024];
                         char file[1024];
                         FILE *f;
                         FILE *f;
 
 
-                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total.dat", output_dir);
+                        if (nbuffers)
+                        {
+                                snprintf(number, sizeof(number), "_%u", nbuffers);
+                                numberp = number;
+                        }
+                        else
+                                numberp = "";
+
+                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
                         fclose(f);
                         fclose(f);
                 }
                 }
         }
         }
 
 
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		starpu_data_unregister(data_handles[buffer]);
+		starpu_free((void*)buffers[buffer]);
+	}
+
 	starpu_shutdown();
 	starpu_shutdown();
 
 
 	return EXIT_SUCCESS;
 	return EXIT_SUCCESS;

+ 19 - 0
tests/microbenchs/tasks_data_overhead.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020                                     Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+ROOT=${0%.sh}
+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
+exec $STARPU_LAUNCH $ROOT -b 1 "$@"

+ 63 - 42
tests/microbenchs/tasks_overhead.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011,2013,2014,2016                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2013,2014,2016,2020            Université de Bordeaux
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2013                                     Inria
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  *
@@ -36,6 +36,8 @@ static unsigned ntasks = 65536;
 #endif
 #endif
 static unsigned nbuffers = 0;
 static unsigned nbuffers = 0;
 
 
+#define BUFFERSIZE 16
+
 struct starpu_task *tasks;
 struct starpu_task *tasks;
 
 
 void dummy_func(void *descr[], void *arg)
 void dummy_func(void *descr[], void *arg)
@@ -55,25 +57,16 @@ static struct starpu_codelet dummy_codelet =
 	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
 	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
 };
 };
 
 
-static
-int inject_one_task(void)
+static void usage(char **argv)
 {
 {
-	struct starpu_task *task = starpu_task_create();
-
-	task->cl = &dummy_codelet;
-	task->cl_arg = NULL;
-	task->callback_func = NULL;
-	task->synchronous = 1;
-
-	int ret;
-	ret = starpu_task_submit(task);
-	return ret;
+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
+	exit(EXIT_FAILURE);
 }
 }
 
 
-static void parse_args(int argc, char **argv)
+static void parse_args(int argc, char **argv, struct starpu_conf *conf)
 {
 {
 	int c;
 	int c;
-	while ((c = getopt(argc, argv, "i:b:h")) != -1)
+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
 	switch(c)
 	switch(c)
 	{
 	{
 		case 'i':
 		case 'i':
@@ -83,8 +76,11 @@ static void parse_args(int argc, char **argv)
 			nbuffers = atoi(optarg);
 			nbuffers = atoi(optarg);
 			dummy_codelet.nbuffers = nbuffers;
 			dummy_codelet.nbuffers = nbuffers;
 			break;
 			break;
+		case 'p':
+			conf->sched_policy_name = optarg;
+			break;
 		case 'h':
 		case 'h':
-			fprintf(stderr, "Usage: %s [-i ntasks] [-b nbuffers] [-h]\n", argv[0]);
+			usage(argv);
 			break;
 			break;
 	}
 	}
 }
 }
@@ -105,7 +101,7 @@ int main(int argc, char **argv)
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 	conf.ncpus = 2;
 	conf.ncpus = 2;
 
 
-	parse_args(argc, argv);
+	parse_args(argc, argv, &conf);
 
 
 	ret = starpu_initialize(&conf, &argc, &argv);
 	ret = starpu_initialize(&conf, &argc, &argv);
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
@@ -114,8 +110,8 @@ int main(int argc, char **argv)
 	unsigned buffer;
 	unsigned buffer;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		starpu_malloc((void**)&buffers[buffer], 16*sizeof(float));
-		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], 16, sizeof(float));
+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
 	}
 	}
 
 
 	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
 	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
@@ -126,9 +122,7 @@ int main(int argc, char **argv)
 	for (i = 0; i < ntasks; i++)
 	for (i = 0; i < ntasks; i++)
 	{
 	{
 		starpu_task_init(&tasks[i]);
 		starpu_task_init(&tasks[i]);
-		tasks[i].callback_func = NULL;
 		tasks[i].cl = &dummy_codelet;
 		tasks[i].cl = &dummy_codelet;
-		tasks[i].cl_arg = NULL;
 		tasks[i].synchronous = 0;
 		tasks[i].synchronous = 0;
 		tasks[i].use_tag = 1;
 		tasks[i].use_tag = 1;
 		tasks[i].tag_id = (starpu_tag_t)i;
 		tasks[i].tag_id = (starpu_tag_t)i;
@@ -142,19 +136,33 @@ int main(int argc, char **argv)
 	tasks[ntasks-1].detach = 0;
 	tasks[ntasks-1].detach = 0;
 
 
 	start_submit = starpu_timing_now();
 	start_submit = starpu_timing_now();
-	for (i = 1; i < ntasks; i++)
-	{
-		starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1));
-
-		ret = starpu_task_submit(&tasks[i]);
-		if (ret == -ENODEV) goto enodev;
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
+        if (nbuffers)
+        {
+                /* Data dependency, just submit them all */
+                for (i = 0; i < ntasks; i++)
+                {
+                        ret = starpu_task_submit(&tasks[i]);
+                        if (ret == -ENODEV) goto enodev;
+                        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+                }
+        }
+        else
+        {
+                /* No data dependency, we have to introduce dependencies by hand */
+                for (i = 1; i < ntasks; i++)
+                {
+                        starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1));
+
+                        ret = starpu_task_submit(&tasks[i]);
+                        if (ret == -ENODEV) goto enodev;
+                        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+                }
 
 
-	/* submit the first task */
-	ret = starpu_task_submit(&tasks[0]);
-	if (ret == -ENODEV) goto enodev;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+                /* submit the first task */
+                ret = starpu_task_submit(&tasks[0]);
+                if (ret == -ENODEV) goto enodev;
+                STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+        }
 
 
 	end_submit = starpu_timing_now();
 	end_submit = starpu_timing_now();
 
 
@@ -169,9 +177,6 @@ int main(int argc, char **argv)
 	for (i = 0; i < ntasks; i++)
 	for (i = 0; i < ntasks; i++)
 		starpu_task_clean(&tasks[i]);
 		starpu_task_clean(&tasks[i]);
 
 
-	for (buffer = 0; buffer < nbuffers; buffer++)
-		starpu_data_unregister(data_handles[buffer]);
-
 	timing_submit = end_submit - start_submit;
 	timing_submit = end_submit - start_submit;
 	timing_exec = end_exec - start_exec;
 	timing_exec = end_exec - start_exec;
 
 
@@ -190,41 +195,57 @@ int main(int argc, char **argv)
 
 
                 if (output_dir && bench_id)
                 if (output_dir && bench_id)
 		{
 		{
+                        char number[1+sizeof(nbuffers)*3+1];
+                        const char *numberp;
                         char file[1024];
                         char file[1024];
                         FILE *f;
                         FILE *f;
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit.dat", output_dir);
+                        if (nbuffers)
+                        {
+                                snprintf(number, sizeof(number), "_%u", nbuffers);
+                                numberp = number;
+                        }
+                        else
+                                numberp = "";
+
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/1000000);
                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/1000000);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/ntasks);
                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/ntasks);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/1000000);
                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/1000000);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/ntasks);
                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/ntasks);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/1000000);
                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/1000000);
                         fclose(f);
                         fclose(f);
 
 
-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit_execution.dat", output_dir);
+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit_execution%s.dat", output_dir, numberp);
                         f = fopen(file, "a");
                         f = fopen(file, "a");
                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/ntasks);
                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/ntasks);
                         fclose(f);
                         fclose(f);
                 }
                 }
         }
         }
 
 
+	for (buffer = 0; buffer < nbuffers; buffer++)
+	{
+		starpu_data_unregister(data_handles[buffer]);
+		starpu_free((void*)buffers[buffer]);
+	}
+
 	starpu_shutdown();
 	starpu_shutdown();
 	free(tasks);
 	free(tasks);
 	return EXIT_SUCCESS;
 	return EXIT_SUCCESS;

+ 2 - 2
tests/microbenchs/tasks_size_overhead.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2010,2011,2013,2015,2017                 CNRS
 # Copyright (C) 2010,2011,2013,2015,2017                 CNRS
-# Copyright (C) 2009,2010,2012,2014,2016                 Université de Bordeaux
+# Copyright (C) 2009,2010,2012,2014,2016,2020            Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,6 +16,6 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 #
 ROOT=${0%.sh}
 ROOT=${0%.sh}
-$ROOT "$@" > tasks_size_overhead.output
+$STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
 $ROOT.gp
 $ROOT.gp
 gv tasks_size_overhead.eps
 gv tasks_size_overhead.eps

+ 2 - 2
tests/microbenchs/tasks_size_overhead_sched.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2010,2011,2016,2017                      CNRS
 # Copyright (C) 2010,2011,2016,2017                      CNRS
-# Copyright (C) 2009,2010,2016                           Université de Bordeaux
+# Copyright (C) 2009,2010,2016,2020                      Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +18,7 @@
 ROOT=${0%.sh}
 ROOT=${0%.sh}
 ROOT=${ROOT%_sched}
 ROOT=${ROOT%_sched}
 unset STARPU_SSILENT
 unset STARPU_SSILENT
-$ROOT "$@" > tasks_size_overhead.output
+$STARPU_LAUNCH $_STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
 ret=$?
 ret=$?
 if test "$ret" = "0"
 if test "$ret" = "0"
 then
 then

+ 3 - 1
tests/microbenchs/tasks_size_overhead_scheds.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2016,2017                                CNRS
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2019                                Université de Bordeaux
+# Copyright (C) 2016,2019-2020                           Université de Bordeaux
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -24,4 +24,6 @@ then
 	FAST="-i 8"
 	FAST="-i 8"
 fi
 fi
 
 
+_STARPU_LAUNCH="$STARPU_LAUNCH"
+unset STARPU_LAUNCH
 test_scheds tasks_size_overhead_sched.sh $FAST
 test_scheds tasks_size_overhead_sched.sh $FAST

+ 2 - 2
tests/mult/gflops.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
 # Copyright (C) 2010,2015,2017                           CNRS
 # Copyright (C) 2010,2015,2017                           CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -51,7 +51,7 @@ trace_size()
 		if [ $tile -lt $size -a $nblocks -lt 32 -a $(($size % $tile)) == 0 ];
 		if [ $tile -lt $size -a $nblocks -lt 32 -a $(($size % $tile)) == 0 ];
 		then
 		then
 			echo "start tile $tile size $size nblocks $nblocks  "
 			echo "start tile $tile size $size nblocks $nblocks  "
-			timing=`$ROOTDIR/examples/mult/dw_mult -pin -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
+			timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -pin -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
 		else
 		else
 			timing="x"
 			timing="x"
 		fi
 		fi

+ 0 - 0
tests/mult/sched.sh


Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels