5 vuotta sitten · 1bef44b8f4
--- a/ChangeLog
+++ b/ChangeLog
@@ -43,6 +43,8 @@ Small features:
 
				   * Move optimized cuda 2d copy from interfaces to new
			
 
				     starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use
			
 
				     them from starpu_interface_copy2d and 3d.
			
 
				+  * New function starpu_task_watchdog_set_hook to specify a function
			
 
				+    to be called when the watchdog is raised
			
 
				 
			
 
				 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
			
 
				 ====================================================================
			
--- a/configure.ac
+++ b/configure.ac
@@ -273,34 +273,38 @@ if test x$enable_simgrid = xyes ; then
 
				 		]
			
 
				 	)
			
 
				 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([msg/msg.h], [AC_DEFINE([STARPU_HAVE_MSG_MSG_H], [1], [Define to 1 if you have msg.h in msg/.])])
			
 
				 	AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([xbt/base.h], [AC_DEFINE([STARPU_HAVE_XBT_BASE_H], [1], [Define to 1 if you have base.h in xbt/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/version.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_VERSION_H], [1], [Define to 1 if you have version.h in simgrid/.])], [], [[
			
 
				+			  #ifdef STARPU_HAVE_XBT_BASE_H
			
 
				+			  #include <xbt/base.h>
			
 
				+			  #endif
			
 
				+			  ]])
			
 
				 	AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])])
			
 
				 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
			
 
				+	AC_CHECK_HEADERS([xbt/config.h], [AC_DEFINE([STARPU_HAVE_XBT_CONFIG_H], [1], [Define to 1 if you have config.h in xbt/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/actor.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ACTOR_H], [1], [Define to 1 if you have actor.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/engine.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ENGINE_H], [1], [Define to 1 if you have engine.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/semaphore.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SEMAPHORE_H], [1], [Define to 1 if you have semaphore.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/mutex.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MUTEX_H], [1], [Define to 1 if you have mutex.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/cond.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_COND_H], [1], [Define to 1 if you have cond.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/barrier.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_BARRIER_H], [1], [Define to 1 if you have barrier.h in simgrid/.])])
			
 
				+	AC_CHECK_HEADERS([simgrid/engine.h])
			
 
				+	AC_CHECK_HEADERS([simgrid/zone.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ZONE_H], [1], [Define to 1 if you have zone.h in simgrid/.])])
			
 
				 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
			
 
				 
			
 
				 	# Latest functions
			
 
				-	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init MSG_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
			
 
				-	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
			
 
				+	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init MSG_zone_get_hosts sg_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
			
 
				+	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
			
 
				+	AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])])
			
 
				 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
			
 
				+	AC_CHECK_FUNCS([sg_actor_sleep_for sg_actor_self sg_actor_ref sg_host_get_properties sg_host_send_to sg_host_sendto sg_cfg_set_int sg_actor_self_execute simgrid_get_clock])
			
 
				 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
			
 
				 
			
 
				 	# Oldies for compatibility with older simgrid
			
 
				 	AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed])
			
 
				 
			
 
				-	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
			
 
				-		    		[[
			
 
				-#ifdef STARPU_HAVE_SIMGRID_MSG_H
			
 
				-#include <simgrid/msg.h>
			
 
				-#else
			
 
				-#include <msg/msg.h>
			
 
				-#endif
			
 
				-				 ]],
			
 
				-				[[msg_host_t foo; ]]
			
 
				-			    )],
			
 
				-	                 [],
			
 
				-	                 [
			
 
				-			   AC_MSG_ERROR(StarPU needs a version of Simgrid which defines the type msg_host_t (should be any version >= 3.8.1))
			
 
				-		         ])
			
 
				 	AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution])
			
 
				 	# We won't bind or detect anything
			
 
				 	with_hwloc=no
			
@@ -727,7 +731,7 @@ fi
 
				 if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
			
 
				     if test x$enable_simgrid = xyes ; then
			
 
				         if test x$enable_shared = xyes ; then
			
 
				-	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this])
			
 
				+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this, or disable MPI with --disable-mpi])
			
 
				         else
			
 
				 	    CFLAGS="$CFLAGS -fPIC"
			
 
				 	    CXXFLAGS="$CXXFLAGS -fPIC"
			
@@ -920,6 +924,12 @@ if test x$have_pthread_setname_np = xyes; then
 
				 	AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available])
			
 
				 fi
			
 
				 
			
 
				+if test "x$cross_compiling" = "xno"; then
			
 
				+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER)
			
 
				+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_cond_t, PTHREAD_COND_INITIALIZER)
			
 
				+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_rwlock_t, PTHREAD_RWLOCK_INITIALIZER)
			
 
				+fi
			
 
				+
			
 
				 # There is no posix_memalign on Mac OS X, only memalign
			
 
				 AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])])
			
 
				 AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])])
			
@@ -3564,6 +3574,9 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
				   chmod +x doc/doxygen/doxygen_filter.sh
			
 
				   chmod +x doc/doxygen_dev/doxygen_filter.sh
			
 
				   mkdir -p tests/microbenchs
			
 
				+  test -e tests/microbenchs/tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_data_overhead.sh tests/microbenchs/
			
 
				+  test -e tests/microbenchs/sync_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/sync_tasks_data_overhead.sh tests/microbenchs/
			
 
				+  test -e tests/microbenchs/async_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/async_tasks_data_overhead.sh tests/microbenchs/
			
 
				   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
			
 
				   test -e tests/microbenchs/tasks_size_overhead_sched.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_sched.sh tests/microbenchs/
			
 
				   test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/
			
--- a/doc/doxygen/chapters/380_offline_performance_tools.doxy
+++ b/doc/doxygen/chapters/380_offline_performance_tools.doxy
@@ -572,15 +572,37 @@ $ starpu_paje_sort paje.trace
 
				 
			
 
				 \section PapiCounters PAPI counters
			
 
				 
			
 
				-Performance counter values can be obtained from the PAPI framework if
			
 
				+Performance counter values could be obtained from the PAPI framework if
			
 
				 <c>./configure</c> detected the libpapi. One has to set the \ref STARPU_PROFILING
			
 
				-environment variable to 1 and then specify which counters to record with the
			
 
				+environment variable to 1 and then specify which events to record with the
			
 
				 \ref STARPU_PROF_PAPI_EVENTS environment variable. For instance:
			
 
				 
			
 
				 \verbatim
			
 
				 export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC"
			
 
				 \endverbatim
			
 
				 
			
 
				+In the current simple implementation, only CPU tasks have their events measured
			
 
				+and require CPUs that support the PAPI events. All events that PAPI supports are
			
 
				+available from their documentation (https://icl.cs.utk.edu/projects/papi/wiki/PAPIC:Preset_Event_Definitions).
			
 
				+It is important to note that not all events are available on all systems, and
			
 
				+general PAPI recommendations should be followed.
			
 
				+
			
 
				+The counter values can be accessed using the profiling interface:
			
 
				+\code{.c}
			
 
				+task->profiling_info->papi_values
			
 
				+\endcode
			
 
				+They can also be accessed and/or saved with tracing when using \ref STARPU_FXT_TRACE. With the use of <c>starpu_fxt_tool</c>
			
 
				+the file <c>papi.rec</c> is generated containing the following triple:
			
 
				+
			
 
				+\verbatim
			
 
				+Task Id
			
 
				+Event Id
			
 
				+Value
			
 
				+\endverbatim
			
 
				+
			
 
				+External tools like <c>rec2csv</c> can be used to convert this rec file to a <c>csv</c>, where each
			
 
				+line represents a value for an event for a task.
			
 
				+
			
 
				 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
			
 
				 
			
 
				 StarPU can record a trace of what tasks are needed to complete the
			
@@ -645,6 +667,69 @@ the priorities as the StarPU scheduler would, i.e. schedule prioritized
 
				 tasks before less prioritized tasks, to check to which extend this results
			
 
				 to a less optimal solution. This increases even more computation time.
			
 
				 
			
 
				+\section starvz Trace visualization with StarVZ
			
 
				+
			
 
				+Creating views with StarVZ (see: https://github.com/schnorr/starvz) is made up of two steps. The initial
			
 
				+stage consists of a pre-processing of the traces generated by the application.
			
 
				+The second step consists of the analysis itself and is carried out with the
			
 
				+aid of R packages. To download and install StarVZ, it is necessary to have R,
			
 
				+pajeng and the following packages:
			
 
				+
			
 
				+\verbatim
			
 
				+# For pajeng
			
 
				+apt install -y git cmake build-essential libboost-dev asciidoc flex bison
			
 
				+git clone git://github.com/schnorr/pajeng.git
			
 
				+mkdir -p pajeng/b ; cd pajeng/b
			
 
				+cmake ..
			
 
				+make
			
 
				+
			
 
				+# For R tidyverse
			
 
				+apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev
			
 
				+\endverbatim
			
 
				+
			
 
				+To install StarVZ, the following commands can be used:
			
 
				+
			
 
				+\verbatim
			
 
				+git clone https://github.com/schnorr/starvz.git
			
 
				+echo "install.packages(c('tidyverse', 'devtools'), repos = 'https://cloud.r-project.org')" | R --vanilla
			
 
				+echo "library(devtools); devtools::install_local(path='./starvz/R_package')" | R --vanilla
			
 
				+\endverbatim
			
 
				+
			
 
				+To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE
			
 
				+and build StarPU with FxT. Then, Step 1 of StarVZ can be used on a folder with
			
 
				+StarPU FxT traces:
			
 
				+
			
 
				+\verbatim
			
 
				+export PATH=starvz/:$PATH
			
 
				+export PATH=pajeng/b:$PATH
			
 
				+export PATH=$STARPU_HOME/bin:$PATH
			
 
				+
			
 
				+./starvz/src/phase1-workflow.sh /tmp/ ""
			
 
				+\endverbatim
			
 
				+
			
 
				+Then the second step can be executed directly in R. StarVZ enables a set of
			
 
				+different plots that can be configured in a .yaml file. A default file is provided
			
 
				+<c>full_config.yaml</c>; also the options can be changed directly in R.
			
 
				+
			
 
				+\verbatim
			
 
				+library(starvz)
			
 
				+dtrace <- the_fast_reader_function("./")
			
 
				+
			
 
				+pajer <- config::get(file = "starvz/full_config.yaml")
			
 
				+
			
 
				+pajer$starpu$active = TRUE
			
 
				+pajer$submitted$active = TRUE
			
 
				+pajer$st$abe$active = TRUE
			
 
				+
			
 
				+plot <- the_master_function(dtrace)
			
 
				+\endverbatim
			
 
				+
			
 
				+An example of visualization follows:
			
 
				+
			
 
				+\image html starvz_visu.png
			
 
				+\image latex starvz_visu.eps "" width=\textwidth
			
 
				+
			
 
				+
			
 
				 \section MemoryFeedback Memory Feedback
			
 
				 
			
 
				 It is possible to enable memory statistics. To do so, you need to pass
			
--- a/doc/doxygen/chapters/470_simgrid.doxy
+++ b/doc/doxygen/chapters/470_simgrid.doxy
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2014-2020                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -23,7 +23,8 @@
 
				 /*! \page SimGridSupport SimGrid Support
			
 
				 
			
 
				 StarPU can use Simgrid in order to simulate execution on an arbitrary
			
 
				-platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to 3.24.
			
 
				+platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
			
 
				+3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON .
			
 
				 Other versions may have compatibility issues. 3.17 notably does not build at
			
 
				 all. MPI simulation does not work with version 3.22.
			
 
				 
			
--- a/doc/doxygen/chapters/api/threads.doxy
+++ b/doc/doxygen/chapters/api/threads.doxy
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2017, 2019                          CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2014,2016,2020                 Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -35,6 +35,13 @@ Call starpu_pthread_create() and abort on error.
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_mutex_init() and abort on error.
			
 
				 
			
 
				+\def STARPU_PTHREAD_MUTEX_INIT0
			
 
				+\ingroup API_Threads
			
 
				+Call starpu_pthread_mutex_init() only if the content of
			
 
				+PTHREAD_MUTEX_INITIALIZER is not zero. This should be called instead
			
 
				+of STARPU_PTHREAD_MUTEX_INIT when it is known that the content of the
			
 
				+pthread_mutex_t was already zeroed.
			
 
				+
			
 
				 \def STARPU_PTHREAD_MUTEX_DESTROY
			
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_mutex_destroy() and abort on error.
			
@@ -67,6 +74,13 @@ Call starpu_pthread_getspecific() and abort on error.
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_rwlock_init() and abort on error.
			
 
				 
			
 
				+\def STARPU_PTHREAD_RWLOCK_INIT0
			
 
				+\ingroup API_Threads
			
 
				+Call starpu_pthread_rwlock_init() only if the content of
			
 
				+PTHREAD_RWLOCK_INITIALIZER is not zero. This should be called instead
			
 
				+of STARPU_PTHREAD_RWLOCK_INIT when it is known that the content of the
			
 
				+pthread_rwlock_t was already zeroed.
			
 
				+
			
 
				 \def STARPU_PTHREAD_RWLOCK_RDLOCK
			
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_rwlock_rdlock() and abort on error.
			
@@ -87,6 +101,13 @@ Call starpu_pthread_rwlock_destroy() and abort on error.
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_cond_init() and abort on error.
			
 
				 
			
 
				+\def STARPU_PTHREAD_COND_INIT0
			
 
				+\ingroup API_Threads
			
 
				+Call starpu_pthread_cond_init() only if the content of
			
 
				+PTHREAD_COND_INITIALIZER is not zero. This should be called instead
			
 
				+of STARPU_PTHREAD_COND_INIT when it is known that the content of the
			
 
				+pthread_cond_t was already zeroed.
			
 
				+
			
 
				 \def STARPU_PTHREAD_COND_DESTROY
			
 
				 \ingroup API_Threads
			
 
				 Call starpu_pthread_cond_destroy() and abort on error.
			
--- a/doc/doxygen/chapters/images/starvz_visu.eps
+++ b/doc/doxygen/chapters/images/starvz_visu.eps
--- a/doc/doxygen/chapters/images/starvz_visu.png
+++ b/doc/doxygen/chapters/images/starvz_visu.png
--- a/doc/doxygen_dev/Makefile.am
+++ b/doc/doxygen_dev/Makefile.am
@@ -1,6 +1,6 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2010-2018                                CNRS
			
 
				+# Copyright (C) 2010-2018, 2020                          CNRS
			
 
				 # Copyright (C) 2013-2018                                Inria
			
 
				 # Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
			
 
				 #
			
@@ -126,7 +126,6 @@ $(DOX_TAG): $(dox_inputs)
 
				 
			
 
				 $(DOX_PDF): $(DOX_TAG) refman.tex
			
 
				 	@cp $(top_srcdir)/doc/doxygen_dev/chapters/version.sty $(DOX_LATEX_DIR)
			
 
				-	@-cp $(top_srcdir)/doc/doxygen_dev/chapters/images/*pdf $(DOX_LATEX_DIR)
			
 
				 	@echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex
			
 
				 	@cd $(DOX_LATEX_DIR) ;\
			
 
				 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\
			
--- a/examples/cholesky/cholesky.sh
+++ b/examples/cholesky/cholesky.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2018-2019                                Université de Bordeaux
			
 
				+# Copyright (C) 2018-2020                                Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -34,7 +34,7 @@ for size in `seq 2 2 30` ; do
 
				 	for STARPU_SCHED in $STARPU_SCHEDS
			
 
				 	do
			
 
				 		export STARPU_SCHED
			
 
				-		GFLOPS=`${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlops | cut -d '	' -f 3`
			
 
				+		GFLOPS=`$STARPU_LAUNCH ${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlops | cut -d '	' -f 3`
			
 
				 		[ -n "$GFLOPS" ] || GFLOPS='""'
			
 
				 		echo -n "	$GFLOPS"
			
 
				 	done
			
--- a/examples/heat/heat.sh
+++ b/examples/heat/heat.sh
@@ -3,7 +3,7 @@
 
				 #
			
 
				 # Copyright (C) 2017                                     CNRS
			
 
				 # Copyright (C) 2017                                     Inria
			
 
				-# Copyright (C) 2017                                     Université de Bordeaux
			
 
				+# Copyright (C) 2017, 2020                                     Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -28,23 +28,23 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 
				 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat
			
 
				 fi
			
 
				 
			
 
				-$PREFIX/heat -shape 0
			
 
				-$PREFIX/heat -shape 1
			
 
				+$STARPU_LAUNCH $PREFIX/heat -shape 0
			
 
				+$STARPU_LAUNCH $PREFIX/heat -shape 1
			
 
				 # sometimes lead to pivot being 0
			
 
				-#$PREFIX/heat -shape 2
			
 
				+#$STARPU_LAUNCH $PREFIX/heat -shape 2
			
 
				 
			
 
				-$PREFIX/heat -cg
			
 
				+$STARPU_LAUNCH $PREFIX/heat -cg
			
 
				 
			
 
				 # TODO: FIXME
			
 
				 
			
 
				 # segfault
			
 
				-#$PREFIX/heat -v1
			
 
				+#$STARPU_LAUNCH $PREFIX/heat -v1
			
 
				 
			
 
				 # (actually the default...)
			
 
				-$PREFIX/heat -v2
			
 
				+$STARPU_LAUNCH $PREFIX/heat -v2
			
 
				 
			
 
				 # hang
			
 
				-#$PREFIX/heat -v3
			
 
				+#$STARPU_LAUNCH $PREFIX/heat -v3
			
 
				 
			
 
				 # hang
			
 
				-#$PREFIX/heat -v4
			
 
				+#$STARPU_LAUNCH $PREFIX/heat -v4
			
--- a/examples/lu/lu.sh
+++ b/examples/lu/lu.sh
@@ -2,7 +2,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2017                                     CNRS
			
 
				-# Copyright (C) 2017,2019                                Université de Bordeaux
			
 
				+# Copyright (C) 2017,2019-2020                                Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -27,11 +27,11 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 
				 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float
			
 
				 fi
			
 
				 
			
 
				-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
			
 
				-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride
			
 
				-$PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -bound
			
 
				-$PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
			
 
				-$PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
			
 
				+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
			
 
				+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride
			
 
				+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -bound
			
 
				+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
			
 
				+$STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
			
 
				 
			
 
				 if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
			
 
				 	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/lu_example_float
			
@@ -39,8 +39,8 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 
				 	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float
			
 
				 fi
			
 
				 
			
 
				-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -piv
			
 
				-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -no-stride
			
 
				-$PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -bound
			
 
				-$PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
			
 
				-$PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
			
 
				+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -piv
			
 
				+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -no-stride
			
 
				+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 4)) -nblocks 4 -bound
			
 
				+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bounddeps
			
 
				+$STARPU_LAUNCH $PREFIX/lu_example_float -size $((160 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2017, 2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2017,2019-2020                      Université de Bordeaux
			
 
				  * Copyright (C) 2012,2013                                Inria
			
 
				  * Copyright (C) 2017                                     Erwan Leria
			
 
				  * Copyright (C) 2010                                     Mehdi Juhoor
			
@@ -29,6 +29,7 @@
 
				 
			
 
				 #include <limits.h>
			
 
				 #include <string.h>
			
 
				+#include <unistd.h>
			
 
				 #include <math.h>
			
 
				 #include <sys/types.h>
			
 
				 #include <starpu.h>
			
@@ -58,6 +59,7 @@ static unsigned zdim = 960*4;
 
				 #endif
			
 
				 static unsigned check = 0;
			
 
				 static unsigned bound = 0;
			
 
				+static unsigned print_hostname = 0;
			
 
				 
			
 
				 static TYPE *A, *B, *C;
			
 
				 static starpu_data_handle_t A_handle, B_handle, C_handle;
			
@@ -304,6 +306,11 @@ static void parse_args(int argc, char **argv)
 
				 			bound = 1;
			
 
				 		}
			
 
				 
			
 
				+		else if (strcmp(argv[i], "-hostname") == 0)
			
 
				+		{
			
 
				+			print_hostname = 1;
			
 
				+		}
			
 
				+
			
 
				 		else if (strcmp(argv[i], "-check") == 0)
			
 
				 		{
			
 
				 			check = 1;
			
@@ -316,7 +323,7 @@ static void parse_args(int argc, char **argv)
 
				 
			
 
				 		else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
			
 
				 		{
			
 
				-			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-xy n] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd]\n", argv[0]);
			
 
				+			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-xy n] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname]\n", argv[0]);
			
 
				 			fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter);
			
 
				 			exit(EXIT_SUCCESS);
			
 
				 		}
			
@@ -400,10 +407,19 @@ int main(int argc, char **argv)
 
				 	if (bound)
			
 
				 		starpu_bound_compute(&min, &min_int, 1);
			
 
				 
			
 
				-	PRINTF("# x\ty\tz\tms\tGFlops");
			
 
				+	PRINTF("# ");
			
 
				+	if (print_hostname)
			
 
				+		PRINTF("node\t");
			
 
				+	PRINTF("x\ty\tz\tms\tGFlops");
			
 
				 	if (bound)
			
 
				 		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
			
 
				 	PRINTF("\n");
			
 
				+	if (print_hostname)
			
 
				+	{
			
 
				+		char hostname[255];
			
 
				+		gethostname(hostname, 255);
			
 
				+		PRINTF("%s\t", hostname);
			
 
				+	}
			
 
				 	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
			
 
				 	if (bound)
			
 
				 		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);
			
--- a/examples/scheduler/schedulers.sh
+++ b/examples/scheduler/schedulers.sh
@@ -3,7 +3,7 @@
 
				 #
			
 
				 # Copyright (C) 2012                                     Inria
			
 
				 # Copyright (C) 2012-2015,2017,2018                      CNRS
			
 
				-# Copyright (C) 2012,2017,2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2012,2017,2019-2020                      Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -40,6 +40,6 @@ fi
 
				 for sched in $SCHEDULERS
			
 
				 do
			
 
				     echo "cholesky.$sched"
			
 
				-    STARPU_SCHED=$sched ./cholesky/cholesky_tag -size $((960*3)) -nblocks 3
			
 
				+    STARPU_SCHED=$sched $STARPU_LAUNCH ./cholesky/cholesky_tag -size $((960*3)) -nblocks 3
			
 
				     check_success $?
			
 
				 done
			
--- a/examples/scheduler/schedulers_context.sh
+++ b/examples/scheduler/schedulers_context.sh
@@ -2,7 +2,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2012,2014,2015,2017,2018                 CNRS
			
 
				-# Copyright (C) 2017,2019                                Université de Bordeaux
			
 
				+# Copyright (C) 2017,2019-2020                           Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -39,6 +39,6 @@ fi
 
				 for sched in $SCHEDULERS
			
 
				 do
			
 
				     echo "sched_ctx.$sched"
			
 
				-    STARPU_SCHED=$sched ./sched_ctx/sched_ctx
			
 
				+    STARPU_SCHED=$sched $STARPU_LAUNCH ./sched_ctx/sched_ctx
			
 
				     check_success $?
			
 
				 done
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011,2012,2014,2016,2017                 Inria
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2017,2019                           CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -44,6 +44,7 @@
 
				 #undef STARPU_SIMGRID_MC
			
 
				 #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
			
 
				 #undef STARPU_HAVE_SIMGRID_MSG_H
			
 
				+#undef STARPU_HAVE_MSG_MSG_H
			
 
				 #undef STARPU_HAVE_SIMGRID_ACTOR_H
			
 
				 #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H
			
 
				 #undef STARPU_HAVE_SIMGRID_MUTEX_H
			
@@ -166,6 +167,9 @@ typedef ssize_t starpu_ssize_t;
 
				 #undef STARPU_HAVE_PTHREAD_BARRIER
			
 
				 #undef STARPU_HAVE_PTHREAD_SETNAME_NP
			
 
				 #undef STARPU_HAVE_STRUCT_TIMESPEC
			
 
				+#undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO
			
 
				+#undef STARPU_PTHREAD_COND_INITIALIZER_ZERO
			
 
				+#undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO
			
 
				 
			
 
				 /* This is only for building examples */
			
 
				 #undef STARPU_HAVE_HELGRIND_H
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -1,8 +1,8 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011-2017,2019                           Inria
			
 
				+ * Copyright (C) 2011-2017,2020                           Inria
			
 
				  * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				- * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2018,2019,2020            CNRS
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
 
				  *
			
@@ -1667,6 +1667,12 @@ void starpu_task_ft_failed(struct starpu_task *task);
 
				  */
			
 
				 void starpu_task_ft_success(struct starpu_task *meta_task);
			
 
				 
			
 
				+/**
			
 
				+   Set the function to call when the watchdog detects that StarPU has
			
 
				+   not finished any task for the duration set by STARPU_WATCHDOG_TIMEOUT
			
 
				+*/
			
 
				+void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg);
			
 
				+
			
 
				 /** @} */
			
 
				 
			
 
				 #ifdef __cplusplus
			
--- a/include/starpu_thread.h
+++ b/include/starpu_thread.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2013,2015-2017                           Inria
			
 
				  * Copyright (C) 2010-2015,2017,2019                           CNRS
			
 
				- * Copyright (C) 2010,2012-2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010,2012-2020                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -25,7 +25,10 @@
 
				 #include <starpu_util.h>
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <pthread.h>
			
 
				-#ifdef STARPU_HAVE_XBT_SYNCHRO_H
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+#include <simgrid/mutex.h>
			
 
				+#include <simgrid/cond.h>
			
 
				+#elif defined(STARPU_HAVE_XBT_SYNCHRO_H)
			
 
				 #include <xbt/synchro.h>
			
 
				 #else
			
 
				 #include <xbt/synchro_core.h>
			
@@ -45,9 +48,12 @@
 
				 #ifdef STARPU_HAVE_SIMGRID_BARRIER_H
			
 
				 #include <simgrid/barrier.h>
			
 
				 #endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_HOST_H
			
 
				+#include <simgrid/host.h>
			
 
				+#endif
			
 
				 #ifdef STARPU_HAVE_SIMGRID_MSG_H
			
 
				 #include <simgrid/msg.h>
			
 
				-#else
			
 
				+#elif defined(STARPU_HAVE_MSG_MSG_H)
			
 
				 #include <msg/msg.h>
			
 
				 #endif
			
 
				 #elif !defined(_MSC_VER) || defined(BUILDING_STARPU)
			
@@ -81,7 +87,7 @@ typedef msg_host_t starpu_sg_host_t;
 
				 #endif
			
 
				 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
			
 
				 starpu_pthread_t starpu_pthread_self(void);
			
 
				-int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host);
			
 
				+int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host);
			
 
				 int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
			
 
				 starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]);
			
 
				 int starpu_pthread_join(starpu_pthread_t thread, void **retval);
			
--- a/include/starpu_thread_util.h
+++ b/include/starpu_thread_util.h
@@ -1,8 +1,8 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012,2013                                Inria
			
 
				- * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
			
 
				- * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
			
 
				+ * Copyright (C) 2010-2014,2016,2017,2020                 Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -64,7 +64,7 @@
 
				  * Encapsulation of the starpu_pthread_mutex_* functions.
			
 
				  */
			
 
				 
			
 
				-#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                           \
			
 
				+#define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                           \
			
 
				 	int p_ret = starpu_pthread_mutex_init((mutex), (attr));                \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				 		fprintf(stderr,                                                \
			
@@ -74,6 +74,22 @@
 
				 	}                                                                      \
			
 
				 } while (0)
			
 
				 
			
 
				+#ifdef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                            \
			
 
				+	if (!attr)                                                             \
			
 
				+		memset(mutex, 0, sizeof(*mutex));                              \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) do {                           \
			
 
				+	if (attr)                                                              \
			
 
				+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
			
 
				+#endif
			
 
				+
			
 
				 #define STARPU_PTHREAD_MUTEX_DESTROY(mutex) do {                              \
			
 
				 	int p_ret = starpu_pthread_mutex_destroy(mutex);                       \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
@@ -199,7 +215,7 @@ int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *fil
 
				 /*
			
 
				  * Encapsulation of the starpu_pthread_rwlock_* functions.
			
 
				  */
			
 
				-#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                          \
			
 
				+#define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                         \
			
 
				 	int p_ret = starpu_pthread_rwlock_init((rwlock), (attr));              \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				 		fprintf(stderr,                                                \
			
@@ -209,6 +225,22 @@ int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *fil
 
				 	}                                                                      \
			
 
				 } while (0)
			
 
				 
			
 
				+#ifdef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                            \
			
 
				+	if (!attr)                                                             \
			
 
				+		memset(rwlock, 0, sizeof(*rwlock));                              \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr);                       \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) do {                           \
			
 
				+	if (attr)                                                              \
			
 
				+		_STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr);                       \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
			
 
				+#endif
			
 
				+
			
 
				 #define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) do {                              \
			
 
				 	int p_ret = starpu_pthread_rwlock_rdlock(rwlock);                      \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
@@ -282,7 +314,7 @@ int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, char *file
 
				 /*
			
 
				  * Encapsulation of the starpu_pthread_cond_* functions.
			
 
				  */
			
 
				-#define STARPU_PTHREAD_COND_INIT(cond, attr) do {                             \
			
 
				+#define _STARPU_PTHREAD_COND_INIT(cond, attr) do {                             \
			
 
				 	int p_ret = starpu_pthread_cond_init((cond), (attr));                  \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				 		fprintf(stderr,                                                \
			
@@ -292,6 +324,22 @@ int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, char *file
 
				 	}                                                                      \
			
 
				 } while (0)
			
 
				 
			
 
				+#ifdef STARPU_PTHREAD_COND_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_COND_INIT(cond, attr) do {                            \
			
 
				+	if (!attr)                                                             \
			
 
				+		memset(cond, 0, sizeof(*cond));                              \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_COND_INIT(cond, attr);                       \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_COND_INIT0(cond, attr) do {                           \
			
 
				+	if (attr)                                                              \
			
 
				+		_STARPU_PTHREAD_COND_INIT(cond, attr);                       \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_COND_INIT(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
			
 
				+#define STARPU_PTHREAD_COND_INIT0(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
			
 
				+#endif
			
 
				+
			
 
				 #define STARPU_PTHREAD_COND_DESTROY(cond) do {                                \
			
 
				 	int p_ret = starpu_pthread_cond_destroy(cond);                         \
			
 
				 	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
--- a/m4/libs.m4
+++ b/m4/libs.m4
@@ -2,7 +2,7 @@
 
				 #
			
 
				 # Copyright (C) 2011                                     Inria
			
 
				 # Copyright (C) 2012,2017                                CNRS
			
 
				-# Copyright (C) 2011,2014                                Université de Bordeaux
			
 
				+# Copyright (C) 2011,2014,2020                           Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -53,3 +53,23 @@ AC_DEFUN([STARPU_CHECK_LIB], [dnl
 
				 AC_DEFUN([STARPU_HAVE_LIBRARY], [dnl
			
 
				 STARPU_CHECK_LIB([$1], [$2], main, [$3], [$4], [$5])
			
 
				 ])dnl
			
 
				+
			
 
				+# STARPU_INIT_ZERO(INCLUDES, TYPE, INIT_MACRO)
			
 
				+# Checks whether when TYPE is initialized with INIT_MACRO, the content is just
			
 
				+# plain zeroes
			
 
				+AC_DEFUN([STARPU_INIT_ZERO], [dnl
			
 
				+AC_MSG_CHECKING(whether $3 just zeroes)
			
 
				+AC_RUN_IFELSE([AC_LANG_PROGRAM(
			
 
				+		$1,
			
 
				+		[[$2 var = $3;
			
 
				+		 char *p;
			
 
				+		 for (p = (char*) &var; p < (char*) (&var+1); p++)
			
 
				+		   if (*p != 0)
			
 
				+		     return 1;
			
 
				+		 return 0;
			
 
				+		]],
			
 
				+		)],
			
 
				+		[AC_DEFINE([STARPU_$3_ZERO], [1], [Define to 1 if `$3' is just zeroes])
			
 
				+		 AC_MSG_RESULT(yes)],
			
 
				+		[AC_MSG_RESULT(no)])
			
 
				+])dnl
			
--- a/mpi/examples/Makefile.am
+++ b/mpi/examples/Makefile.am
@@ -2,7 +2,7 @@
 
				 #
			
 
				 # Copyright (C) 2012,2014,2016                           Inria
			
 
				 # Copyright (C) 2010-2017,2019                           CNRS
			
 
				-# Copyright (C) 2009-2017,2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2009-2017,2019-2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2013                                     Thibaut Lambert
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -248,13 +248,11 @@ matrix_decomposition_mpi_cholesky_distributed_SOURCES =	\
 
				 matrix_decomposition_mpi_cholesky_distributed_LDADD =	\
			
 
				 	$(STARPU_BLAS_LDFLAGS) -lm
			
 
				 
			
 
				-if !STARPU_SIMGRID
			
 
				 starpu_mpi_EXAMPLES +=				\
			
 
				 	matrix_decomposition/mpi_cholesky			\
			
 
				 	matrix_decomposition/mpi_cholesky_distributed
			
 
				 endif
			
 
				 endif
			
 
				-endif
			
 
				 
			
 
				 ########################
			
 
				 # MPI Matrix mult example #
			
@@ -336,11 +334,9 @@ complex_mpi_complex_SOURCES =		\
 
				 	complex/mpi_complex.c		\
			
 
				 	../../examples/interface/complex_interface.c
			
 
				 
			
 
				-if !STARPU_SIMGRID
			
 
				 starpu_mpi_EXAMPLES	+=			\
			
 
				 	complex/mpi_complex
			
 
				 endif
			
 
				-endif
			
 
				 
			
 
				 #########################
			
 
				 # user_datatype example #
			
--- a/mpi/examples/perf.sh
+++ b/mpi/examples/perf.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2010,2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2010,2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -40,7 +40,7 @@ ncalibrate=0
 
				 for i in `seq 1 $ncalibrate`
			
 
				 do
			
 
				 echo "STARPU_CALIBRATE $i/$ncalibrate"
			
 
				-STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
			
 
				+STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes $STARPU_LAUNCH ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa
			
 
				 done
			
 
				 
			
 
				 func()
			
@@ -57,7 +57,7 @@ echo "*******************************************">> log
 
				 cat log
			
 
				 cat log >> log.all
			
 
				 
			
 
				-STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
			
 
				+STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np $STARPU_LAUNCH ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
			
 
				 cat log.out > log
			
 
				 cat log.err >> log
			
 
				 cat log
			
--- a/mpi/examples/user_datatype/my_interface.h
+++ b/mpi/examples/user_datatype/my_interface.h
@@ -59,6 +59,7 @@ static struct starpu_codelet starpu_my_data_display_codelet =
 
				 	.cpu_funcs_name = {"starpu_my_data_display_codelet_cpu"},
			
 
				 	.nbuffers = 1,
			
 
				 	.modes = {STARPU_R},
			
 
				+	.model = &starpu_perfmodel_nop,
			
 
				 	.name = "starpu_my_data_display_codelet"
			
 
				 };
			
 
				 
			
@@ -68,6 +69,7 @@ static struct starpu_codelet starpu_my_data_compare_codelet =
 
				 	.cpu_funcs_name = {"starpu_my_data_compare_codelet_cpu"},
			
 
				 	.nbuffers = 2,
			
 
				 	.modes = {STARPU_R, STARPU_R},
			
 
				+	.model = &starpu_perfmodel_nop,
			
 
				 	.name = "starpu_my_data_compare_codelet"
			
 
				 };
			
 
				 
			
--- a/mpi/src/mpi/starpu_mpi_mpi.c
+++ b/mpi/src/mpi/starpu_mpi_mpi.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2012,2013,2016,2017                      Inria
			
 
				  * Copyright (C) 2017                                     Guillaume Beauchamp
			
 
				  *
			
@@ -19,6 +19,10 @@
 
				 
			
 
				 #include <stdlib.h>
			
 
				 #include <limits.h>
			
 
				+#include <common/config.h>
			
 
				+#ifdef HAVE_UNISTD_H
			
 
				+#include <unistd.h>
			
 
				+#endif
			
 
				 #include <starpu_mpi.h>
			
 
				 #include <starpu_mpi_datatype.h>
			
 
				 #include <starpu_mpi_private.h>
			
@@ -33,7 +37,6 @@
 
				 #include <mpi/starpu_mpi_tag.h>
			
 
				 #include <mpi/starpu_mpi_comm.h>
			
 
				 #include <starpu_mpi_init.h>
			
 
				-#include <common/config.h>
			
 
				 #include <common/thread.h>
			
 
				 #include <datawizard/interfaces/data_interface.h>
			
 
				 #include <datawizard/coherency.h>
			
@@ -325,7 +328,7 @@ static void _starpu_mpi_simgrid_wait_req_func(void* arg)
 
				 	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret));
			
 
				 
			
 
				 	*(sim_req->done) = 1;
			
 
				-	starpu_pthread_queue_signal(sim_req->queue);
			
 
				+	starpu_pthread_queue_broadcast(sim_req->queue);
			
 
				 
			
 
				 	free(sim_req);
			
 
				 
			
@@ -501,10 +504,10 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 
				 	{
			
 
				 		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
			
 
				 		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request);
			
 
				+	}
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-		_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
			
 
				+	_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
			
 
				 #endif
			
 
				-	}
			
 
				 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 
			
 
				 	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
			
@@ -526,6 +529,7 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 
				 /*                                                      */
			
 
				 /********************************************************/
			
 
				 
			
 
				+#ifndef STARPU_SIMGRID
			
 
				 void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
			
 
				 {
			
 
				 	_STARPU_MPI_LOG_IN();
			
@@ -535,10 +539,6 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 
				 	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
			
 
				 	if (req->backend->data_request != MPI_REQUEST_NULL)
			
 
				 	{
			
 
				-		// TODO: Fix for STARPU_SIMGRID
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-		STARPU_MPI_ASSERT_MSG(0, "Implement this in STARPU_SIMGRID");
			
 
				-#endif
			
 
				 		req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status);
			
 
				 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 	}
			
@@ -548,15 +548,36 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				 int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
			
 
				 {
			
 
				 	int ret;
			
 
				 	struct _starpu_mpi_req *req = *public_req;
			
 
				-	struct _starpu_mpi_req *waiting_req;
			
 
				 
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
			
 
				+	starpu_pthread_wait_t wait;
			
 
				+	starpu_pthread_wait_init(&wait);
			
 
				+	starpu_pthread_queue_register(&wait, &req->queue);
			
 
				+	while (1)
			
 
				+	{
			
 
				+		starpu_pthread_wait_reset(&wait);
			
 
				+		if (req->done)
			
 
				+			break;
			
 
				+		starpu_pthread_wait_wait(&wait);
			
 
				+	}
			
 
				+	starpu_pthread_queue_unregister(&wait, &req->queue);
			
 
				+	starpu_pthread_wait_destroy(&wait);
			
 
				+	_STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
			
 
				+
			
 
				+	if (status)
			
 
				+		*status = req->status_store;
			
 
				+	_starpu_mpi_handle_request_termination(req);
			
 
				+#else
			
 
				+	struct _starpu_mpi_req *waiting_req;
			
 
				 	/* We cannot try to complete a MPI request that was not actually posted
			
 
				 	 * to MPI yet. */
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex));
			
@@ -580,16 +601,17 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 
				 		STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex);
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
			
 
				 
			
 
				-	ret = req->ret;
			
 
				-
			
 
				 	/* The internal request structure was automatically allocated */
			
 
				+	_starpu_mpi_request_destroy(waiting_req);
			
 
				+#endif
			
 
				+
			
 
				 	*public_req = NULL;
			
 
				 	if (req->backend->internal_req)
			
 
				 	{
			
 
				 		_starpu_mpi_request_destroy(req->backend->internal_req);
			
 
				 	}
			
 
				+	ret = req->ret;
			
 
				 	_starpu_mpi_request_destroy(req);
			
 
				-	_starpu_mpi_request_destroy(waiting_req);
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return ret;
			
@@ -601,6 +623,7 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 
				 /*                                                      */
			
 
				 /********************************************************/
			
 
				 
			
 
				+#ifndef STARPU_SIMGRID
			
 
				 void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
			
 
				 {
			
 
				 	_STARPU_MPI_LOG_IN();
			
@@ -613,12 +636,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 
				 
			
 
				 	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
			
 
				 
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-	req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, testing_req->flag);
			
 
				-	memcpy(testing_req->status, &req->status_store, sizeof(*testing_req->status));
			
 
				-#else
			
 
				 	req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status);
			
 
				-#endif
			
 
				 
			
 
				 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 
			
@@ -636,6 +654,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex);
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				 int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
			
 
				 {
			
@@ -648,6 +667,15 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
				 
			
 
				 	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	ret = req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, flag);
			
 
				+	if (*flag)
			
 
				+	{
			
 
				+		if (status)
			
 
				+			*status = req->status_store;
			
 
				+		_starpu_mpi_handle_request_termination(req);
			
 
				+	}
			
 
				+#else
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
			
 
				 	unsigned submitted = req->submitted;
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
			
@@ -676,25 +704,26 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
				 
			
 
				 		ret = testing_req->ret;
			
 
				 
			
 
				-		if (*(testing_req->flag))
			
 
				-		{
			
 
				-			/* The request was completed so we free the internal
			
 
				-			 * request structure which was automatically allocated
			
 
				-			 * */
			
 
				-			*public_req = NULL;
			
 
				-			if (req->backend->internal_req)
			
 
				-			{
			
 
				-				_starpu_mpi_request_destroy(req->backend->internal_req);
			
 
				-			}
			
 
				-			_starpu_mpi_request_destroy(req);
			
 
				-		}
			
 
				-
			
 
				 		_starpu_mpi_request_destroy(testing_req);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				 		*flag = 0;
			
 
				 	}
			
 
				+#endif
			
 
				+
			
 
				+	if (*flag)
			
 
				+	{
			
 
				+		/* The request was completed so we free the internal
			
 
				+		 * request structure which was automatically allocated
			
 
				+		 * */
			
 
				+		*public_req = NULL;
			
 
				+		if (req->backend->internal_req)
			
 
				+		{
			
 
				+			_starpu_mpi_request_destroy(req->backend->internal_req);
			
 
				+		}
			
 
				+		_starpu_mpi_request_destroy(req);
			
 
				+	}
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return ret;
			
@@ -930,6 +959,9 @@ static void _starpu_mpi_early_data_cb(void* arg)
 
				 			args->req->submitted = 1;
			
 
				 			STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond);
			
 
				 			STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+			args->req->done = 1;
			
 
				+#endif
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1133,7 +1165,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 
			
 
				 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0)
			
 
				 	{
			
 
				-		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
			
 
				+		char hostname[65];
			
 
				+		gethostname(hostname, sizeof(hostname));
			
 
				+		_STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname);
			
 
				 	}
			
 
				 	_starpu_mpi_do_initialize(argc_argv);
			
 
				 	if (_starpu_mpi_thread_cpuid >= 0)
			
@@ -1150,13 +1184,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 	int i;
			
 
				 	for (i = 0; i < *(argc_argv->argc); i++)
			
 
				 		argv_cpy[i] = strdup((*(argc_argv->argv))[i]);
			
 
				+	void **tsd;
			
 
				+	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
			
 
				 #ifdef HAVE_SG_ACTOR_DATA
			
 
				 	_starpu_simgrid_actor_create("main", smpi_simulated_main_, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
			
 
				+	/* And set TSD for us */
			
 
				+	sg_actor_data_set(sg_actor_self(), tsd);
			
 
				 #else
			
 
				 	MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy);
			
 
				 	/* And set TSD for us */
			
 
				-	void **tsd;
			
 
				-	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
			
 
				 	if (!smpi_process_set_user_data)
			
 
				 	{
			
 
				 		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
			
--- a/mpi/src/mpi/starpu_mpi_mpi_backend.c
+++ b/mpi/src/mpi/starpu_mpi_mpi_backend.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2017                                     Inria
			
 
				  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
			
 
				- * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014,2017,2018-2020                 Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -51,21 +51,21 @@ void _starpu_mpi_mpi_backend_request_init(struct _starpu_mpi_req *req)
 
				 {
			
 
				 	_STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend));
			
 
				 
			
 
				-	req->backend->data_request = 0;
			
 
				+	//req->backend->data_request = 0;
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->req_mutex, NULL);
			
 
				 	STARPU_PTHREAD_COND_INIT(&req->backend->req_cond, NULL);
			
 
				 	STARPU_PTHREAD_MUTEX_INIT(&req->backend->posted_mutex, NULL);
			
 
				 	STARPU_PTHREAD_COND_INIT(&req->backend->posted_cond, NULL);
			
 
				 
			
 
				-	req->backend->other_request = NULL;
			
 
				+	//req->backend->other_request = NULL;
			
 
				 
			
 
				-	req->backend->size_req = 0;
			
 
				-	req->backend->internal_req = NULL;
			
 
				-	req->backend->is_internal_req = 0;
			
 
				+	//req->backend->size_req = 0;
			
 
				+	//req->backend->internal_req = NULL;
			
 
				+	//req->backend->is_internal_req = 0;
			
 
				 	req->backend->to_destroy = 1;
			
 
				-	req->backend->early_data_handle = NULL;
			
 
				-	req->backend->envelope = NULL;
			
 
				+	//req->backend->early_data_handle = NULL;
			
 
				+	//req->backend->envelope = NULL;
			
 
				 }
			
 
				 
			
 
				 void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)
			
--- a/mpi/src/nmad/starpu_mpi_nmad.c
+++ b/mpi/src/nmad/starpu_mpi_nmad.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2017                                     Inria
			
 
				  * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
			
 
				- * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014,2017,2018-2020                 Université de Bordeaux
			
 
				  * Copyright (C) 2017                                     Guillaume Beauchamp
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,10 @@
 
				 
			
 
				 #include <stdlib.h>
			
 
				 #include <limits.h>
			
 
				+#include <common/config.h>
			
 
				+#ifdef HAVE_UNISTD_H
			
 
				+#include <unistd.h>
			
 
				+#endif
			
 
				 #include <starpu_mpi.h>
			
 
				 #include <starpu_mpi_datatype.h>
			
 
				 #include <starpu_mpi_private.h>
			
@@ -28,7 +32,6 @@
 
				 #include <starpu_mpi_cache.h>
			
 
				 #include <starpu_mpi_select_node.h>
			
 
				 #include <starpu_mpi_init.h>
			
 
				-#include <common/config.h>
			
 
				 #include <common/thread.h>
			
 
				 #include <datawizard/coherency.h>
			
 
				 #include <core/task.h>
			
@@ -414,7 +417,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 
			
 
				 	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, 0, "MPI") < 0)
			
 
				 	{
			
 
				-		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
			
 
				+		char hostname[65];
			
 
				+		gethostname(hostname, sizeof(hostname));
			
 
				+		_STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname);
			
 
				 	}
			
 
				 	_starpu_mpi_do_initialize(argc_argv);
			
 
				 	if (_starpu_mpi_thread_cpuid >= 0)
			
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2012,2013,2016,2017                      Inria
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2018                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2018,2020                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -46,7 +46,7 @@ static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum sta
 
				 
			
 
				 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 {
			
 
				-	if (_starpu_mpi_fake_world_size != -1)
			
 
				+	if (STARPU_UNLIKELY(_starpu_mpi_fake_world_size != -1))
			
 
				 	{
			
 
				 		/* Don't actually do the communication */
			
 
				 		return NULL;
			
@@ -114,9 +114,9 @@ int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_
 
				 	MPI_Status status;
			
 
				 
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				-	memset(&status, 0, sizeof(MPI_Status));
			
 
				-
			
 
				 	starpu_mpi_isend_prio(data_handle, &req, dest, data_tag, prio, comm);
			
 
				+
			
 
				+	memset(&status, 0, sizeof(MPI_Status));
			
 
				 	starpu_mpi_wait(&req, &status);
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
--- a/mpi/src/starpu_mpi_req.c
+++ b/mpi/src/starpu_mpi_req.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2012,2013,2016,2017                      Inria
			
 
				  * Copyright (C) 2017                                     Guillaume Beauchamp
			
 
				  *
			
@@ -25,37 +25,37 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 
				 	_STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req));
			
 
				 
			
 
				 	/* Initialize the request structure */
			
 
				-	(*req)->data_handle = NULL;
			
 
				-	(*req)->prio = 0;
			
 
				+	//(*req)->data_handle = NULL;
			
 
				+	//(*req)->prio = 0;
			
 
				 
			
 
				-	(*req)->datatype = 0;
			
 
				-	(*req)->datatype_name = NULL;
			
 
				-	(*req)->ptr = NULL;
			
 
				+	//(*req)->datatype = 0;
			
 
				+	//(*req)->datatype_name = NULL;
			
 
				+	//(*req)->ptr = NULL;
			
 
				 	(*req)->count = -1;
			
 
				 	(*req)->registered_datatype = -1;
			
 
				 
			
 
				 	(*req)->node_tag.node.rank = -1;
			
 
				 	(*req)->node_tag.data_tag = -1;
			
 
				-	(*req)->node_tag.node.comm = 0;
			
 
				+	//(*req)->node_tag.node.comm = 0;
			
 
				 
			
 
				-	(*req)->func = NULL;
			
 
				+	//(*req)->func = NULL;
			
 
				 
			
 
				-	(*req)->status = NULL;
			
 
				-	(*req)->flag = NULL;
			
 
				+	//(*req)->status = NULL;
			
 
				+	//(*req)->flag = NULL;
			
 
				 	_starpu_mpi_req_multilist_init_coop_sends(*req);
			
 
				 
			
 
				 	(*req)->ret = -1;
			
 
				 
			
 
				 	(*req)->request_type = UNKNOWN_REQ;
			
 
				 
			
 
				-	(*req)->submitted = 0;
			
 
				-	(*req)->completed = 0;
			
 
				-	(*req)->posted = 0;
			
 
				+	//(*req)->submitted = 0;
			
 
				+	//(*req)->completed = 0;
			
 
				+	//(*req)->posted = 0;
			
 
				 
			
 
				-	(*req)->sync = 0;
			
 
				+	//(*req)->sync = 0;
			
 
				 	(*req)->detached = -1;
			
 
				-	(*req)->callback = NULL;
			
 
				-	(*req)->callback_arg = NULL;
			
 
				+	//(*req)->callback = NULL;
			
 
				+	//(*req)->callback_arg = NULL;
			
 
				 
			
 
				 	(*req)->sequential_consistency = 1;
			
 
				 	(*req)->pre_sync_jobid = -1;
			
@@ -64,7 +64,7 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 
				 #ifdef STARPU_SIMGRID
			
 
				 	starpu_pthread_queue_init(&((*req)->queue));
			
 
				 	starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue));
			
 
				-	(*req)->done = 0;
			
 
				+	//(*req)->done = 0;
			
 
				 #endif
			
 
				 	_mpi_backend._starpu_mpi_backend_request_init(*req);
			
 
				 }
			
--- a/mpi/tests/Makefile.am
+++ b/mpi/tests/Makefile.am
@@ -1,7 +1,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2010-2019                                CNRS
			
 
				-# Copyright (C) 2009-2018                                Université de Bordeaux
			
 
				+# Copyright (C) 2009-2018, 2020                                Université de Bordeaux
			
 
				 # Copyright (C) 2013                                     Thibaut Lambert
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -95,12 +95,17 @@ if BUILD_TESTS
 
				 starpu_mpi_TESTS =
			
 
				 
			
 
				 starpu_mpi_TESTS +=				\
			
 
				+	broadcast				\
			
 
				 	cache					\
			
 
				 	cache_disable				\
			
 
				 	callback				\
			
 
				+	driver					\
			
 
				 	early_request				\
			
 
				+	gather					\
			
 
				+	gather2					\
			
 
				 	insert_task				\
			
 
				 	insert_task_block			\
			
 
				+	insert_task_count			\
			
 
				 	insert_task_dyn_handles			\
			
 
				 	insert_task_node_choice			\
			
 
				 	insert_task_owner			\
			
@@ -108,52 +113,47 @@ starpu_mpi_TESTS +=				\
 
				 	insert_task_owner_data			\
			
 
				 	matrix					\
			
 
				 	matrix2					\
			
 
				+	mpi_barrier				\
			
 
				 	mpi_detached_tag			\
			
 
				+	mpi_earlyrecv				\
			
 
				+	mpi_irecv				\
			
 
				 	mpi_irecv_detached			\
			
 
				+	mpi_isend				\
			
 
				 	mpi_isend_detached			\
			
 
				 	mpi_reduction				\
			
 
				+	mpi_redux				\
			
 
				 	mpi_scatter_gather			\
			
 
				+	mpi_test				\
			
 
				+	multiple_send				\
			
 
				+	pingpong				\
			
 
				 	policy_register				\
			
 
				 	policy_register_many			\
			
 
				 	policy_selection			\
			
 
				 	policy_selection2			\
			
 
				+	ring					\
			
 
				+	ring_async				\
			
 
				 	ring_async_implicit			\
			
 
				+	ring_sync				\
			
 
				+	ring_sync_detached			\
			
 
				 	temporary				\
			
 
				-	early_stuff
			
 
				+	user_defined_datatype			\
			
 
				+	early_stuff				\
			
 
				+	sendrecv_bench
			
 
				 
			
 
				 if !STARPU_SIMGRID
			
 
				+# missing support in simgrid
			
 
				 starpu_mpi_TESTS +=				\
			
 
				 	attr					\
			
 
				-	broadcast				\
			
 
				-	pingpong				\
			
 
				-	mpi_test				\
			
 
				-	mpi_isend				\
			
 
				-	mpi_earlyrecv				\
			
 
				 	mpi_earlyrecv2				\
			
 
				 	mpi_earlyrecv2_sync			\
			
 
				-	mpi_irecv				\
			
 
				-	mpi_barrier				\
			
 
				-	mpi_redux				\
			
 
				-	ring					\
			
 
				-	ring_sync				\
			
 
				-	ring_sync_detached			\
			
 
				-	ring_async				\
			
 
				 	block_interface				\
			
 
				 	block_interface_pinned			\
			
 
				-	matrix2					\
			
 
				 	insert_task_compute			\
			
 
				 	insert_task_sent_cache			\
			
 
				 	insert_task_recv_cache			\
			
 
				-	insert_task_count			\
			
 
				 	insert_task_seq				\
			
 
				-	multiple_send				\
			
 
				-	user_defined_datatype			\
			
 
				 	tags_checking				\
			
 
				-	sync					\
			
 
				-	gather					\
			
 
				-	gather2					\
			
 
				-	driver					\
			
 
				-	sendrecv_bench
			
 
				+	sync
			
 
				 
			
 
				 if STARPU_USE_MPI_MPI
			
 
				 starpu_mpi_TESTS +=				\
			
--- a/mpi/tests/driver.c
+++ b/mpi/tests/driver.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2017,2018                                CNRS
			
 
				- * Copyright (C) 2018                                     Université de Bordeaux
			
 
				+ * Copyright (C) 2018,2020                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -100,6 +100,9 @@ int main(int argc, char **argv)
 
				 			}
			
 
				 		}
			
 
				 		finished = request[0] == NULL && request[1] == NULL;
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		starpu_sleep(0.001);
			
 
				+#endif
			
 
				 	}
			
 
				 
			
 
				 	if (rank%2 == 0)
			
--- a/mpi/tests/mpi_earlyrecv.c
+++ b/mpi/tests/mpi_earlyrecv.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2015,2017                           CNRS
			
 
				- * Copyright (C) 2009,2010,2014,2015,2017,2018            Université de Bordeaux
			
 
				+ * Copyright (C) 2009,2010,2014,2015,2017,2018,2020       Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -98,6 +98,9 @@ int main(int argc, char **argv)
 
				 			}
			
 
				 		}
			
 
				 		finished = request[0] == NULL && request[1] == NULL;
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		starpu_sleep(0.001);
			
 
				+#endif
			
 
				 	}
			
 
				 
			
 
				 	if (rank%2 == 0)
			
--- a/mpi/tests/mpi_earlyrecv2.c
+++ b/mpi/tests/mpi_earlyrecv2.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2017                                CNRS
			
 
				- * Copyright (C) 2009,2010,2014,2015,2017,2018            Université de Bordeaux
			
 
				+ * Copyright (C) 2009,2010,2014,2015,2017,2018,2020       Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  *
			
@@ -73,7 +73,7 @@ int exchange(int rank, starpu_data_handle_t *handles, check_func func, int detac
 
				 		}
			
 
				 
			
 
				 		// We sleep to make sure that the data for the tag 9 will be received before the recv is posted
			
 
				-		usleep(2000000);
			
 
				+		starpu_sleep(2);
			
 
				 		for(i=1 ; i<NB ; i++)
			
 
				 		{
			
 
				 			if (detached)
			
--- a/mpi/tests/mpi_earlyrecv2_sync.c
+++ b/mpi/tests/mpi_earlyrecv2_sync.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2017                                CNRS
			
 
				- * Copyright (C) 2009,2010,2015,2018                      Université de Bordeaux
			
 
				+ * Copyright (C) 2009,2010,2015,2018,2020                 Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -65,7 +65,7 @@ int exchange(int rank, starpu_data_handle_t *handles, check_func func)
 
				 		STARPU_ASSERT(req[1] != NULL);
			
 
				 
			
 
				 		// We sleep to make sure that the data for the tag 8 and the tag 9 will be received before the recv are posted
			
 
				-		usleep(2000000);
			
 
				+		starpu_sleep(2);
			
 
				 		for(i=2 ; i<NB ; i++)
			
 
				 		{
			
 
				 			starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
			
--- a/mpi/tests/mpi_test.c
+++ b/mpi/tests/mpi_test.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010,2011,2014,2015,2017,2018            Université de Bordeaux
			
 
				+ * Copyright (C) 2010,2011,2014,2015,2017,2018,2020       Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				  *
			
@@ -79,6 +79,9 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			MPI_Status status;
			
 
				 			starpu_mpi_test(&req, &finished, &status);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+			starpu_sleep(0.001);
			
 
				+#endif
			
 
				 		}
			
 
				 		while (!finished);
			
 
				 	}
			
--- a/mpi/tests/multiple_send.c
+++ b/mpi/tests/multiple_send.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  * Copyright (C) 2011-2013,2015,2017                      CNRS
			
 
				- * Copyright (C) 2011,2015,2018                           Université de Bordeaux
			
 
				+ * Copyright (C) 2011,2015,2018,2020                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -80,6 +80,9 @@ int main(int argc, char **argv)
 
				 					}
			
 
				 				}
			
 
				 			}
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+			starpu_sleep(0.001);
			
 
				+#endif
			
 
				 		}
			
 
				 	}
			
 
				 	FPRINTF(stderr, "[%d] All requests finished\n", rank);
			
--- a/mpi/tests/pingpong.c
+++ b/mpi/tests/pingpong.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2011,2014,2015,2017,2018            Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2014,2015,2017,2018,2020       Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				  *
			
@@ -153,7 +153,7 @@ int main(int argc, char **argv)
 
				 				starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status);
			
 
				 			}
			
 
				 
			
 
				-			usleep(sleep_time * 1000);
			
 
				+			starpu_sleep(sleep_time / 1000);
			
 
				 		}
			
 
				 	}
			
 
				 	else // broadcasts
			
@@ -168,7 +168,7 @@ int main(int argc, char **argv)
 
				 					if (r != rank)
			
 
				 					{
			
 
				 						starpu_mpi_send(tab_handle, r, (r * niter) + loop, MPI_COMM_WORLD);
			
 
				-						usleep(sleep_time * 1000);
			
 
				+						starpu_sleep(sleep_time / 1000);
			
 
				 					}
			
 
				 				}
			
 
				 			}
			
@@ -178,7 +178,7 @@ int main(int argc, char **argv)
 
				 				starpu_mpi_recv(tab_handle, sender, (rank * niter) + loop, MPI_COMM_WORLD, &status);
			
 
				 
			
 
				 				for (int r = 0; r < (size-1); r++)
			
 
				-					usleep(sleep_time * 1000);
			
 
				+					starpu_sleep(sleep_time / 1000);
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
--- a/mpi/tests/sendrecv_bench.c
+++ b/mpi/tests/sendrecv_bench.c
@@ -95,7 +95,16 @@ int main(int argc, char **argv)
 
				 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
			
 
				 	starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(worldsize == 2, "We need two prcesses.");
			
 
				+	if (worldsize != 2)
			
 
				+	{
			
 
				+		if (rank == 0)
			
 
				+			FPRINTF(stderr, "We need 2 processes.\n");
			
 
				+
			
 
				+		starpu_mpi_shutdown();
			
 
				+		if (!mpi_init)
			
 
				+			MPI_Finalize();
			
 
				+		return STARPU_TEST_SKIPPED;
			
 
				+	}
			
 
				 
			
 
				 
			
 
				 	if (rank == 0)
			
--- a/src/common/fxt.c
+++ b/src/common/fxt.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012,2013,2015                           Inria
			
 
				- * Copyright (C) 2008-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2018                                CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/config.h>
			
 
				 #include <common/utils.h>
			
 
				+#include <core/simgrid.h>
			
 
				 #include <starpu_util.h>
			
 
				 #include <starpu_profiling.h>
			
 
				 
			
--- a/src/common/graph.c
+++ b/src/common/graph.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2016,2017                                CNRS
			
 
				  * Copyright (C) 2017                                     Inria
			
 
				- * Copyright (C) 2016-2018                                Université de Bordeaux
			
 
				+ * Copyright (C) 2016-2018,2020                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -140,7 +140,7 @@ void _starpu_graph_add_job(struct _starpu_job *job)
 
				 	_STARPU_CALLOC(node, 1, sizeof(*node));
			
 
				 	node->job = job;
			
 
				 	job->graph_node = node;
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&node->mutex, NULL);
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&node->mutex, NULL);
			
 
				 
			
 
				 	_starpu_graph_wrlock();
			
 
				 
			
--- a/src/common/list.h
+++ b/src/common/list.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2008-2018                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2018,2020                           Université de Bordeaux
			
 
				  * Copyright (C) 2010-2012,2015-2018                      CNRS
			
 
				  * Copyright (C) 2017                                     Inria
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
@@ -51,6 +51,9 @@
 
				  *   * Initializes a list (initially empty)
			
 
				  *   void		FOO_list_init(struct FOO_list*);
			
 
				  *
			
 
				+ *   * Initializes a list (initially empty), assuming that the content of FOO_list was already zeroed
			
 
				+ *   void		FOO_list_init0(struct FOO_list*);
			
 
				+ *
			
 
				  *   * Suppresses a liste
			
 
				  *   void		FOO_list_delete(struct FOO_list*);
			
 
				  *
			
@@ -225,7 +228,9 @@
 
				   /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \
			
 
				     { return l->_tail; } \
			
 
				   /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \
			
 
				-    { l->_head=NULL; l->_tail=l->_head; } \
			
 
				+    { l->_head=NULL; l->_tail=NULL; } \
			
 
				+  /** @internal */LIST_INLINE void ENAME##_list_init0(struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
			
 
				+    { } \
			
 
				   /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \
			
 
				     { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \
			
 
				       ENAME##_list_init(l); return l; } \
			
--- a/src/common/prio_list.h
+++ b/src/common/prio_list.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2017,2018                                Inria
			
 
				  * Copyright (C) 2016,2017                                CNRS
			
 
				- * Copyright (C) 2015-2017,2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2015-2017,2019-2020                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -37,6 +37,9 @@
 
				  * * Initialize a new priority list
			
 
				  * void FOO_prio_list_init(struct FOO_prio_list*)
			
 
				  *
			
 
				+ * * Initialize a new priority list, assuming that the content of FOO_prio_list was already zeroed
			
 
				+ * void FOO_prio_list_init0(struct FOO_prio_list*)
			
 
				+ *
			
 
				  * * Free an empty priority list
			
 
				  * void FOO_prio_list_deinit(struct FOO_prio_list*)
			
 
				  *
			
@@ -152,6 +155,11 @@
 
				 		starpu_rbtree_init(&priolist->tree); \
			
 
				 		priolist->empty = 1; \
			
 
				 	} \
			
 
				+	PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \
			
 
				+	{ \
			
 
				+		starpu_rbtree_init0(&priolist->tree); \
			
 
				+		priolist->empty = 1; \
			
 
				+	} \
			
 
				 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
			
 
				 	{ \
			
 
				 		if (starpu_rbtree_empty(&priolist->tree)) \
			
@@ -183,10 +191,10 @@
 
				 		if (node) \
			
 
				 			stage = ENAME##_node_to_list_stage(node); \
			
 
				 		else { \
			
 
				-			_STARPU_MALLOC(stage, sizeof(*stage));	\
			
 
				-			starpu_rbtree_node_init(&stage->node); \
			
 
				+			_STARPU_CALLOC(stage, 1, sizeof(*stage));	\
			
 
				+			starpu_rbtree_node_init0(&stage->node); \
			
 
				 			stage->prio = prio; \
			
 
				-			ENAME##_list_init(&stage->list); \
			
 
				+			ENAME##_list_init0(&stage->list); \
			
 
				 			starpu_rbtree_insert_slot(&priolist->tree, slot, &stage->node); \
			
 
				 		} \
			
 
				 		return stage; \
			
@@ -469,6 +477,8 @@
 
				 	struct ENAME##_prio_list { struct ENAME##_list list; }; \
			
 
				 	PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \
			
 
				 	{ ENAME##_list_init(&(priolist)->list); } \
			
 
				+	PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \
			
 
				+	{ ENAME##_list_init0(&(priolist)->list); } \
			
 
				 	PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
			
 
				 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
			
 
				 	PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
			
--- a/src/common/rbtree.h
+++ b/src/common/rbtree.h
@@ -34,6 +34,8 @@
 
				 #include <stdint.h>
			
 
				 #include <sys/types.h>
			
 
				 
			
 
				+#include <starpu_util.h>
			
 
				+
			
 
				 #define MACRO_BEGIN ({
			
 
				 #define MACRO_END })
			
 
				 /*
			
@@ -68,6 +70,13 @@ static inline void starpu_rbtree_init(struct starpu_rbtree *tree)
 
				 }
			
 
				 
			
 
				 /*
			
 
				+ * This version assumes that the content of tree was already zeroed
			
 
				+ */
			
 
				+static inline void starpu_rbtree_init0(struct starpu_rbtree *tree STARPU_ATTRIBUTE_UNUSED)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				  * Initialize a node.
			
 
				  *
			
 
				  * A node is in no tree when its parent points to itself.
			
@@ -82,6 +91,18 @@ static inline void starpu_rbtree_node_init(struct starpu_rbtree_node *node)
 
				 }
			
 
				 
			
 
				 /*
			
 
				+ * This version assumes that the content of node was already zeroed
			
 
				+ */
			
 
				+static inline void starpu_rbtree_node_init0(struct starpu_rbtree_node *node)
			
 
				+{
			
 
				+    assert(starpu_rbtree_check_alignment(node));
			
 
				+
			
 
				+    node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED;
			
 
				+    //node->children[STARPU_RBTREE_LEFT] = NULL;
			
 
				+    //node->children[STARPU_RBTREE_RIGHT] = NULL;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				  * Return true if node is in no tree.
			
 
				  */
			
 
				 static inline int starpu_rbtree_node_unlinked(const struct starpu_rbtree_node *node)
			
--- a/src/common/thread.c
+++ b/src/common/thread.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2013,2015,2017                           Inria
			
 
				  * Copyright (C) 2010-2017                                CNRS
			
 
				- * Copyright (C) 2010,2012-2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010,2012-2020                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -29,7 +29,10 @@
 
				 #include <limits.h>
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-#ifdef STARPU_HAVE_XBT_SYNCHRO_H
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+#include <simgrid/mutex.h>
			
 
				+#include <simgrid/cond.h>
			
 
				+#elif defined(STARPU_HAVE_XBT_SYNCHRO_H)
			
 
				 #include <xbt/synchro.h>
			
 
				 #else
			
 
				 #include <xbt/synchro_core.h>
			
@@ -72,7 +75,7 @@ starpu_pthread_t starpu_pthread_self(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host)
			
 
				+int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host)
			
 
				 {
			
 
				 	char **_args;
			
 
				 	_STARPU_MALLOC(_args, 3*sizeof(char*));
			
--- a/src/core/dependencies/cg.c
+++ b/src/core/dependencies/cg.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				- * Copyright (C) 2010-2012,2014-2018                      Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2012,2014-2018,2020                 Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013,2015-2018                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -24,23 +24,23 @@
 
				 #include <core/dependencies/cg.h>
			
 
				 #include <core/dependencies/tags.h>
			
 
				 
			
 
				-void _starpu_cg_list_init(struct _starpu_cg_list *list)
			
 
				+void _starpu_cg_list_init0(struct _starpu_cg_list *list)
			
 
				 {
			
 
				 	_starpu_spin_init(&list->lock);
			
 
				-	list->ndeps = 0;
			
 
				-	list->ndeps_completed = 0;
			
 
				+	//list->ndeps = 0;
			
 
				+	//list->ndeps_completed = 0;
			
 
				 #ifdef STARPU_DEBUG
			
 
				-	list->deps = NULL;
			
 
				-	list->done = NULL;
			
 
				+	//list->deps = NULL;
			
 
				+	//list->done = NULL;
			
 
				 #endif
			
 
				 
			
 
				-	list->terminated = 0;
			
 
				+	//list->terminated = 0;
			
 
				 
			
 
				-	list->nsuccs = 0;
			
 
				+	//list->nsuccs = 0;
			
 
				 #ifdef STARPU_DYNAMIC_DEPS_SIZE
			
 
				 	/* this is a small initial default value ... may be changed */
			
 
				-	list->succ_list_size = 0;
			
 
				-	list->succ = NULL;
			
 
				+	//list->succ_list_size = 0;
			
 
				+	//list->succ = NULL;
			
 
				 #endif
			
 
				 }
			
 
				 
			
--- a/src/core/dependencies/cg.h
+++ b/src/core/dependencies/cg.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2018                                Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2018,2020                           Université de Bordeaux
			
 
				  * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -114,7 +114,7 @@ void _starpu_notify_dependencies(struct _starpu_job *j);
 
				 void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch);
			
 
				 void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
			
 
				 
			
 
				-void _starpu_cg_list_init(struct _starpu_cg_list *list);
			
 
				+void _starpu_cg_list_init0(struct _starpu_cg_list *list);
			
 
				 void _starpu_cg_list_deinit(struct _starpu_cg_list *list);
			
 
				 int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg);
			
 
				 int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]);
			
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011,2012,2016                           Inria
			
 
				- * Copyright (C) 2010-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013,2015-2018                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -49,8 +49,8 @@ static void _starpu_add_dependency(starpu_data_handle_t handle, struct starpu_ta
 
				 	_starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next);
			
 
				 }
			
 
				 
			
 
				-/* Add pre_sync_task as new accessor among the existing ones, making it depend on the last synchronization task if any.  */
			
 
				-static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot)
			
 
				+/* Add post_sync_task as new accessor among the existing ones, making pre_sync_task depend on the last synchronization task if any.  */
			
 
				+static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot)
			
 
				 {
			
 
				 	/* Add this task to the list of readers */
			
 
				 	STARPU_ASSERT(!post_sync_task_dependency_slot->prev);
			
@@ -64,6 +64,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task
 
				 	/* This task depends on the previous synchronization task if any */
			
 
				 	if (handle->last_sync_task && handle->last_sync_task != post_sync_task)
			
 
				 	{
			
 
				+		*submit_pre_sync= 1;
			
 
				 		struct starpu_task *task_array[1] = {handle->last_sync_task};
			
 
				 		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
			
 
				 		_starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task);
			
@@ -93,7 +94,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task
 
				 		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task);
			
 
				 	}
			
 
				 
			
 
				-	if (!pre_sync_task->cl)
			
 
				+	if (*submit_pre_sync && !pre_sync_task->cl)
			
 
				 	{
			
 
				 		/* Add a reference to be released in _starpu_handle_job_termination */
			
 
				 		_starpu_spin_lock(&handle->header_lock);
			
@@ -202,7 +203,14 @@ static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_tas
 
				  * */
			
 
				 /* NB : handle->sequential_consistency_mutex must be hold by the caller;
			
 
				  * returns a task, to be submitted after releasing that mutex. */
			
 
				-struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
			
 
				+/* *submit_pre_sync is whether the pre_sync_task will be submitted or not. The
			
 
				+ * caller should set it to 1 if it intends to submit it anyway, or to 0
			
 
				+ * if it may not submit it (because it has no other use for the task than
			
 
				+ * synchronization). In the latter case,
			
 
				+ * _starpu_detect_implicit_data_deps_with_handle will set it to 1 in case the
			
 
				+ * task really needs to be submitted, or leave it to 0 if there is nothing to be
			
 
				+ * waited for anyway. */
			
 
				+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
			
 
				 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency)
			
 
				 {
			
 
				 	struct starpu_task *task = NULL;
			
@@ -228,8 +236,14 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 
			
 
				 		/* Skip tasks that are associated to a reduction phase so that
			
 
				 		 * they do not interfere with the application. */
			
 
				-		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
			
 
				+		if (pre_sync_job->reduction_task) {
			
 
				+			*submit_pre_sync = 1;
			
 
				 			return NULL;
			
 
				+		}
			
 
				+		if (post_sync_job->reduction_task) {
			
 
				+			*submit_pre_sync = 0;
			
 
				+			return NULL;
			
 
				+		}
			
 
				 
			
 
				 		/* In case we are generating the DAG, we add an implicit
			
 
				 		 * dependency between the pre and the post sync tasks in case
			
@@ -264,7 +278,9 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 		{
			
 
				 			_STARPU_DEP_DEBUG("concurrently\n");
			
 
				 			/* Can access concurrently with current tasks */
			
 
				-			_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
			
 
				+			if (handle->last_sync_task != NULL)
			
 
				+				*submit_pre_sync = 1;
			
 
				+			_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
@@ -277,6 +293,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 					|| (l != &handle->last_submitted_accessors && handle->last_submitted_ghost_accessors_id))
			
 
				 			{
			
 
				 				/* Several previous accessors */
			
 
				+				*submit_pre_sync = 1;
			
 
				 
			
 
				 				if (mode == STARPU_W)
			
 
				 				{
			
@@ -308,7 +325,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 					/* Make this task wait for the previous ones */
			
 
				 					_starpu_add_sync_task(handle, sync_task, sync_task, post_sync_task);
			
 
				 					/* And the requested task wait for this one */
			
 
				-					_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
			
 
				+					_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
			
 
				 
			
 
				 					task = sync_task;
			
 
				 				}
			
@@ -321,6 +338,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 				{
			
 
				 					/* One accessor, make it the sync task,
			
 
				 					 * and start depending on it. */
			
 
				+					*submit_pre_sync = 1;
			
 
				 					_STARPU_DEP_DEBUG("One previous accessor, depending on it\n");
			
 
				 					handle->last_sync_task = l->task;
			
 
				 					l->next = NULL;
			
@@ -343,10 +361,12 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
				 				{
			
 
				 					_STARPU_DEP_DEBUG("No previous accessor, no dependency\n");
			
 
				 				}
			
 
				-				_starpu_add_accessor(handle, pre_sync_task, post_sync_task, post_sync_task_dependency_slot);
			
 
				+				_starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot);
			
 
				 			}
			
 
				 		}
			
 
				 		handle->last_submitted_mode = mode;
			
 
				+	} else {
			
 
				+		*submit_pre_sync = 0;
			
 
				 	}
			
 
				         _STARPU_LOG_OUT();
			
 
				 	return task;
			
@@ -423,9 +443,10 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
				 		STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
			
 
				 		unsigned index = descrs[buffer].index;
			
 
				 		unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? task->handles_sequential_consistency[index] : handle->sequential_consistency;
			
 
				+		int submit_pre_sync = 1;
			
 
				 		if (!task_handle_sequential_consistency)
			
 
				 			j->sequential_consistency = 0;
			
 
				-		new_task = _starpu_detect_implicit_data_deps_with_handle(task, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency);
			
 
				+		new_task = _starpu_detect_implicit_data_deps_with_handle(task, &submit_pre_sync, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency);
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 		if (new_task)
			
 
				 		{
			
@@ -631,6 +652,7 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 
				 	if (sequential_consistency)
			
 
				 	{
			
 
				 		struct starpu_task *sync_task, *new_task;
			
 
				+		int submit_pre_sync = 0;
			
 
				 		sync_task = starpu_task_create();
			
 
				 		sync_task->name = sync_name;
			
 
				 		sync_task->detach = 0;
			
@@ -639,7 +661,7 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 
				 
			
 
				 		/* It is not really a RW access, but we want to make sure that
			
 
				 		 * all previous accesses are done */
			
 
				-		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				+		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, &submit_pre_sync, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 
			
 
				 		if (new_task)
			
@@ -648,11 +670,17 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 
				 			STARPU_ASSERT(!ret);
			
 
				 		}
			
 
				 
			
 
				-		/* TODO detect if this is superflous */
			
 
				-		int ret = _starpu_task_submit_internally(sync_task);
			
 
				-		STARPU_ASSERT(!ret);
			
 
				-		ret = starpu_task_wait(sync_task);
			
 
				-		STARPU_ASSERT(ret == 0);
			
 
				+		if (submit_pre_sync)
			
 
				+		{
			
 
				+			int ret = _starpu_task_submit_internally(sync_task);
			
 
				+			STARPU_ASSERT(!ret);
			
 
				+			ret = starpu_task_wait(sync_task);
			
 
				+			STARPU_ASSERT(ret == 0);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			starpu_task_destroy(sync_task);
			
 
				+		}
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/src/core/dependencies/implicit_data_deps.h
+++ b/src/core/dependencies/implicit_data_deps.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2012,2014,2015,2017,2018            Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2012,2014,2015,2017,2018,2020       Université de Bordeaux
			
 
				  * Copyright (C) 2010,2011,2013,2015,2017,2018            CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -21,7 +21,7 @@
 
				 #include <starpu.h>
			
 
				 #include <common/config.h>
			
 
				 
			
 
				-struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
			
 
				+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
			
 
				 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency);
			
 
				 int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				 void _starpu_detect_implicit_data_deps(struct starpu_task *task);
			
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2008-2014,2016-2018                      Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2014,2016-2018,2020                 Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012,2016                           Inria
			
 
				  * Copyright (C) 2010-2013,2015-2017,2019                 CNRS
			
 
				  *
			
@@ -76,16 +76,16 @@ static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
 
				 static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id)
			
 
				 {
			
 
				 	struct _starpu_tag *tag;
			
 
				-	_STARPU_MALLOC(tag, sizeof(struct _starpu_tag));
			
 
				+	_STARPU_CALLOC(tag, 1, sizeof(struct _starpu_tag));
			
 
				 
			
 
				-	tag->job = NULL;
			
 
				-	tag->is_assigned = 0;
			
 
				-	tag->is_submitted = 0;
			
 
				+	//tag->job = NULL;
			
 
				+	//tag->is_assigned = 0;
			
 
				+	//tag->is_submitted = 0;
			
 
				 
			
 
				 	tag->id = id;
			
 
				 	tag->state = STARPU_INVALID_STATE;
			
 
				 
			
 
				-	_starpu_cg_list_init(&tag->tag_successors);
			
 
				+	_starpu_cg_list_init0(&tag->tag_successors);
			
 
				 
			
 
				 	_starpu_spin_init(&tag->lock);
			
 
				 
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2017                                Inria
			
 
				- * Copyright (C) 2008-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
@@ -70,11 +70,9 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 
				 	struct _starpu_job *job;
			
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				-	_STARPU_MALLOC(job, sizeof(*job));
			
 
				-
			
 
				 	/* As most of the fields must be initialized at NULL, let's put 0
			
 
				 	 * everywhere */
			
 
				-	memset(job, 0, sizeof(*job));
			
 
				+	_STARPU_CALLOC(job, 1, sizeof(*job));
			
 
				 
			
 
				 	if (task->dyn_handles)
			
 
				 	{
			
@@ -99,10 +97,10 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 
				 			maxnjobs = jobs;
			
 
				 	}
			
 
				 
			
 
				-	_starpu_cg_list_init(&job->job_successors);
			
 
				+	_starpu_cg_list_init0(&job->job_successors);
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&job->sync_mutex, NULL);
			
 
				-	STARPU_PTHREAD_COND_INIT(&job->sync_cond, NULL);
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&job->sync_mutex, NULL);
			
 
				+	STARPU_PTHREAD_COND_INIT0(&job->sync_cond, NULL);
			
 
				 
			
 
				 	/* By default we have sequential tasks */
			
 
				 	job->task_size = 1;
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2014,2016,2017                      Inria
			
 
				- * Copyright (C) 2008-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2017, 2019                          CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
@@ -582,7 +582,7 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 
				 			 * good-enough estimation */
			
 
				 			STARPU_HG_DISABLE_CHECKING(entry->nsample);
			
 
				 			STARPU_HG_DISABLE_CHECKING(entry->mean);
			
 
				-			entry->nerror = 0;
			
 
				+			//entry->nerror = 0;
			
 
				 		}
			
 
				 
			
 
				 		scan_history_entry(f, path, entry);
			
@@ -1886,18 +1886,18 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 
			
 
				 				/* Do not take the first measurement into account, it is very often quite bogus */
			
 
				 				/* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */
			
 
				-				entry->mean = 0;
			
 
				-				entry->sum = 0;
			
 
				+				//entry->mean = 0;
			
 
				+				//entry->sum = 0;
			
 
				 
			
 
				-				entry->deviation = 0.0;
			
 
				-				entry->sum2 = 0;
			
 
				+				//entry->deviation = 0.0;
			
 
				+				//entry->sum2 = 0;
			
 
				 
			
 
				 				entry->size = _starpu_job_get_data_size(model, arch, impl, j);
			
 
				 				entry->flops = j->task->flops;
			
 
				 
			
 
				 				entry->footprint = key;
			
 
				-				entry->nsample = 0;
			
 
				-				entry->nerror = 0;
			
 
				+				//entry->nsample = 0;
			
 
				+				//entry->nerror = 0;
			
 
				 
			
 
				 				insert_history_entry(entry, list, &per_arch_model->history);
			
 
				 			}
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -38,6 +38,12 @@
 
				 #ifdef STARPU_HAVE_SIMGRID_HOST_H
			
 
				 #include <simgrid/host.h>
			
 
				 #endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_ENGINE_H
			
 
				+#include <simgrid/engine.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_XBT_CONFIG_H
			
 
				+#include <xbt/config.h>
			
 
				+#endif
			
 
				 #include <smpi/smpi.h>
			
 
				 
			
 
				 #pragma weak starpu_main
			
@@ -137,9 +143,9 @@ int _starpu_simgrid_get_nbhosts(const char *prefix)
 
				 		char name[32];
			
 
				 		STARPU_ASSERT(starpu_mpi_world_rank);
			
 
				 		snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank());
			
 
				-#if defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts)
			
 
				+#if defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(HAVE_SG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts) || defined(sg_zone_get_hosts)
			
 
				 		hosts = xbt_dynar_new(sizeof(sg_host_t), NULL);
			
 
				-#  if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name)
			
 
				+#  if defined(HAVE_SG_ZONE_GET_HOSTS) || defined(sg_zone_get_hosts)
			
 
				 		sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
			
 
				 #  else
			
 
				 		MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
			
@@ -280,7 +286,11 @@ void _starpu_start_simgrid(int *argc, char **argv)
 
				 
			
 
				 	simgrid_started = 1;
			
 
				 
			
 
				+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
			
 
				+	simgrid_init(argc, argv);
			
 
				+#else
			
 
				 	MSG_init(argc, argv);
			
 
				+#endif
			
 
				 	/* Simgrid uses tiny stacks by default.  This comes unexpected to our users.  */
			
 
				 	unsigned stack_size = 8192;
			
 
				 #ifdef HAVE_GETRLIMIT
			
@@ -304,7 +314,11 @@ void _starpu_start_simgrid(int *argc, char **argv)
 
				 #else
			
 
				 	_starpu_simgrid_get_platform_path(4, path, sizeof(path));
			
 
				 #endif
			
 
				+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
			
 
				+	simgrid_load_platform(path);
			
 
				+#else
			
 
				 	MSG_create_environment(path);
			
 
				+#endif
			
 
				 
			
 
				 	simgrid_transfer_cost = starpu_get_env_number_default("STARPU_SIMGRID_TRANSFER_COST", 1);
			
 
				 }
			
@@ -378,14 +392,22 @@ int main(int argc, char **argv)
 
				 	_starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy);
			
 
				 
			
 
				 	/* And run maestro in the main thread */
			
 
				+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
			
 
				+	simgrid_run();
			
 
				+#else
			
 
				 	MSG_main();
			
 
				+#endif
			
 
				 	return main_ret;
			
 
				 }
			
 
				 
			
 
				-#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach)
			
 
				+#if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)
			
 
				 static void maestro(void *data STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				+#if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT)
			
 
				+	simgrid_run();
			
 
				+#else
			
 
				 	MSG_main();
			
 
				+#endif
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -721,7 +743,7 @@ void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct star
 
				 
			
 
				 /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers.  */
			
 
				 LIST_TYPE(transfer,
			
 
				-#ifdef HAVE_SG_HOST_SEND_TO
			
 
				+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
			
 
				 	size_t size;
			
 
				 #else
			
 
				 	msg_task_t task;
			
@@ -854,15 +876,20 @@ static void *transfer_execute(void *arg)
 
				 		if (t->last_transfer == transfer)
			
 
				 			t->last_transfer = NULL;
			
 
				 
			
 
				-#ifdef HAVE_SG_HOST_SEND_TO
			
 
				+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
			
 
				 		if (transfer->size)
			
 
				 #else
			
 
				 		if (transfer->task)
			
 
				 #endif
			
 
				 		{
			
 
				 			_STARPU_DEBUG("transfer %p started\n", transfer);
			
 
				-#ifdef HAVE_SG_HOST_SEND_TO
			
 
				-			sg_host_send_to(_starpu_simgrid_memory_node_get_host(transfer->src_node),
			
 
				+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
			
 
				+#ifdef HAVE_SG_HOST_SENDTO
			
 
				+			sg_host_sendto
			
 
				+#else
			
 
				+			sg_host_send_to
			
 
				+#endif
			
 
				+				(_starpu_simgrid_memory_node_get_host(transfer->src_node),
			
 
				 					_starpu_simgrid_memory_node_get_host(transfer->dst_node),
			
 
				 					transfer->size);
			
 
				 #else
			
@@ -963,7 +990,7 @@ static void _starpu_simgrid_wait_transfers(void)
 
				 	struct transfer *sync = transfer_new();
			
 
				 	struct transfer *cur;
			
 
				 
			
 
				-#ifdef HAVE_SG_HOST_SEND_TO
			
 
				+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
			
 
				 	sync->size = 0;
			
 
				 #else
			
 
				 	sync->task = NULL;
			
@@ -1031,7 +1058,7 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 
				 
			
 
				 	_STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size);
			
 
				 
			
 
				-#ifdef HAVE_SG_HOST_SEND_TO
			
 
				+#if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO)
			
 
				 	transfer->size = size;
			
 
				 #else
			
 
				 	msg_task_t task;
			
--- a/src/core/simgrid.h
+++ b/src/core/simgrid.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2016,2017                                Inria
			
 
				  * Copyright (C) 2013,2017                                CNRS
			
 
				- * Copyright (C) 2012-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2012-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -27,10 +27,23 @@ extern "C"
 
				 #ifdef STARPU_SIMGRID
			
 
				 #ifdef STARPU_HAVE_SIMGRID_MSG_H
			
 
				 #include <simgrid/msg.h>
			
 
				-#else
			
 
				+#elif defined(STARPU_HAVE_MSG_MSG_H)
			
 
				 #include <msg/msg.h>
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_HAVE_XBT_BASE_H
			
 
				+#include <xbt/base.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_VERSION_H
			
 
				+#include <simgrid/version.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_ZONE_H
			
 
				+#include <simgrid/zone.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_HOST_H
			
 
				+#include <simgrid/host.h>
			
 
				+#endif
			
 
				+
			
 
				 #include <xbt/xbt_os_time.h>
			
 
				 
			
 
				 struct _starpu_pthread_args
			
--- a/src/core/simgrid_cpp.cpp
+++ b/src/core/simgrid_cpp.cpp
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2016,2017                                CNRS
			
 
				- * Copyright (C) 2012-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2012-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2016,2017                                Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -21,19 +21,12 @@
 
				 #include <common/config.h>
			
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-#ifdef STARPU_HAVE_SIMGRID_MSG_H
			
 
				-#include <simgrid/msg.h>
			
 
				-#else
			
 
				-#include <msg/msg.h>
			
 
				-#endif
			
 
				 #if SIMGRID_VERSION >= 32190
			
 
				 #include <simgrid/simix.hpp>
			
 
				 #else
			
 
				 #include <simgrid/simix.h>
			
 
				 #endif
			
 
				-#ifdef STARPU_HAVE_SIMGRID_HOST_H
			
 
				-#include <simgrid/host.h>
			
 
				-#endif
			
 
				+#include <smpi/smpi.h>
			
 
				 
			
 
				 /* thread_create function which implements inheritance of MPI privatization */
			
 
				 /* See https://github.com/simgrid/simgrid/issues/139 */
			
@@ -42,9 +35,26 @@ typedef struct
 
				 {
			
 
				 	void_f_pvoid_t code;
			
 
				 	void *userparam;
			
 
				+#if SIMGRID_VERSION < 32501
			
 
				 	void *father_data;
			
 
				+#endif
			
 
				 } thread_data_t;
			
 
				 
			
 
				+#if SIMGRID_VERSION >= 32501
			
 
				+static void *_starpu_simgrid_xbt_thread_create_wrapper(void *arg)
			
 
				+{
			
 
				+	thread_data_t *t = (thread_data_t *) arg;
			
 
				+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
			
 
				+	starpu_sleep(0.000001);
			
 
				+#ifdef HAVE_SMPI_THREAD_CREATE
			
 
				+	/* Make this actor inherit SMPI data from father actor */
			
 
				+	SMPI_thread_create();
			
 
				+#endif
			
 
				+	t->code(t->userparam);
			
 
				+	free(t);
			
 
				+	return NULL;
			
 
				+}
			
 
				+#else
			
 
				 #if SIMGRID_VERSION >= 32190
			
 
				 static void _starpu_simgrid_xbt_thread_create_wrapper(void)
			
 
				 #else
			
@@ -74,9 +84,17 @@ static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_U
 
				 	return 0;
			
 
				 #endif
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				 void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
			
 
				 {
			
 
				+#if SIMGRID_VERSION >= 32501
			
 
				+	starpu_pthread_t t;
			
 
				+	thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t));
			
 
				+	res->userparam = param;
			
 
				+	res->code = code;
			
 
				+	starpu_pthread_create_on(name, &t, NULL, _starpu_simgrid_xbt_thread_create_wrapper, res, sg_host_self());
			
 
				+#else
			
 
				 #if SIMGRID_VERSION >= 32190 || defined(HAVE_SIMCALL_PROCESS_CREATE) || defined(simcall_process_create)
			
 
				 #ifdef HAVE_SMX_ACTOR_T
			
 
				 	smx_actor_t process STARPU_ATTRIBUTE_UNUSED;
			
@@ -122,6 +140,7 @@ void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, vo
 
				 #else
			
 
				 	STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry.");
			
 
				 #endif
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 #endif
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -1,9 +1,9 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2019                                Inria
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2017                                     Erwan Leria
			
 
				- * Copyright (C) 2010-2019                                CNRS
			
 
				+ * Copyright (C) 2010-2020                                CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
@@ -245,6 +245,12 @@ static int limit_max_submitted_tasks;
 
				 static int watchdog_crash;
			
 
				 static int watchdog_delay;
			
 
				 
			
 
				+/*
			
 
				+ * Function to call when watchdog detects that no task has finished for more than STARPU_WATCHDOG_TIMEOUT seconds
			
 
				+ */
			
 
				+static void (*watchdog_hook)(void *) = NULL;
			
 
				+static void * watchdog_hook_arg = NULL;
			
 
				+
			
 
				 #define _STARPU_TASK_MAGIC 42
			
 
				 
			
 
				 /* Called once at starpu_init */
			
@@ -788,7 +794,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
				 					  "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.",
			
 
				 					  task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);
			
 
				 
			
 
				-		if (task->dyn_handles)
			
 
				+		if (STARPU_UNLIKELY(task->dyn_handles))
			
 
				 		{
			
 
				 			_STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *));
			
 
				 		}
			
@@ -821,7 +827,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
				 		}
			
 
				 
			
 
				 		/* Check the type of worker(s) required by the task exist */
			
 
				-		if (!_starpu_worker_exists(task))
			
 
				+		if (STARPU_UNLIKELY(!_starpu_worker_exists(task)))
			
 
				 		{
			
 
				 			_STARPU_LOG_OUT_TAG("ENODEV");
			
 
				 			return -ENODEV;
			
@@ -830,7 +836,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
				 		/* In case we require that a task should be explicitly
			
 
				 		 * executed on a specific worker, we make sure that the worker
			
 
				 		 * is able to execute this task.  */
			
 
				-		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
			
 
				+		if (STARPU_UNLIKELY(task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0)))
			
 
				 		{
			
 
				 			_STARPU_LOG_OUT_TAG("ENODEV");
			
 
				 			return -ENODEV;
			
@@ -932,7 +938,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 
				 			_starpu_detect_implicit_data_deps(task);
			
 
				 	}
			
 
				 
			
 
				-	if (bundle)
			
 
				+	if (STARPU_UNLIKELY(bundle))
			
 
				 	{
			
 
				 		/* We need to make sure that models for other tasks of the
			
 
				 		 * bundle are also loaded, so the scheduler can estimate the
			
@@ -967,7 +973,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 
				 	 * dependency. */
			
 
				 	task->status = STARPU_TASK_BLOCKED;
			
 
				 
			
 
				-	if (profiling)
			
 
				+	if (STARPU_UNLIKELY(profiling))
			
 
				 		_starpu_clock_gettime(&info->submit_time);
			
 
				 
			
 
				 	ret = _starpu_submit_job(j, nodeps);
			
@@ -1563,14 +1569,18 @@ static void *watchdog_func(void *arg)
 
				 		if (!config->watchdog_ok && last_nsubmitted
			
 
				 				&& last_nsubmitted == starpu_task_nsubmitted())
			
 
				 		{
			
 
				-			_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
			
 
				-				    timeout);
			
 
				+			if (watchdog_hook == NULL)
			
 
				+				_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
			
 
				+									timeout);
			
 
				+			else
			
 
				+				watchdog_hook(watchdog_hook_arg);
			
 
				+
			
 
				 			if (watchdog_crash)
			
 
				 			{
			
 
				 				_STARPU_MSG("Crashing the process\n");
			
 
				 				raise(SIGABRT);
			
 
				 			}
			
 
				-			else
			
 
				+			else if (watchdog_hook == NULL)
			
 
				 				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
			
 
				 		}
			
 
				 		/* Only shout again after another period */
			
@@ -1580,7 +1590,13 @@ static void *watchdog_func(void *arg)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-void _starpu_watchdog_init(void)
			
 
				+void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg)
			
 
				+{
			
 
				+	watchdog_hook = hook;
			
 
				+	watchdog_hook_arg = hook_arg;
			
 
				+}
			
 
				+
			
 
				+void _starpu_watchdog_init()
			
 
				 {
			
 
				 	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 	char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT");
			
--- a/src/core/task_bundle.c
+++ b/src/core/task_bundle.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011-2014                                Université de Bordeaux
			
 
				+ * Copyright (C) 2011-2014, 2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  * Copyright (C) 2011,2013,2015-2017                      CNRS
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
@@ -29,15 +29,15 @@
 
				 /* Initialize a task bundle */
			
 
				 void starpu_task_bundle_create(starpu_task_bundle_t *bundle)
			
 
				 {
			
 
				-	_STARPU_MALLOC(*bundle, sizeof(struct _starpu_task_bundle));
			
 
				+	_STARPU_CALLOC(*bundle, 1, sizeof(struct _starpu_task_bundle));
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&(*bundle)->mutex, NULL);
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&(*bundle)->mutex, NULL);
			
 
				 	/* Of course at the beginning a bundle is open,
			
 
				 	 * user can insert and remove tasks from it */
			
 
				-	(*bundle)->closed = 0;
			
 
				+	//(*bundle)->closed = 0;
			
 
				 
			
 
				 	/* Start with an empty list */
			
 
				-	(*bundle)->list = NULL;
			
 
				+	//(*bundle)->list = NULL;
			
 
				 
			
 
				 }
			
 
				 
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2017                                Inria
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2017, 2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
@@ -21,6 +21,9 @@
 
				 #include <stdlib.h>
			
 
				 #include <stdio.h>
			
 
				 #include <common/config.h>
			
 
				+#ifdef HAVE_UNISTD_H
			
 
				+#include <unistd.h>
			
 
				+#endif
			
 
				 #include <core/workers.h>
			
 
				 #include <core/debug.h>
			
 
				 #include <core/topology.h>
			
@@ -2030,12 +2033,15 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
				 			 (previous >= 0 && previous == workerid) ||
			
 
				 			 (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])) ) )
			
 
				 		{
			
 
				+			char hostname[65];
			
 
				+			gethostname(hostname, sizeof(hostname));
			
 
				+
			
 
				 			if (previous == STARPU_ACTIVETHREAD)
			
 
				-				_STARPU_DISP("Warning: active thread %s was already bound to PU %d\n", cpu_name[cpuid], cpuid);
			
 
				+				_STARPU_DISP("[%s] Warning: active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
			
 
				 			else if (previous == STARPU_NONACTIVETHREAD)
			
 
				-				_STARPU_DISP("Warning: non-active thread %s was already bound to PU %d\n", cpu_name[cpuid], cpuid);
			
 
				+				_STARPU_DISP("[%s] Warning: non-active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
			
 
				 			else
			
 
				-				_STARPU_DISP("Warning: worker %d was already bound to PU %d\n", previous, cpuid);
			
 
				+				_STARPU_DISP("[%s] Warning: worker %d was already bound to PU %d\n", hostname, previous, cpuid);
			
 
				 
			
 
				 			if (workerid == STARPU_ACTIVETHREAD)
			
 
				 				_STARPU_DISP("and we were told to also bind active thread %s to it.\n", name);
			
@@ -2048,7 +2054,7 @@ int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid S
 
				 
			
 
				 			if (workerid >= 0)
			
 
				 				/* This shouldn't happen for workers */
			
 
				-				_STARPU_DISP("Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", config->topology.nhwcpus, config->topology.nhwpus);
			
 
				+				_STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwcpus, config->topology.nhwpus);
			
 
				 			ret = -1;
			
 
				 		}
			
 
				 		else
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2017,2019                           Inria
			
 
				- * Copyright (C) 2008-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
@@ -1202,7 +1202,7 @@ int _starpu_get_catch_signals(void);
 
				 static inline int _starpu_perf_counter_paused(void) 
			
 
				 {
			
 
				 	STARPU_RMB();
			
 
				-	return _starpu_config.perf_counter_pause_depth > 0;
			
 
				+	return STARPU_UNLIKELY(_starpu_config.perf_counter_pause_depth > 0);
			
 
				 }
			
 
				 
			
 
				 /* @}*/
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011,2012,2016,2017                      Inria
			
 
				- * Copyright (C) 2008-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2008-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010                                     Mehdi Juhoor
			
 
				  * Copyright (C) 2010-2013,2015-2019                      CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
@@ -228,24 +228,29 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 		else
			
 
				 			ops = initial_handle->ops;
			
 
				 
			
 
				+		/* As most of the fields must be initialized to NULL, let's put
			
 
				+		 * 0 everywhere */
			
 
				+		memset(child, 0, sizeof(*child));
			
 
				 		_starpu_data_handle_init(child, ops, initial_handle->mf_node);
			
 
				 
			
 
				-		child->nchildren = 0;
			
 
				-		child->nplans = 0;
			
 
				-		child->switch_cl = NULL;
			
 
				-		child->partitioned = 0;
			
 
				-		child->readonly = 0;
			
 
				+		//child->nchildren = 0;
			
 
				+		//child->nplans = 0;
			
 
				+		//child->switch_cl = NULL;
			
 
				+		//child->partitioned = 0;
			
 
				+		//child->readonly = 0;
			
 
				 		child->active = inherit_state;
			
 
				-		child->active_ro = 0;
			
 
				-                child->mpi_data = NULL;
			
 
				+		//child->active_ro = 0;
			
 
				+                //child->mpi_data = NULL;
			
 
				 		child->root_handle = initial_handle->root_handle;
			
 
				 		child->father_handle = initial_handle;
			
 
				-		child->active_children = NULL;
			
 
				-		child->active_readonly_children = NULL;
			
 
				-		child->nactive_readonly_children = 0;
			
 
				+		//child->active_children = NULL;
			
 
				+		//child->active_readonly_children = NULL;
			
 
				+		//child->nactive_readonly_children = 0;
			
 
				 		child->nsiblings = nparts;
			
 
				 		if (inherit_state)
			
 
				-			child->siblings = NULL;
			
 
				+		{
			
 
				+			//child->siblings = NULL;
			
 
				+		}
			
 
				 		else
			
 
				 			child->siblings = childrenp;
			
 
				 		child->sibling_index = i;
			
@@ -258,31 +263,31 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 		/* initialize the chunk lock */
			
 
				 		_starpu_data_requester_prio_list_init(&child->req_list);
			
 
				 		_starpu_data_requester_prio_list_init(&child->reduction_req_list);
			
 
				-		child->reduction_tmp_handles = NULL;
			
 
				-		child->write_invalidation_req = NULL;
			
 
				-		child->refcnt = 0;
			
 
				-		child->unlocking_reqs = 0;
			
 
				-		child->busy_count = 0;
			
 
				-		child->busy_waiting = 0;
			
 
				-		STARPU_PTHREAD_MUTEX_INIT(&child->busy_mutex, NULL);
			
 
				-		STARPU_PTHREAD_COND_INIT(&child->busy_cond, NULL);
			
 
				-		child->reduction_refcnt = 0;
			
 
				+		//child->reduction_tmp_handles = NULL;
			
 
				+		//child->write_invalidation_req = NULL;
			
 
				+		//child->refcnt = 0;
			
 
				+		//child->unlocking_reqs = 0;
			
 
				+		//child->busy_count = 0;
			
 
				+		//child->busy_waiting = 0;
			
 
				+		STARPU_PTHREAD_MUTEX_INIT0(&child->busy_mutex, NULL);
			
 
				+		STARPU_PTHREAD_COND_INIT0(&child->busy_cond, NULL);
			
 
				+		//child->reduction_refcnt = 0;
			
 
				 		_starpu_spin_init(&child->header_lock);
			
 
				 
			
 
				 		child->sequential_consistency = initial_handle->sequential_consistency;
			
 
				 		child->initialized = initial_handle->initialized;
			
 
				 		child->ooc = initial_handle->ooc;
			
 
				 
			
 
				-		STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
			
 
				+		//STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
			
 
				 		child->last_submitted_mode = STARPU_R;
			
 
				-		child->last_sync_task = NULL;
			
 
				-		child->last_submitted_accessors.task = NULL;
			
 
				+		//child->last_sync_task = NULL;
			
 
				+		//child->last_submitted_accessors.task = NULL;
			
 
				 		child->last_submitted_accessors.next = &child->last_submitted_accessors;
			
 
				 		child->last_submitted_accessors.prev = &child->last_submitted_accessors;
			
 
				-		child->post_sync_tasks = NULL;
			
 
				+		//child->post_sync_tasks = NULL;
			
 
				 		/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
			
 
				 		STARPU_HG_DISABLE_CHECKING(child->post_sync_tasks_cnt);
			
 
				-		child->post_sync_tasks_cnt = 0;
			
 
				+		//child->post_sync_tasks_cnt = 0;
			
 
				 
			
 
				 		/* The methods used for reduction are propagated to the
			
 
				 		 * children. */
			
@@ -290,17 +295,19 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 		child->init_cl = initial_handle->init_cl;
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				-		child->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				-		child->last_submitted_ghost_sync_id = 0;
			
 
				-		child->last_submitted_ghost_accessors_id = NULL;
			
 
				+		//child->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				+		//child->last_submitted_ghost_sync_id = 0;
			
 
				+		//child->last_submitted_ghost_accessors_id = NULL;
			
 
				 #endif
			
 
				 
			
 
				 		if (_starpu_global_arbiter)
			
 
				 			/* Just for testing purpose */
			
 
				 			starpu_data_assign_arbiter(child, _starpu_global_arbiter);
			
 
				 		else
			
 
				-			child->arbiter = NULL;
			
 
				-		_starpu_data_requester_prio_list_init(&child->arbitered_req_list);
			
 
				+		{
			
 
				+			//child->arbiter = NULL;
			
 
				+		}
			
 
				+		_starpu_data_requester_prio_list_init0(&child->arbitered_req_list);
			
 
				 
			
 
				 		for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 		{
			
@@ -317,16 +324,20 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 			if (inherit_state || !initial_replicate->automatically_allocated)
			
 
				 				child_replicate->allocated = initial_replicate->allocated;
			
 
				 			else
			
 
				-				child_replicate->allocated = 0;
			
 
				+			{
			
 
				+				//child_replicate->allocated = 0;
			
 
				+			}
			
 
				 			/* Do not allow memory reclaiming within the child for parent bits */
			
 
				-			child_replicate->automatically_allocated = 0;
			
 
				-			child_replicate->refcnt = 0;
			
 
				+			//child_replicate->automatically_allocated = 0;
			
 
				+			//child_replicate->refcnt = 0;
			
 
				 			child_replicate->memory_node = node;
			
 
				-			child_replicate->relaxed_coherency = 0;
			
 
				+			//child_replicate->relaxed_coherency = 0;
			
 
				 			if (inherit_state)
			
 
				 				child_replicate->initialized = initial_replicate->initialized;
			
 
				 			else
			
 
				-				child_replicate->initialized = 0;
			
 
				+			{
			
 
				+				//child_replicate->initialized = 0;
			
 
				+			}
			
 
				 
			
 
				 			/* update the interface */
			
 
				 			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
			
@@ -336,8 +347,8 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 
				 			f->filter_func(initial_interface, child_interface, f, i, nparts);
			
 
				 		}
			
 
				 
			
 
				-		child->per_worker = NULL;
			
 
				-		child->user_data = NULL;
			
 
				+		//child->per_worker = NULL;
			
 
				+		//child->user_data = NULL;
			
 
				 
			
 
				 		/* We compute the size and the footprint of the child once and
			
 
				 		 * store it in the handle */
			
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -87,7 +87,8 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 
				 	if (bcsr_father->nzval)
			
 
				 	{
			
 
				 		uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
			
 
				-		matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
			
 
				+		matrix_child->dev_handle = matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
			
 
				+		matrix_child->offset = 0;
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2017                                Inria
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -262,69 +262,69 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
 
				 	/* initialize the new lock */
			
 
				-	_starpu_data_requester_prio_list_init(&handle->req_list);
			
 
				-	handle->refcnt = 0;
			
 
				-	handle->unlocking_reqs = 0;
			
 
				-	handle->busy_count = 0;
			
 
				-	handle->busy_waiting = 0;
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&handle->busy_mutex, NULL);
			
 
				-	STARPU_PTHREAD_COND_INIT(&handle->busy_cond, NULL);
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->req_list);
			
 
				+	//handle->refcnt = 0;
			
 
				+	//handle->unlocking_reqs = 0;
			
 
				+	//handle->busy_count = 0;
			
 
				+	//handle->busy_waiting = 0;
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL);
			
 
				+	STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL);
			
 
				 	_starpu_spin_init(&handle->header_lock);
			
 
				 
			
 
				 	/* first take care to properly lock the data */
			
 
				 	_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				 	/* there is no hierarchy yet */
			
 
				-	handle->nchildren = 0;
			
 
				-	handle->nplans = 0;
			
 
				-	handle->switch_cl = NULL;
			
 
				-	handle->partitioned = 0;
			
 
				-	handle->readonly = 0;
			
 
				+	//handle->nchildren = 0;
			
 
				+	//handle->nplans = 0;
			
 
				+	//handle->switch_cl = NULL;
			
 
				+	//handle->partitioned = 0;
			
 
				+	//handle->readonly = 0;
			
 
				 	handle->active = 1;
			
 
				-	handle->active_ro = 0;
			
 
				+	//handle->active_ro = 0;
			
 
				 	handle->root_handle = handle;
			
 
				-	handle->father_handle = NULL;
			
 
				-	handle->active_children = NULL;
			
 
				-	handle->active_readonly_children = NULL;
			
 
				-	handle->nactive_readonly_children = 0;
			
 
				-	handle->nsiblings = 0;
			
 
				-	handle->siblings = NULL;
			
 
				-	handle->sibling_index = 0; /* could be anything for the root */
			
 
				+	//handle->father_handle = NULL;
			
 
				+	//handle->active_children = NULL;
			
 
				+	//handle->active_readonly_children = NULL;
			
 
				+	//handle->nactive_readonly_children = 0;
			
 
				+	//handle->nsiblings = 0;
			
 
				+	//handle->siblings = NULL;
			
 
				+	//handle->sibling_index = 0; /* could be anything for the root */
			
 
				 	handle->depth = 1; /* the tree is just a node yet */
			
 
				-        handle->mpi_data = NULL; /* invalid until set */
			
 
				+        //handle->mpi_data = NULL; /* invalid until set */
			
 
				 
			
 
				-	handle->is_not_important = 0;
			
 
				+	//handle->is_not_important = 0;
			
 
				 
			
 
				 	handle->sequential_consistency =
			
 
				 		starpu_data_get_default_sequential_consistency_flag();
			
 
				 	handle->initialized = home_node != -1;
			
 
				 	handle->ooc = 1;
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&handle->sequential_consistency_mutex, NULL);
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL);
			
 
				 	handle->last_submitted_mode = STARPU_R;
			
 
				-	handle->last_sync_task = NULL;
			
 
				-	handle->last_submitted_accessors.task = NULL;
			
 
				+	//handle->last_sync_task = NULL;
			
 
				+	//handle->last_submitted_accessors.task = NULL;
			
 
				 	handle->last_submitted_accessors.next = &handle->last_submitted_accessors;
			
 
				 	handle->last_submitted_accessors.prev = &handle->last_submitted_accessors;
			
 
				-	handle->post_sync_tasks = NULL;
			
 
				+	//handle->post_sync_tasks = NULL;
			
 
				 
			
 
				 	/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
			
 
				 	STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt);
			
 
				-	handle->post_sync_tasks_cnt = 0;
			
 
				+	//handle->post_sync_tasks_cnt = 0;
			
 
				 
			
 
				 	/* By default, there are no methods available to perform a reduction */
			
 
				-	handle->redux_cl = NULL;
			
 
				-	handle->init_cl = NULL;
			
 
				+	//handle->redux_cl = NULL;
			
 
				+	//handle->init_cl = NULL;
			
 
				 
			
 
				-	handle->reduction_refcnt = 0;
			
 
				-	_starpu_data_requester_prio_list_init(&handle->reduction_req_list);
			
 
				-	handle->reduction_tmp_handles = NULL;
			
 
				-	handle->write_invalidation_req = NULL;
			
 
				+	//handle->reduction_refcnt = 0;
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->reduction_req_list);
			
 
				+	//handle->reduction_tmp_handles = NULL;
			
 
				+	//handle->write_invalidation_req = NULL;
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				-	handle->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				-	handle->last_submitted_ghost_sync_id = 0;
			
 
				-	handle->last_submitted_ghost_accessors_id = NULL;
			
 
				+	//handle->last_submitted_ghost_sync_id_is_valid = 0;
			
 
				+	//handle->last_submitted_ghost_sync_id = 0;
			
 
				+	//handle->last_submitted_ghost_accessors_id = NULL;
			
 
				 #endif
			
 
				 
			
 
				 	handle->wt_mask = wt_mask;
			
@@ -339,8 +339,10 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 		/* Just for testing purpose */
			
 
				 		starpu_data_assign_arbiter(handle, _starpu_global_arbiter);
			
 
				 	else
			
 
				-		handle->arbiter = NULL;
			
 
				-	_starpu_data_requester_prio_list_init(&handle->arbitered_req_list);
			
 
				+	{
			
 
				+		//handle->arbiter = NULL;
			
 
				+	}
			
 
				+	_starpu_data_requester_prio_list_init0(&handle->arbitered_req_list);
			
 
				 	handle->last_locality = -1;
			
 
				 
			
 
				 	/* that new data is invalid from all nodes perspective except for the
			
@@ -352,28 +354,28 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 		replicate = &handle->per_node[node];
			
 
				 
			
 
				 		replicate->memory_node = node;
			
 
				-		replicate->relaxed_coherency = 0;
			
 
				-		replicate->refcnt = 0;
			
 
				+		//replicate->relaxed_coherency = 0;
			
 
				+		//replicate->refcnt = 0;
			
 
				 
			
 
				 		if ((int) node == home_node)
			
 
				 		{
			
 
				 			/* this is the home node with the only valid copy */
			
 
				 			replicate->state = STARPU_OWNER;
			
 
				 			replicate->allocated = 1;
			
 
				-			replicate->automatically_allocated = 0;
			
 
				+			//replicate->automatically_allocated = 0;
			
 
				 			replicate->initialized = 1;
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				 			/* the value is not available here yet */
			
 
				 			replicate->state = STARPU_INVALID;
			
 
				-			replicate->allocated = 0;
			
 
				-			replicate->initialized = 0;
			
 
				+			//replicate->allocated = 0;
			
 
				+			//replicate->initialized = 0;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	handle->per_worker = NULL;
			
 
				-	handle->user_data = NULL;
			
 
				+	//handle->per_worker = NULL;
			
 
				+	//handle->user_data = NULL;
			
 
				 
			
 
				 	/* now the data is available ! */
			
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
@@ -451,8 +453,8 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 
				 	handle->magic = 42;
			
 
				 	handle->ops = interface_ops;
			
 
				 	handle->mf_node = mf_node;
			
 
				-	handle->mpi_data = NULL;
			
 
				-	handle->partition_automatic_disabled = 0;
			
 
				+	//handle->mpi_data = NULL;
			
 
				+	//handle->partition_automatic_disabled = 0;
			
 
				 
			
 
				 	size_t interfacesize = interface_ops->interface_size;
			
 
				 
			
@@ -761,12 +763,12 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 		int home_node = handle->home_node;
			
 
				 		if (home_node >= 0)
			
 
				 		{
			
 
				-			struct _starpu_unregister_callback_arg arg;
			
 
				+			struct _starpu_unregister_callback_arg arg = { 0 };
			
 
				 			arg.handle = handle;
			
 
				 			arg.memory_node = (unsigned)home_node;
			
 
				 			arg.terminated = 0;
			
 
				-			STARPU_PTHREAD_MUTEX_INIT(&arg.mutex, NULL);
			
 
				-			STARPU_PTHREAD_COND_INIT(&arg.cond, NULL);
			
 
				+			STARPU_PTHREAD_MUTEX_INIT0(&arg.mutex, NULL);
			
 
				+			STARPU_PTHREAD_COND_INIT0(&arg.cond, NULL);
			
 
				 
			
 
				 			if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R,
			
 
				 					_starpu_data_unregister_fetch_data_callback, &arg))
			
--- a/src/datawizard/memstats.c
+++ b/src/datawizard/memstats.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2012,2015-2017                      CNRS
			
 
				- * Copyright (C) 2009,2010,2012,2014                      Université de Bordeaux
			
 
				+ * Copyright (C) 2009,2010,2012,2014,2020                 Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,11 +31,11 @@ void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle STARPU_ATTRI
 
				 {
			
 
				 #ifdef STARPU_MEMORY_STATS
			
 
				 	/* Stats initialization */
			
 
				-	handle->memory_stats->direct_access[node]=0;
			
 
				-	handle->memory_stats->loaded_shared[node]=0;
			
 
				-	handle->memory_stats->shared_to_owner[node]=0;
			
 
				-	handle->memory_stats->loaded_owner[node]=0;
			
 
				-	handle->memory_stats->invalidated[node]=0;
			
 
				+	//handle->memory_stats->direct_access[node]=0;
			
 
				+	//handle->memory_stats->loaded_shared[node]=0;
			
 
				+	//handle->memory_stats->shared_to_owner[node]=0;
			
 
				+	//handle->memory_stats->loaded_owner[node]=0;
			
 
				+	//handle->memory_stats->invalidated[node]=0;
			
 
				 #endif
			
 
				 }
			
 
				 
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011-2013,2017                           Inria
			
 
				- * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2020                                Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013,2015-2018                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -85,9 +85,9 @@ static inline void _starpu_data_acquire_wrapper_init(struct user_interaction_wra
 
				 	wrapper->handle = handle;
			
 
				 	wrapper->node = node;
			
 
				 	wrapper->mode = mode;
			
 
				-	wrapper->finished = 0;
			
 
				-	STARPU_PTHREAD_COND_INIT(&wrapper->cond, NULL);
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
			
 
				+	//wrapper->finished = 0;
			
 
				+	STARPU_PTHREAD_COND_INIT0(&wrapper->cond, NULL);
			
 
				+	STARPU_PTHREAD_MUTEX_INIT0(&wrapper->lock, NULL);
			
 
				 }
			
 
				 
			
 
				 /* Called to signal completion of asynchronous data acquisition */
			
@@ -216,6 +216,7 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 
				 	{
			
 
				 		struct starpu_task *new_task;
			
 
				 		struct _starpu_job *pre_sync_job, *post_sync_job;
			
 
				+		int submit_pre_sync = 0;
			
 
				 		wrapper->pre_sync_task = starpu_task_create();
			
 
				 		wrapper->pre_sync_task->name = "_starpu_data_acquire_cb_pre";
			
 
				 		wrapper->pre_sync_task->detach = 1;
			
@@ -237,18 +238,26 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 
				 		if (quick)
			
 
				 			pre_sync_job->quick_next = post_sync_job;
			
 
				 
			
 
				-		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				+		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, &submit_pre_sync, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 
			
 
				-		if (new_task)
			
 
				+		if (STARPU_UNLIKELY(new_task))
			
 
				 		{
			
 
				 			int ret = _starpu_task_submit_internally(new_task);
			
 
				 			STARPU_ASSERT(!ret);
			
 
				 		}
			
 
				 
			
 
				-		/* TODO detect if this is superflous */
			
 
				-		int ret = _starpu_task_submit_internally(wrapper->pre_sync_task);
			
 
				-		STARPU_ASSERT(!ret);
			
 
				+		if (submit_pre_sync)
			
 
				+		{
			
 
				+			int ret = _starpu_task_submit_internally(wrapper->pre_sync_task);
			
 
				+			STARPU_ASSERT(!ret);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			wrapper->pre_sync_task->detach = 0;
			
 
				+			starpu_task_destroy(wrapper->pre_sync_task);
			
 
				+			starpu_data_acquire_cb_pre_sync_callback(wrapper);
			
 
				+		}
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
@@ -360,6 +369,7 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 
				 	if (sequential_consistency)
			
 
				 	{
			
 
				 		struct starpu_task *new_task;
			
 
				+		int submit_pre_sync = 0;
			
 
				 		wrapper.pre_sync_task = starpu_task_create();
			
 
				 		wrapper.pre_sync_task->name = "_starpu_data_acquire_pre";
			
 
				 		wrapper.pre_sync_task->detach = 0;
			
@@ -370,18 +380,26 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum star
 
				 		wrapper.post_sync_task->detach = 1;
			
 
				 		wrapper.post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
			
 
				 
			
 
				-		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				+		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, &submit_pre_sync, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency);
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				-		if (new_task)
			
 
				+
			
 
				+		if (STARPU_UNLIKELY(new_task))
			
 
				 		{
			
 
				 			int ret = _starpu_task_submit_internally(new_task);
			
 
				 			STARPU_ASSERT(!ret);
			
 
				 		}
			
 
				 
			
 
				-		/* TODO detect if this is superflous */
			
 
				-		wrapper.pre_sync_task->synchronous = 1;
			
 
				-		int ret = _starpu_task_submit_internally(wrapper.pre_sync_task);
			
 
				-		STARPU_ASSERT(!ret);
			
 
				+		if (submit_pre_sync)
			
 
				+		{
			
 
				+			wrapper.pre_sync_task->synchronous = 1;
			
 
				+			int ret = _starpu_task_submit_internally(wrapper.pre_sync_task);
			
 
				+			STARPU_ASSERT(!ret);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			wrapper.pre_sync_task->detach = 0;
			
 
				+			starpu_task_destroy(wrapper.pre_sync_task);
			
 
				+		}
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/src/drivers/mpi/driver_mpi_source.h
+++ b/src/drivers/mpi/driver_mpi_source.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2016,2017                                Inria
			
 
				  * Copyright (C) 2017,2019                                CNRS
			
 
				- * Copyright (C) 2017                                     Université de Bordeaux
			
 
				+ * Copyright (C) 2017,2020                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -50,13 +50,13 @@ int _starpu_mpi_copy_mpi_to_ram_async(void *src, unsigned src_node, void *dst, u
 
				 int _starpu_mpi_copy_ram_to_mpi_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size, void * event);
			
 
				 int _starpu_mpi_copy_sink_to_sink_async(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size, void * event);
			
 
				 
			
 
				-int _starpu_mpi_copy_data_from_mpi_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_mpi_copy_data_from_mpi_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				-int _starpu_mpi_copy_data_from_cpu_to_mpi(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_cpu(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
			
 
				+int _starpu_mpi_copy_interface_from_mpi_to_mpi(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
			
 
				+int _starpu_mpi_copy_interface_from_cpu_to_mpi(starpu_interface_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_interface_request *req);
			
 
				 
			
 
				-int _starpu_mpi_copy_interface_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_mpi_copy_interface_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				-int _starpu_mpi_copy_interface_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mpi_copy_data_from_mpi_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				+int _starpu_mpi_copy_data_from_cpu_to_mpi(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
			
 
				 
			
 
				 int _starpu_mpi_is_direct_access_supported(unsigned node, unsigned handling_node);
			
 
				 uintptr_t _starpu_mpi_malloc_on_node(unsigned dst_node, size_t size, int flags);
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011,2012,2014                           Inria
			
 
				- * Copyright (C) 2010-2017,2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2017,2019-2020                      Université de Bordeaux
			
 
				  * Copyright (C) 2010-2017,2019                           CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
@@ -257,7 +257,7 @@ static void new_task(struct _starpu_job *j)
 
				 /* A new task was submitted, record it */
			
 
				 void _starpu_bound_record(struct _starpu_job *j)
			
 
				 {
			
 
				-	if (!_starpu_bound_recording)
			
 
				+	if (STARPU_LIKELY(!_starpu_bound_recording))
			
 
				 		return;
			
 
				 
			
 
				 	if (!good_job(j))
			
--- a/src/util/openmp_runtime_support.c
+++ b/src/util/openmp_runtime_support.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2014-2018                                Inria
			
 
				  * Copyright (C) 2014-2017,2019                           CNRS
			
 
				- * Copyright (C) 2015,2017,2019                           Université de Bordeaux
			
 
				+ * Copyright (C) 2015,2017,2019-2020                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -319,7 +319,7 @@ static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_regi
 
				 	_STARPU_CALLOC(region, 1, sizeof(*region));
			
 
				 	region->parent_region = parent_region;
			
 
				 	region->owner_device = owner_device;
			
 
				-	starpu_omp_thread_list_init(&region->thread_list);
			
 
				+	starpu_omp_thread_list_init0(&region->thread_list);
			
 
				 
			
 
				 	_starpu_spin_init(&region->lock);
			
 
				 	_starpu_spin_init(&region->registered_handles_lock);
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -32,6 +32,9 @@ EXTRA_DIST =					\
 
				 	regression/profiles.in			\
			
 
				 	regression/regression.sh.in		\
			
 
				 	regression/profiles.build.only.in	\
			
 
				+	microbenchs/tasks_data_overhead.sh	\
			
 
				+	microbenchs/sync_tasks_data_overhead.sh	\
			
 
				+	microbenchs/async_tasks_data_overhead.sh	\
			
 
				 	microbenchs/tasks_size_overhead.sh	\
			
 
				 	microbenchs/tasks_size_overhead_sched.sh	\
			
 
				 	microbenchs/tasks_size_overhead_scheds.sh	\
			
@@ -396,11 +399,18 @@ examplebin_PROGRAMS = \
 
				 	microbenchs/tasks_size_overhead		\
			
 
				 	microbenchs/local_pingpong
			
 
				 examplebin_SCRIPTS = \
			
 
				+	microbenchs/tasks_data_overhead.sh \
			
 
				+	microbenchs/sync_tasks_data_overhead.sh \
			
 
				+	microbenchs/async_tasks_data_overhead.sh \
			
 
				 	microbenchs/tasks_size_overhead.gp \
			
 
				 	microbenchs/tasks_size_overhead.sh
			
 
				 if !STARPU_SIMGRID
			
 
				 if !STARPU_USE_MPI_MASTER_SLAVE
			
 
				-TESTS += microbenchs/tasks_size_overhead_scheds.sh
			
 
				+TESTS += \
			
 
				+	microbenchs/tasks_data_overhead.sh \
			
 
				+	microbenchs/sync_tasks_data_overhead.sh \
			
 
				+	microbenchs/async_tasks_data_overhead.sh \
			
 
				+	microbenchs/tasks_size_overhead_scheds.sh
			
 
				 endif
			
 
				 endif
			
 
				 
			
--- a/tests/cholesky/sched.sh
+++ b/tests/cholesky/sched.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014,2019                      Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -39,7 +39,7 @@ trace_sched()
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				 			 echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
			
 
				-			 val=`$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
			
 
				+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
 
				 	done
			
--- a/tests/cholesky/sched_one_gpu.sh
+++ b/tests/cholesky/sched_one_gpu.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2009-2011,2014,2019                      Université de Bordeaux
			
 
				+# Copyright (C) 2009-2011,2014,2019-2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -50,7 +50,7 @@ trace_sched()
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				 			echo "$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null"
			
 
				-			val=`$ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
			
 
				+			val=`$STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky $OPTIONS 2> /dev/null`
			
 
				 			echo "$val" >> $filename
			
 
				 			echo "$val"
			
 
				 		done
			
@@ -67,7 +67,7 @@ mkdir -p $TIMINGDIR
 
				 # calibrate
			
 
				 for i in `seq 1 5` 
			
 
				 do
			
 
				-STARPU_SCHED="dm" $ROOTDIR/examples/cholesky/dw_cholesky -nblocks 16 -size 16384 2> /dev/null
			
 
				+STARPU_SCHED="dm" $STARPU_LAUNCH $ROOTDIR/examples/cholesky/dw_cholesky -nblocks 16 -size 16384 2> /dev/null
			
 
				 done
			
 
				 
			
 
				 for sched in $schedlist
			
--- a/tests/cholesky_ctxs/evaluate_expression.sh
+++ b/tests/cholesky_ctxs/evaluate_expression.sh
@@ -3,6 +3,7 @@
 
				 #
			
 
				 # Copyright (C) 2011                                     Inria
			
 
				 # Copyright (C) 2012,2017                                CNRS
			
 
				+# Copyright (C) 2020                                     Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -33,7 +34,7 @@ for s in `seq 1 $nsamples`
 
				 do
			
 
				     echo "$ROOTDIR/examples/$BENCH_NAME $OPTIONS"
			
 
				     
			
 
				-    val=`$ROOTDIR/examples/$BENCH_NAME $OPTIONS`
			
 
				+    val=`$STARPU_LAUNCH $ROOTDIR/examples/$BENCH_NAME $OPTIONS`
			
 
				     
			
 
				     echo "$val"
			
 
				     
			
--- a/tests/coverage/coverage.sh
+++ b/tests/coverage/coverage.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2010,2011,2014,2017                      Université de Bordeaux
			
 
				+# Copyright (C) 2010,2011,2014,2017,2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2011,2015,2017                      CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -29,109 +29,109 @@ apps()
 
				 {
			
 
				     if [ -f $exampledir/basic_examples/block ] ; then
			
 
				 	echo "block opencl"
			
 
				-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/block
			
 
				+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/block
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/basic_examples/variable ] ; then
			
 
				 	echo "variable opencl"
			
 
				-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable 100
			
 
				+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/variable 100
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "variable no worker"
			
 
				-	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/basic_examples/variable
			
 
				+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/basic_examples/variable
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/incrementer/incrementer ] ; then
			
 
				 	echo "incrementer opencl"
			
 
				-	STARPU_NCUDA=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer 10
			
 
				+	STARPU_NCUDA=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/incrementer/incrementer 10
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "incrementer no worker"
			
 
				-	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $exampledir/incrementer/incrementer
			
 
				+	STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $STARPU_LAUNCH $exampledir/incrementer/incrementer
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/tag_example/tag_example ] ; then
			
 
				 	echo "tag_example"
			
 
				-	$exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
			
 
				+	$STARPU_LAUNCH $exampledir/tag_example/tag_example -iter 64 -i 128 -j 24
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/tag_example/tag_example2 ] ; then
			
 
				 	echo "tag_example2"
			
 
				-	$exampledir/tag_example/tag_example2 -iter 64 -i 128
			
 
				+	$STARPU_LAUNCH $exampledir/tag_example/tag_example2 -iter 64 -i 128
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/cholesky/dw_cholesky ] ; then
			
 
				 	echo "chol.dm"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "chol.dmda"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $exampledir/cholesky/dw_cholesky -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "chol.cpu"
			
 
				-	STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "chol.gpu"
			
 
				-	STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $exampledir/cholesky/dw_cholesky -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/heat/heat ] ; then
			
 
				 	echo "heat.dm.4k.calibrate.v2"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.dm.8k.calibrate.v2"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.dm.8k.no.pin.v2"
			
 
				-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2
			
 
				+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2
			
 
				 	check_success $?
			
 
				 
			
 
				 #	echo "heat.dm.8k.v2.no.prio"
			
 
				-#	STARPU_SCHED="no-prio" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				+#	STARPU_SCHED="no-prio" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				 #	check_success $?
			
 
				 
			
 
				 	echo "heat.dm.8k.v2.random"
			
 
				-	STARPU_SCHED="random" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				+	STARPU_SCHED="random" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.dm.8k.v2"
			
 
				-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.greedy.8k.v2"
			
 
				-	STARPU_SCHED="greedy" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				+	STARPU_SCHED="greedy" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.8k.cg"
			
 
				-	$exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
			
 
				+	$STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "heat.dm.8k.cg"
			
 
				-	STARPU_SCHED="dm" $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
			
 
				+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg
			
 
				 	check_success $?
			
 
				     fi
			
 
				 
			
 
				     if [ -f $exampledir/mult/dw_mult_no_stride ] ; then
			
 
				 	echo "mult.dm.common"
			
 
				-	STARPU_SCHED="dm" $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model
			
 
				+	STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "mult.dm"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
			
 
				 	check_success $?
			
 
				 
			
 
				 	echo "mult.dmda"
			
 
				-	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
			
 
				+	STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin
			
 
				 	check_success $?
			
 
				     fi
			
 
				 }
			
--- a/tests/datawizard/acquire_cb.c
+++ b/tests/datawizard/acquire_cb.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011,2013,2014,2016                      Université de Bordeaux
			
 
				+ * Copyright (C) 2011,2013,2014,2016, 2020                      Université de Bordeaux
			
 
				  * Copyright (C) 2011-2013                                Inria
			
 
				  * Copyright (C) 2011-2013,2017                           CNRS
			
 
				  *
			
@@ -20,20 +20,27 @@
 
				 #include "../helper.h"
			
 
				 
			
 
				 /*
			
 
				- * Test that when using starpu_data_acquire_cb, the callback is properly called
			
 
				+ * Test that when using starpu_data_acquire_cb, the callback_w is properly called
			
 
				  */
			
 
				 
			
 
				 unsigned token = 0;
			
 
				 starpu_data_handle_t token_handle;
			
 
				 
			
 
				 static
			
 
				-void callback(void *arg)
			
 
				+void callback_w(void *arg)
			
 
				 {
			
 
				 	(void)arg;
			
 
				 	token = 42;
			
 
				         starpu_data_release(token_handle);
			
 
				 }
			
 
				 
			
 
				+static
			
 
				+void callback_r(void *arg)
			
 
				+{
			
 
				+	(void)arg;
			
 
				+        starpu_data_release(token_handle);
			
 
				+}
			
 
				+
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				 	int ret;
			
@@ -42,8 +49,48 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_unregister(token_handle);
			
 
				+	STARPU_ASSERT(token == 42);
			
 
				+
			
 
				+	token = 0;
			
 
				+
			
 
				+	starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned));
			
 
				+	starpu_data_acquire(token_handle, STARPU_W);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_release(token_handle);
			
 
				+	starpu_data_unregister(token_handle);
			
 
				+
			
 
				+	token = 0;
			
 
				+
			
 
				 	starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned));
			
 
				-        starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
			
 
				+	/* These are getting executed immediately */
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+
			
 
				+	starpu_data_acquire(token_handle, STARPU_W);
			
 
				+	/* These will wait for our release */
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL);
			
 
				+	starpu_data_release(token_handle);
			
 
				 
			
 
				 	starpu_data_unregister(token_handle);
			
 
				 
			
--- a/tests/datawizard/interfaces/test_interfaces.sh
+++ b/tests/datawizard/interfaces/test_interfaces.sh
@@ -17,7 +17,7 @@
 
				 
			
 
				 for i in bcsr block coo csr matrix multiformat variable vector void
			
 
				 do
			
 
				-    ./tests/datawizard/interfaces/$i/${i}_interface
			
 
				+    $STARPU_LAUNCH ./tests/datawizard/interfaces/$i/${i}_interface
			
 
				     ret=$?
			
 
				     if test "$ret" = "0"
			
 
				     then
			
--- a/tests/datawizard/locality.sh
+++ b/tests/datawizard/locality.sh
@@ -4,7 +4,7 @@
 
				 # Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
			
 
				 # Copyright (C) 2017                                     CNRS
			
 
				 # Copyright (C) 2017                                     Inria
			
 
				-# Copyright (C) 2017,2018-2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2017,2018-2020                           Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -33,8 +33,8 @@ if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
 
				 fi
			
 
				 
			
 
				 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
			
 
				-STARPU_SCHED=modular-eager STARPU_FXT_PREFIX=$PREFIX/ $PREFIX/locality
			
 
				-$PREFIX/../../tools/starpu_fxt_tool -memory-states -label-deps -i $PREFIX/prof_file_${USER}_0
			
 
				+STARPU_SCHED=modular-eager STARPU_FXT_PREFIX=$PREFIX/ $STARPU_LAUNCH $PREFIX/locality
			
 
				+$STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -memory-states -label-deps -i $PREFIX/prof_file_${USER}_0
			
 
				 
			
 
				 # Check that they are approved by Grenoble :)
			
 
				 
			
--- a/tests/experiments/bandwidth_cuda/bench_bandwidth.sh
+++ b/tests/experiments/bandwidth_cuda/bench_bandwidth.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2009-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -25,7 +25,7 @@ echo "H -> D"
 
				 for log in `seq 1 13`
			
 
				 do
			
 
				 	size=$((2**$log))
			
 
				-	echo "$size	`./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/htod-pin.data 
			
 
				+	echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/htod-pin.data 
			
 
				 done
			
 
				 
			
 
				 echo "D -> H"
			
@@ -33,7 +33,7 @@ echo "D -> H"
 
				 for log in `seq 1 13`
			
 
				 do
			
 
				 	size=$((2**$log))
			
 
				-	echo "$size	`./cuda_bandwidth -pin -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/dtoh-pin.data 
			
 
				+	echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -size $size -cpu-ld $size -gpu-ld $size -iter 50`" >> .results/dtoh-pin.data 
			
 
				 done
			
 
				 
			
 
				 ./bench_bandwidth.gp
			
@@ -48,7 +48,7 @@ do
 
				 	for log in `seq 1 $stridelog`
			
 
				 	do
			
 
				 		size=$((2**$log))
			
 
				-		echo "$size	`./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/htod-pin.$stridesize.data 
			
 
				+		echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -HtoD -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/htod-pin.$stridesize.data 
			
 
				 	done
			
 
				 done
			
 
				 
			
@@ -62,6 +62,6 @@ do
 
				 	for log in `seq 1 $stridelog`
			
 
				 	do
			
 
				 		size=$((2**$log))
			
 
				-		echo "$size	`./cuda_bandwidth -pin -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/dtoh-pin.$stridesize.data 
			
 
				+		echo "$size	`$STARPU_LAUNCH ./cuda_bandwidth -pin -size $size -cpu-ld $stridesize -gpu-ld $stridesize -iter 50`" >> .results/dtoh-pin.$stridesize.data 
			
 
				 	done
			
 
				 done
			
--- a/tests/heat/deps.sh
+++ b/tests/heat/deps.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -40,7 +40,7 @@ trace_deps()
 
				 		for iter in `seq 1 $maxiter`
			
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				-			 val=`$ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
 
				 	done
			
--- a/tests/heat/dmda.sh
+++ b/tests/heat/dmda.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2009-2011,2014, 2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2009-2011,2014,2019-2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -38,7 +38,7 @@ calibrate_point()
 
				 		export STARPU_SCHED=$strat
			
 
				 		export STARPU_CALIBRATE=1
			
 
				 		export STARPU_PREFETCH=$prefetch
			
 
				-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
			
 
				+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
			
 
				 		echo "$val"
			
 
				 	done
			
 
				 
			
@@ -76,14 +76,14 @@ do
 
				 	export STARPU_SCHED="dm"
			
 
				 	export STARPU_CALIBRATE=1
			
 
				 	export STARPU_PREFETCH=1
			
 
				-	valdm=$($ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdm)
			
 
				+	valdm=$($STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdm)
			
 
				 
			
 
				 	calibrate_point "dmda" $nblocks 1
			
 
				 
			
 
				 	export STARPU_SCHED="dmda"
			
 
				 	export STARPU_CALIBRATE=1
			
 
				 	export STARPU_PREFETCH=1
			
 
				-	valdmda=$($ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdmda)
			
 
				+	valdmda=$($STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -size $size -nblocks $nblocks -v3 2> logdmda)
			
 
				 	
			
 
				 	dmmiss=`grep "TOTAL MSI" logdm|sed -e "s/.*miss.*[1-9]* (\(.*\) %)/\1/"`
			
 
				 	dmtotal=`grep "TOTAL transfers" logdm|sed -e "s/TOTAL transfers \(.*\) MB/\1/"`
			
--- a/tests/heat/gflops.sh
+++ b/tests/heat/gflops.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -49,7 +49,7 @@ measure_heat()
 
				 	for i in `seq 1 $nsample`
			
 
				 	do
			
 
				 		echo "iter $i/$nsample"
			
 
				-		val=`$ROOTDIR/examples/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
			
 
				+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
			
 
				 		total=`echo "$val + $total" |bc -l`
			
 
				 	done
			
 
				 
			
--- a/tests/heat/gflops_sched.sh
+++ b/tests/heat/gflops_sched.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -51,7 +51,7 @@ measure_heat()
 
				 	for i in `seq 1 $nsample`
			
 
				 	do
			
 
				 		echo "iter $i/$nsample"
			
 
				-		val=`STARPU_SCHED=$policy $ROOTDIR/examples/heat/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
			
 
				+		val=`STARPU_SCHED=$policy $STARPU_LAUNCH $ROOTDIR/examples/heat/heat -nthick $thick -ntheta $theta -nblocks $nblocks -pin -v2 2>/dev/null`
			
 
				 		total=`echo "$val + $total" |bc -l`
			
 
				 	done
			
 
				 
			
--- a/tests/heat/granularity.sh
+++ b/tests/heat/granularity.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -62,7 +62,7 @@ trace_granularity()
 
				 		for iter in `seq 1 $maxiter`
			
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				-			 val=`STARPU_NCPUS=8 STARPU_NCUDA=3 STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				+			 val=`STARPU_NCPUS=8 STARPU_NCUDA=3 STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val"
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
@@ -103,7 +103,7 @@ trace_granularity_hybrid()
 
				 		for iter in `seq 1 $maxiter`
			
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				-			 val=`STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				+			 val=`STARPU_SCHED="dmda" STARPU_PREFETCH=1 STARPU_CALIBRATE=1 $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val"
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
@@ -123,12 +123,12 @@ calibrate_grain()
 
				 
			
 
				 	OPTIONS="-pin -nblocks $blocks -size $size -v3"
			
 
				 
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_SCHED="dm" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_SCHED="dm" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 STARPU_CALIBRATE=1 STARPU_PREFETCH=1 STARPU_SCHED="dmda" $STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				 }
			
 
				 
			
 
				 mkdir -p $TIMINGDIR
			
--- a/tests/heat/heat.sh
+++ b/tests/heat/heat.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -47,7 +47,7 @@ do
 
				 			export STARPU_NCUDA=$cublas
			
 
				 
			
 
				 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
			
 
				-			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
 
				+			$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
 
				 		done
			
 
				 	done
			
 
				 
			
@@ -63,7 +63,7 @@ do
 
				 			export STARPU_NCUDA=$cublas
			
 
				 
			
 
				 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
			
 
				-			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
 
				+			$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
 
				 		done
			
 
				 	done
			
 
				 done
			
@@ -90,14 +90,14 @@ do
 
				 
			
 
				 		echo "size $size cpus 4 cublas 0 blocks $blocks"
			
 
				 		filename=$TIMINGDIR/timing.4.0.$size.$blocks
			
 
				-		$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
			
 
				+		$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
			
 
				 
			
 
				 		export STARPU_NCPUS=3
			
 
				 		export STARPU_NCUDA=1
			
 
				 
			
 
				 		echo "size $size cpus 3 cublas 1 blocks $blocks"
			
 
				 		filename=$TIMINGDIR/timing.3.1.$size.$blocks
			
 
				-		$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
			
 
				+		$STARPU_LAUNCH $ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
			
 
				 	done
			
 
				 done
			
 
				 
			
--- a/tests/heat/model_perturbation.sh
+++ b/tests/heat/model_perturbation.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2019-2020                 Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -57,7 +57,7 @@ trace_perturbation()
 
				 		for iter in `seq 1 $nsamples`
			
 
				 		do
			
 
				 			echo "$iter / $nsamples"
			
 
				-			 val=`$ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				+			 val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
 
				 	done
			
--- a/tests/heat/sched.sh
+++ b/tests/heat/sched.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014, 2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014, 2019-2020                           Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -38,7 +38,7 @@ calibrate_point()
 
				 		export STARPU_SCHED=$strat
			
 
				 		export STARPU_CALIBRATE=1
			
 
				 		export STARPU_PREFETCH=$prefetch
			
 
				-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
			
 
				+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3 2> /dev/null`
			
 
				 		echo "$val"
			
 
				 	done
			
 
				 
			
@@ -68,7 +68,7 @@ trace_point()
 
				 		export STARPU_SCHED=$strat
			
 
				 		export STARPU_CALIBRATE=$docalibrate
			
 
				 		export STARPU_PREFETCH=$prefetch
			
 
				-		val=`$ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3  2> /dev/null`
			
 
				+		val=`$STARPU_LAUNCH $ROOTDIR/examples/heat/heat -pin -nblocks $nblocks -size $size -v3  2> /dev/null`
			
 
				 		echo "$val"
			
 
				 		echo "$val" >> $filename
			
 
				 	done
			
--- a/tests/heat/speedup.sh
+++ b/tests/heat/speedup.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -41,7 +41,7 @@ do
 
				 		echo "ncpus $cpus size $size"
			
 
				 
			
 
				 		filename=$TIMINGDIR/timing.$cpus.$size
			
 
				-		$ROOTDIR/examples/heat -v2 -pin -nthick 34 -ntheta $(($theta+2)) -nblocks 16 2>/dev/null| tee $filename
			
 
				+		$STARPU_LAUNCH $ROOTDIR/examples/heat -v2 -pin -nthick 34 -ntheta $(($theta+2)) -nblocks 16 2>/dev/null| tee $filename
			
 
				 
			
 
				 		echo "$cpus	`cat $TIMINGDIR/timing.$cpus.$size`	`cat  $TIMINGDIR/timing.1.$size`" >> $TIMINGDIR/speedup.$size
			
 
				 	done
			
--- a/tests/incrementer/speed.sh
+++ b/tests/incrementer/speed.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2010,2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2010,2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -27,7 +27,7 @@ max=20
 
				 
			
 
				 for logi in `seq 0 $max`
			
 
				 do
			
 
				-	$examplebindir/incrementer $i 2> .tmpperf
			
 
				+	$STARPU_LAUNCH $examplebindir/incrementer $i 2> .tmpperf
			
 
				 
			
 
				 	grep "ms" .tmpperf
			
 
				 	grep "ms" .tmpperf | sed -e "s/^\(.*\) elems took \(.*\) ms$/\1	\2/" >> .perftable 
			
--- a/tests/loader.c
+++ b/tests/loader.c
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2011,2012,2017                           Inria
			
 
				  * Copyright (C) 2011-2020                                CNRS
			
 
				- * Copyright (C) 2010,2014-2018                           Université de Bordeaux
			
 
				+ * Copyright (C) 2010,2014-2018,2020                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -219,6 +219,8 @@ int main(int argc, char *argv[])
 
				 	char *test_args;
			
 
				 	char *launcher;
			
 
				 	char *launcher_args;
			
 
				+	char *libtool;
			
 
				+	const char *top_builddir = getenv ("top_builddir");
			
 
				 	struct sigaction sa;
			
 
				 	int   ret;
			
 
				 	struct timeval start;
			
@@ -291,6 +293,54 @@ int main(int argc, char *argv[])
 
				 	if (launcher_args)
			
 
				 		launcher_args=strdup(launcher_args);
			
 
				 
			
 
				+	if (top_builddir == NULL)
			
 
				+	{
			
 
				+		fprintf(stderr,
			
 
				+			"warning: $top_builddir undefined, "
			
 
				+			"so $STARPU_CHECK_LAUNCHER ignored\n");
			
 
				+		launcher = NULL;
			
 
				+		launcher_args = NULL;
			
 
				+		libtool = NULL;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1);
			
 
				+		strcpy(libtool, top_builddir);
			
 
				+		strcat(libtool, "/libtool");
			
 
				+	}
			
 
				+
			
 
				+	if (launcher)
			
 
				+	{
			
 
				+		const char *top_srcdir = getenv("top_srcdir");
			
 
				+		decode(&launcher, "@top_srcdir@", top_srcdir);
			
 
				+		decode(&launcher_args, "@top_srcdir@", top_srcdir);
			
 
				+	}
			
 
				+
			
 
				+	size_t len = strlen(test_name);
			
 
				+	if (launcher && len >= 3 &&
			
 
				+	    test_name[len-3] == '.' &&
			
 
				+	    test_name[len-2] == 's' &&
			
 
				+	    test_name[len-1] == 'h')
			
 
				+	{
			
 
				+		/* This is a shell script, don't run the check on bash, but pass
			
 
				+		 * the script the decoded variables */
			
 
				+		setenv("STARPU_CHECK_LAUNCHER", launcher, 1);
			
 
				+		if (launcher_args)
			
 
				+			setenv("STARPU_CHECK_LAUNCHER_ARGS", launcher_args, 1);
			
 
				+		else
			
 
				+			launcher_args = "";
			
 
				+
			
 
				+		/* And give a convenience macro */
			
 
				+		size_t len_launch = strlen(libtool) + 1 + strlen("--mode=execute") + 1
			
 
				+				  + strlen(launcher) + 1 + strlen(launcher_args) + 1;
			
 
				+		char *launch = malloc(len_launch);
			
 
				+		snprintf(launch, len_launch, "%s --mode=execute %s %s", libtool, launcher, launcher_args);
			
 
				+		setenv("STARPU_LAUNCH", launch, 1);
			
 
				+
			
 
				+		launcher = NULL;
			
 
				+		launcher_args = NULL;
			
 
				+	}
			
 
				+
			
 
				 	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
			
 
				 
			
 
				 	/* set SIGALARM handler */
			
@@ -308,19 +358,10 @@ int main(int argc, char *argv[])
 
				 			/* "Launchers" such as Valgrind need to be inserted
			
 
				 			 * after the Libtool-generated wrapper scripts, hence
			
 
				 			 * this special-case.  */
			
 
				-			const char *top_builddir = getenv ("top_builddir");
			
 
				-			const char *top_srcdir = getenv("top_srcdir");
			
 
				 			if (top_builddir != NULL)
			
 
				 			{
			
 
				 				char *launcher_argv[100];
			
 
				 				int i=3;
			
 
				-				char libtool[strlen(top_builddir)
			
 
				-					     + sizeof("libtool") + 1];
			
 
				-				strcpy(libtool, top_builddir);
			
 
				-				strcat(libtool, "/libtool");
			
 
				-
			
 
				-				decode(&launcher, "@top_srcdir@", top_srcdir);
			
 
				-				decode(&launcher_args, "@top_srcdir@", top_srcdir);
			
 
				 
			
 
				 				launcher_argv[0] = libtool;
			
 
				 				launcher_argv[1] = "--mode=execute";
			
@@ -341,9 +382,6 @@ int main(int argc, char *argv[])
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				fprintf(stderr,
			
 
				-					"warning: $top_builddir undefined, "
			
 
				-					"so $STARPU_CHECK_LAUNCHER ignored\n");
			
 
				 				execl(test_name, test_name, test_args, NULL);
			
 
				 			}
			
 
				 		}
			
--- a/tests/main/combined_workers/bfs/run.sh
+++ b/tests/main/combined_workers/bfs/run.sh
@@ -3,7 +3,7 @@
 
				 #
			
 
				 # Copyright (C) 2012,2016,2017                           CNRS
			
 
				 # Copyright (C) 2012                                     Inria
			
 
				-# Copyright (C) 2014,2019                                Université de Bordeaux
			
 
				+# Copyright (C) 2014,2019-2020                           Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,4 +31,4 @@ export STARPU_NCUDA=0
 
				 export STARPU_NOPENCL=0
			
 
				 export STARPU_WORKER_STATS=1
			
 
				 export STARPU_CALIBRATE=1
			
 
				-./bfs data/graph65536.txt
			
 
				+$STARPU_LAUNCH ./bfs data/graph65536.txt
			
--- a/tests/memory/memstress.sh
+++ b/tests/memory/memstress.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -41,7 +41,7 @@ trace_stress()
 
				 		echo "Computing size $size with $memstress MB of memory LESS"
			
 
				 		
			
 
				 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
			
 
				-		timing=`$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				+		timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				 	
			
 
				 		echo "size : $size memstress $memstress => $timing us"
			
 
				 
			
--- a/tests/memory/memstress2.sh
+++ b/tests/memory/memstress2.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -40,7 +40,7 @@ trace_stress()
 
				 
			
 
				 		
			
 
				 		echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null"
			
 
				-		timing=`$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				+		timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				 	
			
 
				 		echo "size : $size memstress $stress => $timing us"
			
 
				 
			
--- a/tests/microbenchs/async_tasks_data_overhead.sh
+++ b/tests/microbenchs/async_tasks_data_overhead.sh
@@ -0,0 +1,19 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020                                     Université de Bordeaux
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+ROOT=${0%.sh}
			
 
				+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
			
 
				+exec $STARPU_LAUNCH $ROOT -b 1 "$@"
			
--- a/tests/microbenchs/async_tasks_overhead.c
+++ b/tests/microbenchs/async_tasks_overhead.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014,2016                           Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014,2016,2020                      Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -24,7 +24,17 @@
 
				  * Measure the cost of submitting asynchronous tasks
			
 
				  */
			
 
				 
			
 
				+starpu_data_handle_t data_handles[8];
			
 
				+float *buffers[8];
			
 
				+
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+static unsigned ntasks = 128;
			
 
				+#else
			
 
				 static unsigned ntasks = 65536;
			
 
				+#endif
			
 
				+static unsigned nbuffers = 0;
			
 
				+
			
 
				+#define BUFFERSIZE 16
			
 
				 
			
 
				 //static unsigned finished = 0;
			
 
				 
			
@@ -45,36 +55,29 @@ static struct starpu_codelet dummy_codelet =
 
				         .opencl_funcs = {dummy_func},
			
 
				 	.cpu_funcs_name = {"dummy_func"},
			
 
				 	.model = NULL,
			
 
				-	.nbuffers = 0
			
 
				+	.nbuffers = 0,
			
 
				+	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
			
 
				 };
			
 
				 
			
 
				-//static void inject_one_task(void)
			
 
				-//{
			
 
				-//	struct starpu_task *task = starpu_task_create();
			
 
				-//
			
 
				-//	task->cl = &dummy_codelet;
			
 
				-//	task->cl_arg = NULL;
			
 
				-//	task->detach = 0;
			
 
				-//
			
 
				-//	int ret = starpu_task_submit(task);
			
 
				-//	STARPU_ASSERT(!ret);
			
 
				-//}
			
 
				-
			
 
				 static void usage(char **argv)
			
 
				 {
			
 
				-	fprintf(stderr, "%s [-i ntasks] [-p sched_policy] [-h]\n", argv[0]);
			
 
				-	exit(-1);
			
 
				+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
			
 
				+	exit(EXIT_FAILURE);
			
 
				 }
			
 
				 
			
 
				 static void parse_args(int argc, char **argv, struct starpu_conf *conf)
			
 
				 {
			
 
				 	int c;
			
 
				-	while ((c = getopt(argc, argv, "i:p:h")) != -1)
			
 
				+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
			
 
				 	switch(c)
			
 
				 	{
			
 
				 		case 'i':
			
 
				 			ntasks = atoi(optarg);
			
 
				 			break;
			
 
				+		case 'b':
			
 
				+			nbuffers = atoi(optarg);
			
 
				+			dummy_codelet.nbuffers = nbuffers;
			
 
				+			break;
			
 
				 		case 'p':
			
 
				 			conf->sched_policy_name = optarg;
			
 
				 			break;
			
@@ -96,19 +99,22 @@ int main(int argc, char **argv)
 
				 	starpu_conf_init(&conf);
			
 
				 	conf.ncpus = 2;
			
 
				 
			
 
				-#ifdef STARPU_QUICK_CHECK
			
 
				-	ntasks = 128;
			
 
				-#endif
			
 
				-
			
 
				 	parse_args(argc, argv, &conf);
			
 
				 
			
 
				 	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				+	unsigned buffer;
			
 
				+	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+	{
			
 
				+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
			
 
				+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
			
 
				+	}
			
 
				+
			
 
				 	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
			
 
				 
			
 
				-	fprintf(stderr, "#tasks : %u\n", ntasks);
			
 
				+	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
			
 
				 
			
 
				 	/* Create an array of tasks */
			
 
				 	struct starpu_task **tasks = (struct starpu_task **) malloc(ntasks*sizeof(struct starpu_task *));
			
@@ -117,8 +123,14 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		struct starpu_task *task = starpu_task_create();
			
 
				 		task->cl = &dummy_codelet;
			
 
				-		task->cl_arg = NULL;
			
 
				 		task->detach = 0;
			
 
				+
			
 
				+		/* we have 8 buffers at most */
			
 
				+		for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+		{
			
 
				+			task->handles[buffer] = data_handles[buffer];
			
 
				+		}
			
 
				+
			
 
				 		tasks[i] = task;
			
 
				 	}
			
 
				 
			
@@ -165,21 +177,37 @@ int main(int argc, char **argv)
 
				 
			
 
				                 if (output_dir && bench_id)
			
 
				 		{
			
 
				+                        char number[1+sizeof(nbuffers)*3+1];
			
 
				+                        const char *numberp;
			
 
				                         char file[1024];
			
 
				                         FILE *f;
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_total.dat", output_dir);
			
 
				+                        if (nbuffers)
			
 
				+                        {
			
 
				+                                snprintf(number, sizeof(number), "_%u", nbuffers);
			
 
				+                                numberp = number;
			
 
				+                        }
			
 
				+                        else
			
 
				+                                numberp = "";
			
 
				+
			
 
				+                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_total%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
			
 
				                         fclose(f);
			
 
				                 }
			
 
				         }
			
 
				 
			
 
				+	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+	{
			
 
				+		starpu_data_unregister(data_handles[buffer]);
			
 
				+		starpu_free((void*)buffers[buffer]);
			
 
				+	}
			
 
				+
			
 
				 	starpu_shutdown();
			
 
				 	free(tasks);
			
 
				 
			
--- a/tests/microbenchs/microbench.sh
+++ b/tests/microbenchs/microbench.sh
@@ -1,7 +1,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2016,2017                                CNRS
			
 
				-# Copyright (C) 2016,2017,2019                           Université de Bordeaux
			
 
				+# Copyright (C) 2016,2017,2019-2020                      Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -50,7 +50,7 @@ test_scheds()
 
				 	for sched in $SCHEDS;
			
 
				 	do
			
 
				 	    	set +e
			
 
				-		STARPU_SCHED=$sched $(dirname $0)/$TEST "$@"
			
 
				+		STARPU_SCHED=$sched $STARPU_LAUNCH $(dirname $0)/$TEST "$@"
			
 
				 		ret=$?
			
 
				 	    	set -e
			
 
				 		if test $ret = 0
			
--- a/tests/microbenchs/starpu_check.sh
+++ b/tests/microbenchs/starpu_check.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/sh
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2009-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2011,2015,2017                      CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -40,7 +40,7 @@ test_with_timeout()
 
				 
			
 
				 	echo "$application"
			
 
				 
			
 
				-	$application > /dev/null 2> /dev/null & _pid_appli=$!;
			
 
				+	$STARPU_LAUNCH $application > /dev/null 2> /dev/null & _pid_appli=$!;
			
 
				 	(sleep $timeout ; kill -9 $_pid_appli 2> /dev/null) & _pid_killer=$!
			
 
				 	wait $_pid_appli
			
 
				 	ret=$?
			
--- a/tests/microbenchs/sync_tasks_data_overhead.sh
+++ b/tests/microbenchs/sync_tasks_data_overhead.sh
@@ -0,0 +1,19 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020                                     Université de Bordeaux
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+ROOT=${0%.sh}
			
 
				+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
			
 
				+exec $STARPU_LAUNCH $ROOT -b 1 "$@"
			
--- a/tests/microbenchs/sync_tasks_overhead.c
+++ b/tests/microbenchs/sync_tasks_overhead.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014,2016                           Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014,2016,2020                      Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -25,7 +25,17 @@
 
				  * Measure the cost of submitting synchronous tasks
			
 
				  */
			
 
				 
			
 
				+starpu_data_handle_t data_handles[8];
			
 
				+float *buffers[8];
			
 
				+
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+static unsigned ntasks = 128;
			
 
				+#else
			
 
				 static unsigned ntasks = 65536;
			
 
				+#endif
			
 
				+static unsigned nbuffers = 0;
			
 
				+
			
 
				+#define BUFFERSIZE 16
			
 
				 
			
 
				 void dummy_func(void *descr[], void *arg)
			
 
				 {
			
@@ -40,11 +50,11 @@ static struct starpu_codelet dummy_codelet =
 
				         .opencl_funcs = {dummy_func},
			
 
				 	.cpu_funcs_name = {"dummy_func"},
			
 
				 	.model = NULL,
			
 
				-	.nbuffers = 0
			
 
				+	.nbuffers = 0,
			
 
				+	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
			
 
				 };
			
 
				 
			
 
				-static
			
 
				-int inject_one_task(void)
			
 
				+static int inject_one_task(void)
			
 
				 {
			
 
				 	int ret;
			
 
				 	struct starpu_task *task = starpu_task_create();
			
@@ -59,15 +69,31 @@ int inject_one_task(void)
 
				 
			
 
				 }
			
 
				 
			
 
				-static void parse_args(int argc, char **argv)
			
 
				+static void usage(char **argv)
			
 
				+{
			
 
				+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
			
 
				+	exit(EXIT_FAILURE);
			
 
				+}
			
 
				+
			
 
				+static void parse_args(int argc, char **argv, struct starpu_conf *conf)
			
 
				 {
			
 
				 	int c;
			
 
				-	while ((c = getopt(argc, argv, "i:")) != -1)
			
 
				+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
			
 
				 	switch(c)
			
 
				 	{
			
 
				 		case 'i':
			
 
				 			ntasks = atoi(optarg);
			
 
				 			break;
			
 
				+		case 'b':
			
 
				+			nbuffers = atoi(optarg);
			
 
				+			dummy_codelet.nbuffers = nbuffers;
			
 
				+			break;
			
 
				+		case 'p':
			
 
				+			conf->sched_policy_name = optarg;
			
 
				+			break;
			
 
				+		case 'h':
			
 
				+			usage(argv);
			
 
				+			break;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -82,22 +108,35 @@ int main(int argc, char **argv)
 
				 	starpu_conf_init(&conf);
			
 
				 	conf.ncpus = 2;
			
 
				 
			
 
				-#ifdef STARPU_QUICK_CHECK
			
 
				-	ntasks = 128;
			
 
				-#endif
			
 
				-
			
 
				-	parse_args(argc, argv);
			
 
				+	parse_args(argc, argv, &conf);
			
 
				 
			
 
				 	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				-	fprintf(stderr, "#tasks : %u\n", ntasks);
			
 
				+	unsigned buffer;
			
 
				+	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+	{
			
 
				+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
			
 
				+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
			
 
				+	}
			
 
				+
			
 
				+	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
			
 
				 
			
 
				 	start = starpu_timing_now();
			
 
				 	for (i = 0; i < ntasks; i++)
			
 
				 	{
			
 
				-		ret = inject_one_task();
			
 
				+		struct starpu_task *task = starpu_task_create();
			
 
				+		task->cl = &dummy_codelet;
			
 
				+		task->synchronous = 1;
			
 
				+
			
 
				+		/* we have 8 buffers at most */
			
 
				+		for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+		{
			
 
				+			task->handles[buffer] = data_handles[buffer];
			
 
				+		}
			
 
				+
			
 
				+		ret = starpu_task_submit(task);
			
 
				 		if (ret == -ENODEV) goto enodev;
			
 
				 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 	}
			
@@ -114,21 +153,37 @@ int main(int argc, char **argv)
 
				 
			
 
				                 if (output_dir && bench_id)
			
 
				 		{
			
 
				+                        char number[1+sizeof(nbuffers)*3+1];
			
 
				+                        const char *numberp;
			
 
				                         char file[1024];
			
 
				                         FILE *f;
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total.dat", output_dir);
			
 
				+                        if (nbuffers)
			
 
				+                        {
			
 
				+                                snprintf(number, sizeof(number), "_%u", nbuffers);
			
 
				+                                numberp = number;
			
 
				+                        }
			
 
				+                        else
			
 
				+                                numberp = "";
			
 
				+
			
 
				+                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
			
 
				                         fclose(f);
			
 
				                 }
			
 
				         }
			
 
				 
			
 
				+	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+	{
			
 
				+		starpu_data_unregister(data_handles[buffer]);
			
 
				+		starpu_free((void*)buffers[buffer]);
			
 
				+	}
			
 
				+
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
--- a/tests/microbenchs/tasks_data_overhead.sh
+++ b/tests/microbenchs/tasks_data_overhead.sh
@@ -0,0 +1,19 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020                                     Université de Bordeaux
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+ROOT=${0%.sh}
			
 
				+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
			
 
				+exec $STARPU_LAUNCH $ROOT -b 1 "$@"
			
--- a/tests/microbenchs/tasks_overhead.c
+++ b/tests/microbenchs/tasks_overhead.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2011,2013,2014,2016                 Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2013,2014,2016,2020            Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Inria
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				  *
			
@@ -36,6 +36,8 @@ static unsigned ntasks = 65536;
 
				 #endif
			
 
				 static unsigned nbuffers = 0;
			
 
				 
			
 
				+#define BUFFERSIZE 16
			
 
				+
			
 
				 struct starpu_task *tasks;
			
 
				 
			
 
				 void dummy_func(void *descr[], void *arg)
			
@@ -55,25 +57,16 @@ static struct starpu_codelet dummy_codelet =
 
				 	.modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW}
			
 
				 };
			
 
				 
			
 
				-static
			
 
				-int inject_one_task(void)
			
 
				+static void usage(char **argv)
			
 
				 {
			
 
				-	struct starpu_task *task = starpu_task_create();
			
 
				-
			
 
				-	task->cl = &dummy_codelet;
			
 
				-	task->cl_arg = NULL;
			
 
				-	task->callback_func = NULL;
			
 
				-	task->synchronous = 1;
			
 
				-
			
 
				-	int ret;
			
 
				-	ret = starpu_task_submit(task);
			
 
				-	return ret;
			
 
				+	fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]);
			
 
				+	exit(EXIT_FAILURE);
			
 
				 }
			
 
				 
			
 
				-static void parse_args(int argc, char **argv)
			
 
				+static void parse_args(int argc, char **argv, struct starpu_conf *conf)
			
 
				 {
			
 
				 	int c;
			
 
				-	while ((c = getopt(argc, argv, "i:b:h")) != -1)
			
 
				+	while ((c = getopt(argc, argv, "i:b:p:h")) != -1)
			
 
				 	switch(c)
			
 
				 	{
			
 
				 		case 'i':
			
@@ -83,8 +76,11 @@ static void parse_args(int argc, char **argv)
 
				 			nbuffers = atoi(optarg);
			
 
				 			dummy_codelet.nbuffers = nbuffers;
			
 
				 			break;
			
 
				+		case 'p':
			
 
				+			conf->sched_policy_name = optarg;
			
 
				+			break;
			
 
				 		case 'h':
			
 
				-			fprintf(stderr, "Usage: %s [-i ntasks] [-b nbuffers] [-h]\n", argv[0]);
			
 
				+			usage(argv);
			
 
				 			break;
			
 
				 	}
			
 
				 }
			
@@ -105,7 +101,7 @@ int main(int argc, char **argv)
 
				 	starpu_conf_init(&conf);
			
 
				 	conf.ncpus = 2;
			
 
				 
			
 
				-	parse_args(argc, argv);
			
 
				+	parse_args(argc, argv, &conf);
			
 
				 
			
 
				 	ret = starpu_initialize(&conf, &argc, &argv);
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
@@ -114,8 +110,8 @@ int main(int argc, char **argv)
 
				 	unsigned buffer;
			
 
				 	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				 	{
			
 
				-		starpu_malloc((void**)&buffers[buffer], 16*sizeof(float));
			
 
				-		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], 16, sizeof(float));
			
 
				+		starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float));
			
 
				+		starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float));
			
 
				 	}
			
 
				 
			
 
				 	fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers);
			
@@ -126,9 +122,7 @@ int main(int argc, char **argv)
 
				 	for (i = 0; i < ntasks; i++)
			
 
				 	{
			
 
				 		starpu_task_init(&tasks[i]);
			
 
				-		tasks[i].callback_func = NULL;
			
 
				 		tasks[i].cl = &dummy_codelet;
			
 
				-		tasks[i].cl_arg = NULL;
			
 
				 		tasks[i].synchronous = 0;
			
 
				 		tasks[i].use_tag = 1;
			
 
				 		tasks[i].tag_id = (starpu_tag_t)i;
			
@@ -142,19 +136,33 @@ int main(int argc, char **argv)
 
				 	tasks[ntasks-1].detach = 0;
			
 
				 
			
 
				 	start_submit = starpu_timing_now();
			
 
				-	for (i = 1; i < ntasks; i++)
			
 
				-	{
			
 
				-		starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1));
			
 
				-
			
 
				-		ret = starpu_task_submit(&tasks[i]);
			
 
				-		if (ret == -ENODEV) goto enodev;
			
 
				-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				-	}
			
 
				+        if (nbuffers)
			
 
				+        {
			
 
				+                /* Data dependency, just submit them all */
			
 
				+                for (i = 0; i < ntasks; i++)
			
 
				+                {
			
 
				+                        ret = starpu_task_submit(&tasks[i]);
			
 
				+                        if (ret == -ENODEV) goto enodev;
			
 
				+                        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+                }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+                /* No data dependency, we have to introduce dependencies by hand */
			
 
				+                for (i = 1; i < ntasks; i++)
			
 
				+                {
			
 
				+                        starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1));
			
 
				+
			
 
				+                        ret = starpu_task_submit(&tasks[i]);
			
 
				+                        if (ret == -ENODEV) goto enodev;
			
 
				+                        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+                }
			
 
				 
			
 
				-	/* submit the first task */
			
 
				-	ret = starpu_task_submit(&tasks[0]);
			
 
				-	if (ret == -ENODEV) goto enodev;
			
 
				-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+                /* submit the first task */
			
 
				+                ret = starpu_task_submit(&tasks[0]);
			
 
				+                if (ret == -ENODEV) goto enodev;
			
 
				+                STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+        }
			
 
				 
			
 
				 	end_submit = starpu_timing_now();
			
 
				 
			
@@ -169,9 +177,6 @@ int main(int argc, char **argv)
 
				 	for (i = 0; i < ntasks; i++)
			
 
				 		starpu_task_clean(&tasks[i]);
			
 
				 
			
 
				-	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				-		starpu_data_unregister(data_handles[buffer]);
			
 
				-
			
 
				 	timing_submit = end_submit - start_submit;
			
 
				 	timing_exec = end_exec - start_exec;
			
 
				 
			
@@ -190,41 +195,57 @@ int main(int argc, char **argv)
 
				 
			
 
				                 if (output_dir && bench_id)
			
 
				 		{
			
 
				+                        char number[1+sizeof(nbuffers)*3+1];
			
 
				+                        const char *numberp;
			
 
				                         char file[1024];
			
 
				                         FILE *f;
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit.dat", output_dir);
			
 
				+                        if (nbuffers)
			
 
				+                        {
			
 
				+                                snprintf(number, sizeof(number), "_%u", nbuffers);
			
 
				+                                numberp = number;
			
 
				+                        }
			
 
				+                        else
			
 
				+                                numberp = "";
			
 
				+
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/1000000);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing_submit/ntasks);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/1000000);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, timing_exec/ntasks);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/1000000);
			
 
				                         fclose(f);
			
 
				 
			
 
				-                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit_execution.dat", output_dir);
			
 
				+                        snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit_execution%s.dat", output_dir, numberp);
			
 
				                         f = fopen(file, "a");
			
 
				                         fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/ntasks);
			
 
				                         fclose(f);
			
 
				                 }
			
 
				         }
			
 
				 
			
 
				+	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				+	{
			
 
				+		starpu_data_unregister(data_handles[buffer]);
			
 
				+		starpu_free((void*)buffers[buffer]);
			
 
				+	}
			
 
				+
			
 
				 	starpu_shutdown();
			
 
				 	free(tasks);
			
 
				 	return EXIT_SUCCESS;
			
--- a/tests/microbenchs/tasks_size_overhead.sh
+++ b/tests/microbenchs/tasks_size_overhead.sh
@@ -2,7 +2,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2010,2011,2013,2015,2017                 CNRS
			
 
				-# Copyright (C) 2009,2010,2012,2014,2016                 Université de Bordeaux
			
 
				+# Copyright (C) 2009,2010,2012,2014,2016,2020            Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -16,6 +16,6 @@
 
				 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 #
			
 
				 ROOT=${0%.sh}
			
 
				-$ROOT "$@" > tasks_size_overhead.output
			
 
				+$STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
			
 
				 $ROOT.gp
			
 
				 gv tasks_size_overhead.eps
			
--- a/tests/microbenchs/tasks_size_overhead_sched.sh
+++ b/tests/microbenchs/tasks_size_overhead_sched.sh
@@ -2,7 +2,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2010,2011,2016,2017                      CNRS
			
 
				-# Copyright (C) 2009,2010,2016                           Université de Bordeaux
			
 
				+# Copyright (C) 2009,2010,2016,2020                      Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -18,7 +18,7 @@
 
				 ROOT=${0%.sh}
			
 
				 ROOT=${ROOT%_sched}
			
 
				 unset STARPU_SSILENT
			
 
				-$ROOT "$@" > tasks_size_overhead.output
			
 
				+$STARPU_LAUNCH $_STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output
			
 
				 ret=$?
			
 
				 if test "$ret" = "0"
			
 
				 then
			
--- a/tests/microbenchs/tasks_size_overhead_scheds.sh
+++ b/tests/microbenchs/tasks_size_overhead_scheds.sh
@@ -2,7 +2,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2016,2017                                CNRS
			
 
				-# Copyright (C) 2016,2019                                Université de Bordeaux
			
 
				+# Copyright (C) 2016,2019-2020                           Université de Bordeaux
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -24,4 +24,6 @@ then
 
				 	FAST="-i 8"
			
 
				 fi
			
 
				 
			
 
				+_STARPU_LAUNCH="$STARPU_LAUNCH"
			
 
				+unset STARPU_LAUNCH
			
 
				 test_scheds tasks_size_overhead_sched.sh $FAST
			
--- a/tests/mult/gflops.sh
+++ b/tests/mult/gflops.sh
@@ -1,7 +1,7 @@
 
				 #!/bin/bash
			
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2008-2011,2014                           Université de Bordeaux
			
 
				+# Copyright (C) 2008-2011,2014,2020                      Université de Bordeaux
			
 
				 # Copyright (C) 2010,2015,2017                           CNRS
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -51,7 +51,7 @@ trace_size()
 
				 		if [ $tile -lt $size -a $nblocks -lt 32 -a $(($size % $tile)) == 0 ];
			
 
				 		then
			
 
				 			echo "start tile $tile size $size nblocks $nblocks  "
			
 
				-			timing=`$ROOTDIR/examples/mult/dw_mult -pin -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				+			timing=`$STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -pin -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null`
			
 
				 		else
			
 
				 			timing="x"
			
 
				 		fi
			
--- a/tests/mult/sched.sh
+++ b/tests/mult/sched.sh