il y a 5 ans · 1849722415
--- a/ChangeLog
+++ b/ChangeLog
@@ -44,11 +44,16 @@ New features:
 
																   * Add starpu_data_dup_ro().
															
 
																 Small changes:
															
 
																-  * Use the S4U interface of Simgrid instead of xbt and MSG.
															
 
																   * Add a synthetic energy efficiency testcase.
															
 
																-StarPU 1.3.4 (git revision xxx)
															
 
																-==============================================
															
 
																+StarPU 1.3.5 (git revision xxx)
															
 
																+====================================================================
															
 
																+
															
 
																+Small changes:
															
 
																+  * Move MPI cache functions into the public API
															
 
																+
															
 
																+StarPU 1.3.4 (git revision c37a5d024cd997596da41f765557c58099baf896)
															
 
																+====================================================================
															
 
																 Small features:
															
 
																   * New environment variables STARPU_BUS_STATS_FILE and
															
@@ -69,12 +74,17 @@ Small features:
 
																   * Add field starpu_conf::precedence_over_environment_variables to ignore
															
 
																     environment variables when parameters are set directly in starpu_conf
															
 
																   * Add starpu_data_get_coordinates_array
															
 
																+  * MPI: new functions starpu_mpi_interface_datatype_register() and
															
 
																+    starpu_mpi_interface_datatype_unregister() which take an enum
															
 
																+    starpu_data_interface_id instead of a starpu_data_handle_t
															
 
																+  * New script starpu_env to set up StarPU environment variables
															
 
																   * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to the
															
 
																     exponential backoff limits of the number of cycles to pause while drivers
															
 
																     are spinning.
															
 
																   * Add STARPU_DISPLAY_BINDINGS environment variable and
															
 
																     starpu_display_bindings() function to display all bindings on the machine by
															
 
																     calling hwloc-ps
															
 
																+
															
 
																 Small changes:
															
 
																   * New configure option --disable-build-doc-pdf
															
@@ -116,7 +126,7 @@ Small features:
 
																     STARPU_TASK_PROFILING_INFO
															
 
																   * New function starpu_create_callback_task() which creates and
															
 
																     submits an empty task with the specified callback
															
 
																-
															
 
																+  * Use the S4U interface of Simgrid instead of xbt and MSG.
															
 
																 Small changes:
															
 
																    * Default modular worker queues to 2 tasks unless it's an heft
															
--- a/Makefile.am
+++ b/Makefile.am
@@ -199,7 +199,7 @@ ctags-local:
 
																 # The pmccabe tool, see <http://www.parisc-linux.org/~bame/pmccabe/>.
															
 
																 PMCCABE = pmccabe
															
 
																-VC_URL = "https://gforge.inria.fr/scm/viewvc.php/trunk/%FILENAME%?view=markup&root=starpu"
															
 
																+VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%"
															
 
																 # Generate a cyclomatic complexity report.  Note that examples and tests are
															
 
																 # excluded because they're not particularly relevant, and more importantly
															
--- a/README
+++ b/README
@@ -87,15 +87,11 @@ advantage of their specificities in a portable fashion.
 
																 || III. Getting StarPU ||
															
 
																 ++=====================++
															
 
																-StarPU is available on https://gforge.inria.fr/projects/starpu/.
															
 
																+StarPU is available on https://gitlab.inria.fr/starpu/starpu
															
 
																-The project's SVN repository can be checked out through anonymous
															
 
																-access with the following command(s).
															
 
																+The Git repository can be checked out with the following command.
															
 
																-$ svn checkout svn://scm.gforge.inria.fr/svn/starpu/trunk
															
 
																-$ svn checkout --username anonsvn https://scm.gforge.inria.fr/svn/starpu/trunk
															
 
																-
															
 
																-The password is 'anonsvn'
															
 
																+$ git clone git@gitlab.inria.fr:starpu/starpu.git
															
 
																 ++=============================++
															
 
																 || IV. Building and Installing ||
															
--- a/configure.ac
+++ b/configure.ac
@@ -18,7 +18,7 @@
 
																 #
															
 
																 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																 #
															
 
																-AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://starpu.gforge.inria.fr/])
															
 
																+AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://gitlab.inria.fr/starpu/starpu])
															
 
																 AC_CONFIG_SRCDIR(include/starpu.h)
															
 
																 AC_CONFIG_AUX_DIR([build-aux])
															
--- a/contrib/ci.inria.fr/disabled/Jenkinsfile-basic
+++ b/contrib/ci.inria.fr/disabled/Jenkinsfile-basic
@@ -24,7 +24,7 @@ pipeline
 
																 	// Trigger the build
															
 
																 	triggers
															
 
																 	{
															
 
																-		// Poll gforge explicitly every hour
															
 
																+		// Poll SCM explicitly every hour
															
 
																 		pollSCM('0 * * * *')
															
 
																 	}
															
--- a/contrib/ci.inria.fr/disabled/Jenkinsfile-bsd
+++ b/contrib/ci.inria.fr/disabled/Jenkinsfile-bsd
@@ -24,7 +24,7 @@ pipeline
 
																 	// Trigger the build
															
 
																 	triggers
															
 
																 	{
															
 
																-		// Poll gforge explicitly every past-half hour
															
 
																+		// Poll SCM explicitly every past-half hour
															
 
																 		pollSCM('30 * * * *')
															
 
																 	}
															
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -304,8 +304,8 @@ endif
 
																 EXTRA_DIST += doxygen.cfg refman.tex \
															
 
																 	      $(chapters) $(images)
															
 
																-# Rule to update documentation on web server. Should only be used locally.
															
 
																-PUBLISHHOST	?= gforge
															
 
																+# Rule to update documentation on web server. Should only be called from benchmarks dalton directory
															
 
																+PUBLISHDIR	?= /home/benchmarks/softs/starpu/starpu-scripts/mirror/files/doc
															
 
																 update-web: $(DOX_PDF)
															
 
																-	scp -pr starpu.pdf html $(PUBLISHHOST):/home/groups/starpu/htdocs/files/doc
															
 
																+	cp -pr starpu.pdf html $(PUBLISHDIR)
															
--- a/doc/doxygen/chapters/000_introduction.doxy
+++ b/doc/doxygen/chapters/000_introduction.doxy
@@ -77,9 +77,9 @@ policies in a portable fashion (\ref HowToDefineANewSchedulingPolicy).
 
																 The remainder of this section describes the main concepts used in StarPU.
															
 
																 A video is available on the StarPU website
															
 
																-http://starpu.gforge.inria.fr/ that presents these concepts in 26 minutes.
															
 
																+https://starpu.gitlabpages.inria.fr/ that presents these concepts in 26 minutes.
															
 
																-Some tutorials are also available on http://starpu.gforge.inria.fr/tutorials/
															
 
																+Some tutorials are also available on https://starpu.gitlabpages.inria.fr/tutorials/
															
 
																 // explain the notion of codelet and task (i.e. g(A, B)
															
@@ -190,7 +190,7 @@ unregister it.
 
																 \section ResearchPapers Research Papers
															
 
																 Research papers about StarPU can be found at
															
 
																-http://starpu.gforge.inria.fr/publications/.
															
 
																+https://starpu.gitlabpages.inria.fr/publications/.
															
 
																 A good overview is available in the research report at
															
 
																 http://hal.archives-ouvertes.fr/inria-00467677.
															
--- a/doc/doxygen/chapters/101_building.doxy
+++ b/doc/doxygen/chapters/101_building.doxy
@@ -61,27 +61,22 @@ script <c>configure</c>.
 
																 \subsection GettingSources Getting Sources
															
 
																 StarPU's sources can be obtained from the download page of
															
 
																-the StarPU website (http://starpu.gforge.inria.fr/files/).
															
 
																+the StarPU website (https://starpu.gitlabpages.inria.fr/files/).
															
 
																 All releases and the development tree of StarPU are freely available
															
 
																-on Inria's gforge under the LGPL license. Some releases are available
															
 
																+on StarPU SCM server under the LGPL license. Some releases are available
															
 
																 under the BSD license.
															
 
																-The latest release can be downloaded from the Inria's gforge (http://gforge.inria.fr/frs/?group_id=1570) or
															
 
																-directly from the StarPU download page (http://starpu.gforge.inria.fr/files/).
															
 
																+The latest release can be downloaded from the StarPU download page (https://starpu.gitlabpages.inria.fr/files/).
															
 
																-The latest nightly snapshot can be downloaded from the StarPU gforge website (http://starpu.gforge.inria.fr/testing/).
															
 
																-
															
 
																-\verbatim
															
 
																-$ wget http://starpu.gforge.inria.fr/testing/starpu-nightly-latest.tar.gz
															
 
																-\endverbatim
															
 
																+The latest nightly snapshot can be downloaded from the StarPU website (https://starpu.gitlabpages.inria.fr/files/testing/).
															
 
																 And finally, current development version is also accessible via git.
															
 
																 It should only be used if you need the very latest changes (i.e. less
															
 
																 than a day old!).
															
 
																 \verbatim
															
 
																-$ git clone https://scm.gforge.inria.fr/anonscm/git/starpu/starpu.git
															
 
																+$ git clone git@gitlab.inria.fr:starpu/starpu.git
															
 
																 \endverbatim
															
 
																 \subsection ConfiguringStarPU Configuring StarPU
															
@@ -139,7 +134,7 @@ $ make
 
																 Once everything is built, you may want to test the result. An
															
 
																 extensive set of regression tests is provided with StarPU. Running the
															
 
																 tests is done by calling <c>make check</c>. These tests are run every night
															
 
																-and the result from the main profile is publicly available (http://starpu.gforge.inria.fr/testing/master/).
															
 
																+and the result from the main profile is publicly available (https://starpu.gitlabpages.inria.fr/files/testing/master/).
															
 
																 \verbatim
															
 
																 $ make check
															
--- a/doc/doxygen/chapters/410_mpi_support.doxy
+++ b/doc/doxygen/chapters/410_mpi_support.doxy
@@ -759,8 +759,16 @@ add fine-graph starpu_mpi_cache_flush() calls during the algorithm; the effect
 
																 for the data deallocation will be the same, but it will additionally release some
															
 
																 pressure from the StarPU-MPI cache hash table during task submission.
															
 
																-One can determine whether a piece of is cached with starpu_mpi_cached_receive()
															
 
																-and starpu_mpi_cached_send().
															
 
																+One can determine whether a piece of data is cached with
															
 
																+starpu_mpi_cached_receive() and starpu_mpi_cached_send().
															
 
																+
															
 
																+Functions starpu_mpi_cached_receive_set() and
															
 
																+starpu_mpi_cached_send_set() are automatically called by
															
 
																+starpu_mpi_task_insert() but can also be called directly by the
															
 
																+application. Functions starpu_mpi_cached_send_clear() and
															
 
																+starpu_mpi_cached_receive_clear() must be called to clear data from
															
 
																+the cache. They are also automatically called when using
															
 
																+starpu_mpi_task_insert().
															
 
																 The whole caching behavior can be disabled thanks to the \ref STARPU_MPI_CACHE
															
 
																 environment variable. The variable \ref STARPU_MPI_CACHE_STATS can be set to <c>1</c>
															
--- a/doc/doxygen_dev/Makefile.am
+++ b/doc/doxygen_dev/Makefile.am
@@ -245,8 +245,8 @@ endif
 
																 EXTRA_DIST += doxygen.cfg refman.tex \
															
 
																 	      $(chapters) $(images)
															
 
																-# Rule to update documentation on web server. Should only be used locally.
															
 
																-PUBLISHHOST	?= gforge
															
 
																+# Rule to update documentation on web server. Should only be called from benchmarks dalton directory
															
 
																+PUBLISHDIR	?= /home/benchmarks/softs/starpu/starpu-scripts/mirror/files/doc
															
 
																 update-web: $(DOX_PDF)
															
 
																-	scp -pr starpu_dev.pdf html_dev $(PUBLISHHOST):/home/groups/starpu/htdocs/files/doc
															
 
																+	cp -pr starpu_dev.pdf html_dev $(PUBLISHDIR)
															
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -151,8 +151,9 @@ enum starpu_codelet_type
 
																 enum starpu_task_status
															
 
																 {
															
 
																-	STARPU_TASK_INVALID,     /**< The task has just been initialized. */
															
 
																-#define STARPU_TASK_INVALID 0
															
 
																+	STARPU_TASK_INIT,        /**< The task has just been initialized. */
															
 
																+#define STARPU_TASK_INIT 0
															
 
																+#define STARPU_TASK_INVALID STARPU_TASK_INIT  /**< old name for STARPU_TASK_INIT */
															
 
																 	STARPU_TASK_BLOCKED,     /**< The task has just been
															
 
																 				    submitted, and its dependencies has not been checked yet. */
															
 
																 	STARPU_TASK_READY,       /**< The task is ready for execution. */
															
@@ -1295,7 +1296,7 @@ struct starpu_task
 
																 	.detach = 1,					\
															
 
																 	.destroy = 0,					\
															
 
																 	.regenerate = 0,				\
															
 
																-	.status = STARPU_TASK_INVALID,			\
															
 
																+	.status = STARPU_TASK_INIT,			\
															
 
																 	.profiling_info = NULL,				\
															
 
																 	.predicted = NAN,				\
															
 
																 	.predicted_transfer = NAN,			\
															
--- a/mpi/examples/Makefile.am
+++ b/mpi/examples/Makefile.am
@@ -128,6 +128,17 @@ starpu_mpi_EXAMPLES	+=	\
 
																 endif
															
 
																 ##################
															
 
																+# Cache examples #
															
 
																+##################
															
 
																+examplebin_PROGRAMS +=		\
															
 
																+	cache/cache		\
															
 
																+	cache/cache_disable
															
 
																+starpu_mpi_EXAMPLES +=		\
															
 
																+	cache/cache		\
															
 
																+	cache/cache_disable
															
 
																+
															
 
																+
															
 
																+##################
															
 
																 # MPI LU example #
															
 
																 ##################
															
--- a/mpi/tests/cache.c
+++ b/mpi/tests/cache.c
@@ -17,7 +17,6 @@
 
																 #include <starpu_mpi.h>
															
 
																 #include <math.h>
															
 
																 #include "helper.h"
															
 
																-#include <starpu_mpi_cache.h>
															
 
																 void func_cpu(void *descr[], void *_args)
															
 
																 {
															
@@ -57,7 +56,7 @@ void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, sta
 
																 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0);
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
															
 
																-	cache = _starpu_mpi_cache_received_data_get(data);
															
 
																+	cache = starpu_mpi_cached_receive(data);
															
 
																 	if (rank == 1)
															
 
																 	{
															
--- a/mpi/tests/cache_disable.c
+++ b/mpi/tests/cache_disable.c
@@ -17,7 +17,6 @@
 
																 #include <starpu_mpi.h>
															
 
																 #include <math.h>
															
 
																 #include "helper.h"
															
 
																-#include <starpu_mpi_cache.h>
															
 
																 void func_cpu(void *descr[], void *_args)
															
 
																 {
															
@@ -63,7 +62,7 @@ int main(int argc, char **argv)
 
																 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
															
 
																-	in_cache = _starpu_mpi_cache_received_data_get(data);
															
 
																+	in_cache = starpu_mpi_cached_receive(data);
															
 
																 	if (rank == 1)
															
 
																 	{
															
 
																 		STARPU_ASSERT_MSG(in_cache == 1, "Data should be in cache\n");
															
@@ -73,7 +72,7 @@ int main(int argc, char **argv)
 
																 	starpu_mpi_cache_set(0);
															
 
																 	// We check the data is no longer in the cache
															
 
																-	in_cache = _starpu_mpi_cache_received_data_get(data);
															
 
																+	in_cache = starpu_mpi_cached_receive(data);
															
 
																 	if (rank == 1)
															
 
																 	{
															
 
																 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");
															
@@ -81,7 +80,7 @@ int main(int argc, char **argv)
 
																 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
															
 
																-	in_cache = _starpu_mpi_cache_received_data_get(data);
															
 
																+	in_cache = starpu_mpi_cached_receive(data);
															
 
																 	if (rank == 1)
															
 
																 	{
															
 
																 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");
															
--- a/mpi/include/starpu_mpi.h
+++ b/mpi/include/starpu_mpi.h
@@ -422,12 +422,39 @@ void starpu_mpi_cache_flush_all_data(MPI_Comm comm);
 
																 int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
															
 
																 /**
															
 
																+ * If \p data is already available in the reception cache, return 1
															
 
																+ * If \p data is NOT available in the reception cache, add it to the
															
 
																+ * cache and return 0
															
 
																+ * Return 0 if the communication cache is not enabled
															
 
																+ */
															
 
																+int starpu_mpi_cached_receive_set(starpu_data_handle_t data);
															
 
																+
															
 
																+/**
															
 
																+ * Remove \p data from the reception cache
															
 
																+ */
															
 
																+void starpu_mpi_cached_receive_clear(starpu_data_handle_t data);
															
 
																+
															
 
																+/**
															
 
																    Test whether \p data_handle is cached for emission to node \p dest,
															
 
																    i.e. the value was previously sent to \p dest, and not flushed
															
 
																    since then.
															
 
																 */
															
 
																 int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
															
 
																+/**
															
 
																+ * If \p data is already available in the emission cache for node
															
 
																+ * \p dest, return 1
															
 
																+ * If \p data is NOT available in the emission cache for node \p dest,
															
 
																+ * add it to the cache and return 0
															
 
																+ * Return 0 if the communication cache is not enabled
															
 
																+ */
															
 
																+int starpu_mpi_cached_send_set(starpu_data_handle_t data, int dest);
															
 
																+
															
 
																+/**
															
 
																+ * Remove \p data from the emission cache
															
 
																+ */
															
 
																+void starpu_mpi_cached_send_clear(starpu_data_handle_t data);
															
 
																+
															
 
																 /** @} */
															
 
																 /**
															
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -346,7 +346,7 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 
																 	if (me == node)
															
 
																 	{
															
 
																 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
															
 
																-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
															
 
																+		int already_received = starpu_mpi_cached_receive_set(data_handle);
															
 
																 		if (already_received == 0)
															
 
																 		{
															
 
																 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
															
@@ -356,7 +356,7 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 
																 	else if (me == rank)
															
 
																 	{
															
 
																 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
															
 
																-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
															
 
																+		int already_sent = starpu_mpi_cached_send_set(data_handle, node);
															
 
																 		if (already_sent == 0)
															
 
																 		{
															
 
																 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
															
@@ -389,7 +389,7 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 
																 	{
															
 
																 		MPI_Status status;
															
 
																 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
															
 
																-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
															
 
																+		int already_received = starpu_mpi_cached_receive_set(data_handle);
															
 
																 		if (already_received == 0)
															
 
																 		{
															
 
																 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
															
@@ -399,7 +399,7 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 
																 	else if (me == rank)
															
 
																 	{
															
 
																 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
															
 
																-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
															
 
																+		int already_sent = starpu_mpi_cached_send_set(data_handle, node);
															
 
																 		if (already_sent == 0)
															
 
																 		{
															
 
																 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
															
--- a/mpi/src/starpu_mpi_cache.c
+++ b/mpi/src/starpu_mpi_cache.c
@@ -172,7 +172,7 @@ static void _starpu_mpi_cache_data_remove_nolock(starpu_data_handle_t data_handl
 
																 /**************************************
															
 
																  * Received cache
															
 
																  **************************************/
															
 
																-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data_handle)
															
 
																+void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle)
															
 
																 {
															
 
																 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
@@ -198,7 +198,7 @@ void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data_handle)
 
																 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
															
 
																 }
															
 
																-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data_handle)
															
 
																+int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle)
															
 
																 {
															
 
																 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
@@ -226,7 +226,7 @@ int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data_handle)
 
																 	return already_received;
															
 
																 }
															
 
																-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data_handle)
															
 
																+int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
															
 
																 {
															
 
																 	int already_received;
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
@@ -241,15 +241,10 @@ int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data_handle)
 
																 	return already_received;
															
 
																 }
															
 
																-int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
															
 
																-{
															
 
																-	return _starpu_mpi_cache_received_data_get(data_handle);
															
 
																-}
															
 
																-
															
 
																 /**************************************
															
 
																  * Send cache
															
 
																  **************************************/
															
 
																-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
															
 
																+void starpu_mpi_cached_send_clear(starpu_data_handle_t data_handle)
															
 
																 {
															
 
																 	int n, size;
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
@@ -271,7 +266,7 @@ void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
 
																 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
															
 
																 }
															
 
																-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data_handle, int dest)
															
 
																+int starpu_mpi_cached_send_set(starpu_data_handle_t data_handle, int dest)
															
 
																 {
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
@@ -296,7 +291,7 @@ int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data_handle, int dest)
 
																 	return already_sent;
															
 
																 }
															
 
																-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data_handle, int dest)
															
 
																+int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
															
 
																 {
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
 
																 	int already_sent;
															
@@ -311,11 +306,6 @@ int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data_handle, int dest)
 
																 	return already_sent;
															
 
																 }
															
 
																-int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
															
 
																-{
															
 
																-	return _starpu_mpi_cache_sent_data_get(data_handle, dest);
															
 
																-}
															
 
																-
															
 
																 static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
															
 
																 {
															
 
																 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
															
--- a/mpi/src/starpu_mpi_cache.h
+++ b/mpi/src/starpu_mpi_cache.h
@@ -32,22 +32,6 @@ void _starpu_mpi_cache_shutdown();
 
																 void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle);
															
 
																 void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle);
															
 
																-/*
															
 
																- * If the data is already available in the cache, return a pointer to the data
															
 
																- * If the data is NOT available in the cache, add it to the cache and return NULL
															
 
																- */
															
 
																-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data);
															
 
																-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data);
															
 
																-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data);
															
 
																-
															
 
																-/*
															
 
																- * If the data is already available in the cache, return a pointer to the data
															
 
																- * If the data is NOT available in the cache, add it to the cache and return NULL
															
 
																- */
															
 
																-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data, int dest);
															
 
																-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data, int dest);
															
 
																-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data);
															
 
																-
															
 
																 void _starpu_mpi_cache_flush(starpu_data_handle_t data_handle);
															
 
																 #ifdef __cplusplus
															
--- a/mpi/src/starpu_mpi_task_insert.c
+++ b/mpi/src/starpu_mpi_task_insert.c
@@ -112,7 +112,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 
																 		if (do_execute && mpi_rank != STARPU_MPI_PER_NODE && mpi_rank != me)
															
 
																 		{
															
 
																 			/* The node is going to execute the codelet, but it does not own the data, it needs to receive the data from the owner node */
															
 
																-			int already_received = _starpu_mpi_cache_received_data_set(data);
															
 
																+			int already_received = starpu_mpi_cached_receive_set(data);
															
 
																 			if (already_received == 0)
															
 
																 			{
															
 
																 				if (data_tag == -1)
															
@@ -126,7 +126,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 
																 		if (!do_execute && mpi_rank == me)
															
 
																 		{
															
 
																 			/* The node owns the data, but another node is going to execute the codelet, the node needs to send the data to the executee node. */
															
 
																-			int already_sent = _starpu_mpi_cache_sent_data_set(data, xrank);
															
 
																+			int already_sent = starpu_mpi_cached_send_set(data, xrank);
															
 
																 			if (already_sent == 0)
															
 
																 			{
															
 
																 				if (data_tag == -1)
															
@@ -182,8 +182,8 @@ void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum star
 
																 		if (mode & STARPU_W || mode & STARPU_REDUX)
															
 
																 		{
															
 
																 			/* The data has been modified, it MUST be removed from the cache */
															
 
																-			_starpu_mpi_cache_sent_data_clear(data);
															
 
																-			_starpu_mpi_cache_received_data_clear(data);
															
 
																+			starpu_mpi_cached_send_clear(data);
															
 
																+			starpu_mpi_cached_receive_clear(data);
															
 
																 		}
															
 
																 	}
															
 
																 	else
															
--- a/mpi/tests/Makefile.am
+++ b/mpi/tests/Makefile.am
@@ -96,8 +96,6 @@ starpu_mpi_TESTS =
 
																 starpu_mpi_TESTS +=				\
															
 
																 	broadcast				\
															
 
																-	cache					\
															
 
																-	cache_disable				\
															
 
																 	callback				\
															
 
																 	driver					\
															
 
																 	early_request				\
															
@@ -192,8 +190,6 @@ noinst_PROGRAMS +=				\
 
																 	block_interface_pinned			\
															
 
																 	attr					\
															
 
																 	broadcast				\
															
 
																-	cache					\
															
 
																-	cache_disable				\
															
 
																 	callback				\
															
 
																 	matrix					\
															
 
																 	matrix2					\
															
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -63,10 +63,21 @@ static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
 
																 	cg->ntags = ntags;
															
 
																 	cg->remaining = ntags;
															
 
																+#ifdef STARPU_DEBUG
															
 
																+	cg->ndeps = ntags;
															
 
																+	cg->deps = NULL;
															
 
																+	cg->done = NULL;
															
 
																+#endif
															
 
																 	cg->cg_type = STARPU_CG_TAG;
															
 
																 	cg->succ.tag = tag;
															
 
																 	tag->tag_successors.ndeps++;
															
 
																+#ifdef STARPU_DEBUG
															
 
																+	_STARPU_REALLOC(tag->tag_successors.deps, tag->tag_successors.ndeps * sizeof(tag->tag_successors.deps[0]));
															
 
																+	_STARPU_REALLOC(tag->tag_successors.done, tag->tag_successors.ndeps * sizeof(tag->tag_successors.done[0]));
															
 
																+	tag->tag_successors.deps[tag->tag_successors.ndeps-1] = cg;
															
 
																+	tag->tag_successors.done[tag->tag_successors.ndeps-1] = 0;
															
 
																+#endif
															
 
																 	return cg;
															
 
																 }
															
@@ -364,10 +375,20 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 
																 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
															
 
																 	_starpu_spin_unlock(&tag_child->lock);
															
 
																+#ifdef STARPU_DEBUG
															
 
																+	_STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0]));
															
 
																+	_STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0]));
															
 
																+#endif
															
 
																+
															
 
																 	for (i = 0; i < ndeps; i++)
															
 
																 	{
															
 
																 		starpu_tag_t dep_id = array[i];
															
 
																+#ifdef STARPU_DEBUG
															
 
																+		cg->deps[i] = (void*) (uintptr_t) dep_id;
															
 
																+		cg->done[i] = 0;
															
 
																+#endif
															
 
																+
															
 
																 		/* id depends on dep_id
															
 
																 		 * so cg should be among dep_id's successors*/
															
 
																 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
															
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -723,7 +723,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 
																 	{
															
 
																 		length = starpu_task_worker_expected_length(starpu_task, workerid, sched_ctx_id, j->nimpl);
															
 
																 		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
															
 
																-				  "Codelet %s does not have a perfmodel (in directory %s), or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated",
															
 
																+				  "Codelet %s does not have a perfmodel (in directory %s), or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables",
															
 
																 				  _starpu_job_get_model_name(j), _starpu_get_perf_model_dir_codelet());
															
 
																                 /* TODO: option to add variance according to performance model,
															
 
																                  * to be able to easily check scheduling robustness */
															
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -288,8 +288,8 @@ void starpu_task_init(struct starpu_task *task)
 
																 	task->detach = 1;
															
 
																-#if STARPU_TASK_INVALID != 0
															
 
																-	task->status = STARPU_TASK_INVALID;
															
 
																+#if STARPU_TASK_INIT != 0
															
 
																+	task->status = STARPU_TASK_INIT;
															
 
																 #endif
															
 
																 	task->predicted = NAN;
															
@@ -766,9 +766,9 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
																 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																 	if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED)
															
 
																-		task->status = STARPU_TASK_INVALID;
															
 
																+		task->status = STARPU_TASK_INIT;
															
 
																 	else
															
 
																-		STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
															
 
																+		STARPU_ASSERT(task->status == STARPU_TASK_INIT);
															
 
																 	if (j->internal)
															
 
																 	{
															
@@ -1067,7 +1067,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
																 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
															
 
																 	_starpu_job_set_ordered_buffers(j);
															
 
																-	STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
															
 
																+	STARPU_ASSERT(task->status == STARPU_TASK_INIT);
															
 
																 	task->status = STARPU_TASK_READY;
															
 
																 	_starpu_profiling_set_task_push_start_time(task);
															
@@ -1668,7 +1668,7 @@ struct starpu_task *starpu_task_ft_create_retry
 
																 	new_task->failed = 0;
															
 
																 	new_task->scheduled = 0;
															
 
																 	new_task->prefetched = 0;
															
 
																-	new_task->status = STARPU_TASK_INVALID;
															
 
																+	new_task->status = STARPU_TASK_INIT;
															
 
																 	new_task->profiling_info = NULL;
															
 
																 	new_task->prev = NULL;
															
 
																 	new_task->next = NULL;
															
--- a/src/core/task_bundle.c
+++ b/src/core/task_bundle.c
@@ -50,7 +50,7 @@ int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *t
 
																 		return -EPERM;
															
 
																 	}
															
 
																-	if (task->status != STARPU_TASK_INVALID)
															
 
																+	if (task->status != STARPU_TASK_INIT)
															
 
																 	{
															
 
																 		/* The task has already been submitted, it's too late to put it
															
 
																 		 * into a bundle now. */
															
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -227,7 +227,7 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 
																 			*pre_sync_jobid = pre_sync_job->job_id;
															
 
																 		wrapper->post_sync_task = starpu_task_create();
															
 
																-		wrapper->post_sync_task->name = "_starpu_data_acquire_cb_post";
															
 
																+		wrapper->post_sync_task->name = "_starpu_data_acquire_cb_release";
															
 
																 		wrapper->post_sync_task->detach = 1;
															
 
																 		wrapper->post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
															
 
																 		post_sync_job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
															
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -443,151 +443,6 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 	return ret;
															
 
																 }
															
 
																-/* TODO: factorize with dmda!! */
															
 
																-static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id)
															
 
																-{
															
 
																-	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																-	int best = -1;
															
 
																-
															
 
																-	double best_exp_end_of_task = 0.0;
															
 
																-	double model_best = 0.0;
															
 
																-	double transfer_model_best = 0.0;
															
 
																-
															
 
																-	int ntasks_best = -1;
															
 
																-	double ntasks_best_end = 0.0;
															
 
																-	int calibrating = 0;
															
 
																-
															
 
																-	/* A priori, we know all estimations */
															
 
																-	int unknown = 0;
															
 
																-
															
 
																-	unsigned best_impl = 0;
															
 
																-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
															
 
																-
															
 
																-	struct starpu_sched_ctx_iterator it;
															
 
																-
															
 
																-	double now = starpu_timing_now();
															
 
																-
															
 
																-	workers->init_iterator_for_parallel_tasks(workers, &it, task);
															
 
																-	while(workers->has_next(workers, &it))
															
 
																-	{
															
 
																-		unsigned nimpl;
															
 
																-		unsigned impl_mask;
															
 
																-		unsigned worker = workers->get_next(workers, &it);
															
 
																-		struct _starpu_fifo_taskq *fifo  = &dt->queue_array[worker];
															
 
																-		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id);
															
 
																-
															
 
																-		/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																-		double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
															
 
																-
															
 
																-		if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask))
															
 
																-			continue;
															
 
																-
															
 
																-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																-		{
															
 
																-			if (!(impl_mask & (1U << nimpl)))
															
 
																-			{
															
 
																-				/* no one on that queue may execute this task */
															
 
																-				continue;
															
 
																-			}
															
 
																-
															
 
																-			double exp_end;
															
 
																-			double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl);
															
 
																-			double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker);
															
 
																-			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
															
 
																-
															
 
																-			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
															
 
																-
															
 
																-			/*
															
 
																-			 * This implements a default greedy scheduler for the
															
 
																-			 * case of tasks which have no performance model, or
															
 
																-			 * whose performance model is not calibrated yet.
															
 
																-			 *
															
 
																-			 * It simply uses the number of tasks already pushed to
															
 
																-			 * the workers, divided by the relative performance of
															
 
																-			 * a CPU and of a GPU.
															
 
																-			 *
															
 
																-			 * This is always computed, but the ntasks_best
															
 
																-			 * selection is only really used if the task indeed has
															
 
																-			 * no performance model, or is not calibrated yet.
															
 
																-			 */
															
 
																-			if (ntasks_best == -1
															
 
																-
															
 
																-			    /* Always compute the greedy decision, at least for
															
 
																-			     * the tasks with no performance model. */
															
 
																-			    || (!calibrating && ntasks_end < ntasks_best_end)
															
 
																-
															
 
																-			    /* The performance model of this task is not
															
 
																-			     * calibrated on this worker, try to run it there
															
 
																-			     * to calibrate it there. */
															
 
																-			    || (!calibrating && isnan(local_length))
															
 
																-
															
 
																-			    /* the performance model of this task is not
															
 
																-			     * calibrated on this worker either, rather run it
															
 
																-			     * there if this one is low on scheduled tasks. */
															
 
																-			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end)
															
 
																-				)
															
 
																-			{
															
 
																-				ntasks_best_end = ntasks_end;
															
 
																-				ntasks_best = worker;
															
 
																-				best_impl = nimpl;
															
 
																-			}
															
 
																-
															
 
																-			if (isnan(local_length))
															
 
																-			{
															
 
																-				/* we are calibrating, we want to speed-up calibration time
															
 
																-				 * so we privilege non-calibrated tasks (but still
															
 
																-				 * greedily distribute them to avoid dumb schedules) */
															
 
																-				static int warned;
															
 
																-				if (!warned)
															
 
																-				{
															
 
																-					warned = 1;
															
 
																-					_STARPU_DISP("Warning: performance model for %s not finished calibrating on worker %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), worker);
															
 
																-				}
															
 
																-				calibrating = 1;
															
 
																-			}
															
 
																-
															
 
																-			if (isnan(local_length) || _STARPU_IS_ZERO(local_length))
															
 
																-				/* there is no prediction available for that task
															
 
																-				 * with that arch yet, so switch to a greedy strategy */
															
 
																-				unknown = 1;
															
 
																-
															
 
																-			if (unknown)
															
 
																-				continue;
															
 
																-
															
 
																-			exp_end = exp_start + fifo->exp_len + local_length;
															
 
																-
															
 
																-			if (best == -1 || exp_end < best_exp_end_of_task)
															
 
																-			{
															
 
																-				/* a better solution was found */
															
 
																-				best_exp_end_of_task = exp_end;
															
 
																-				best = worker;
															
 
																-				model_best = local_length;
															
 
																-				transfer_model_best = local_penalty;
															
 
																-				best_impl = nimpl;
															
 
																-			}
															
 
																-		}
															
 
																-	}
															
 
																-
															
 
																-	if (unknown)
															
 
																-	{
															
 
																-		best = ntasks_best;
															
 
																-		model_best = 0.0;
															
 
																-		transfer_model_best = 0.0;
															
 
																-#ifdef STARPU_VERBOSE
															
 
																-		dt->eager_task_cnt++;
															
 
																-#endif
															
 
																-	}
															
 
																-
															
 
																-	//_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
															
 
																-
															
 
																-	starpu_task_set_implementation(task, best_impl);
															
 
																-
															
 
																-	starpu_sched_task_break(task);
															
 
																-	/* we should now have the best worker in variable "best" */
															
 
																-	return push_task_on_best_worker(task, best,
															
 
																-					model_best, transfer_model_best, prio, sched_ctx_id);
															
 
																-}
															
 
																-
															
 
																 /* TODO: factorise CPU computations, expensive with a lot of cores */
															
 
																 static void compute_all_performance_predictions(struct starpu_task *task,
															
 
																 						unsigned nworkers,
															
@@ -677,15 +532,19 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 			{
															
 
																 				/* TODO : conversion time */
															
 
																 				local_task_length[worker_ctx][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl);
															
 
																-				local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node);
															
 
																-				local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl);
															
 
																+				if (local_data_penalty)
															
 
																+					local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node);
															
 
																+				if (local_energy)
															
 
																+					local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl);
															
 
																 			}
															
 
																 			else
															
 
																 			{
															
 
																 				local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl);
															
 
																-				local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
															
 
																-				local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
															
 
																+				if (local_data_penalty)
															
 
																+					local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
															
 
																+				if (local_energy)
															
 
																+					local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
															
 
																 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
															
 
																 				if (conversion_time > 0.0)
															
 
																 					local_task_length[worker_ctx][nimpl] += conversion_time;
															
@@ -742,7 +601,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 			if (unknown)
															
 
																 				continue;
															
 
																-			double task_starting_time = STARPU_MAX(exp_start + prev_exp_len, now + local_data_penalty[worker_ctx][nimpl]); 
															
 
																+			double task_starting_time = exp_start + prev_exp_len;
															
 
																+			if (local_data_penalty)
															
 
																+				task_starting_time = STARPU_MAX(task_starting_time,
															
 
																+					now + local_data_penalty[worker_ctx][nimpl]);
															
 
																 			exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl];
															
@@ -753,8 +615,9 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 				nimpl_best = nimpl;
															
 
																 			}
															
 
																-			if (isnan(local_energy[worker_ctx][nimpl]))
															
 
																-				local_energy[worker_ctx][nimpl] = 0.;
															
 
																+			if (local_energy)
															
 
																+				if (isnan(local_energy[worker_ctx][nimpl]))
															
 
																+					local_energy[worker_ctx][nimpl] = 0.;
															
 
																 		}
															
 
																 		worker_ctx++;
															
@@ -774,7 +637,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 	*max_exp_endp_of_workers = max_exp_end_of_workers;
															
 
																 }
															
 
																-static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate, unsigned sorted_decision)
															
 
																+static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned da, unsigned simulate, unsigned sorted_decision)
															
 
																 {
															
 
																 	/* find the queue */
															
 
																 	int best = -1, best_in_ctx = -1;
															
@@ -812,8 +675,8 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
																 					    exp_end,
															
 
																 					    &max_exp_end_of_workers,
															
 
																 					    &min_exp_end_of_task,
															
 
																-					    local_data_penalty,
															
 
																-					    local_energy,
															
 
																+					    da ? local_data_penalty : NULL,
															
 
																+					    da ? local_energy : NULL,
															
 
																 					    &forced_best,
															
 
																 					    &forced_impl, sched_ctx_id, sorted_decision);
															
@@ -840,11 +703,14 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
																 					/* no one on that queue may execute this task */
															
 
																 					continue;
															
 
																 				}
															
 
																-				fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task)
															
 
																-					+ dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl])
															
 
																-					+ dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]);
															
 
																+				if (da)
															
 
																+					fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task)
															
 
																+						+ dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl])
															
 
																+						+ dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]);
															
 
																+				else
															
 
																+					fitness[worker_ctx][nimpl] = exp_end[worker_ctx][nimpl] - min_exp_end_of_task;
															
 
																-				if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
															
 
																+				if (da && exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
															
 
																 				{
															
 
																 					/* This placement will make the computation
															
 
																 					 * longer, take into account the idle
															
@@ -886,15 +752,17 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
																 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(best_in_ctx, sched_ctx_id);
															
 
																 		unsigned memory_node = starpu_worker_get_memory_node(best);
															
 
																 		model_best = starpu_task_expected_length(task, perf_arch, selected_impl);
															
 
																-		transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task);
															
 
																+		if (da)
															
 
																+			transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task);
															
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																 		model_best = local_task_length[best_in_ctx][selected_impl];
															
 
																-		transfer_model_best = local_data_penalty[best_in_ctx][selected_impl];
															
 
																+		if (da)
															
 
																+			transfer_model_best = local_data_penalty[best_in_ctx][selected_impl];
															
 
																 	}
															
 
																-	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
															
 
																+	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", selected_impl);
															
 
																 	starpu_task_set_implementation(task, selected_impl);
															
 
																 	starpu_sched_task_break(task);
															
@@ -911,7 +779,7 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
																 static int dmda_push_sorted_decision_task(struct starpu_task *task)
															
 
																 {
															
 
																-	return _dmda_push_task(task, 1, task->sched_ctx, 0, 1);
															
 
																+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 1);
															
 
																 }
															
 
																 static int dmda_push_sorted_task(struct starpu_task *task)
															
@@ -919,35 +787,40 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 
																 #ifdef STARPU_DEVEL
															
 
																 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
															
 
																 #endif
															
 
																-	return _dmda_push_task(task, 1, task->sched_ctx, 0, 0);
															
 
																+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 0);
															
 
																 }
															
 
																 static int dm_push_task(struct starpu_task *task)
															
 
																 {
															
 
																-	return _dm_push_task(task, 0, task->sched_ctx);
															
 
																+	return _dmda_push_task(task, 0, task->sched_ctx, 0, 0, 0);
															
 
																+}
															
 
																+
															
 
																+static double dm_simulate_push_task(struct starpu_task *task)
															
 
																+{
															
 
																+	return _dmda_push_task(task, 0, task->sched_ctx, 0, 1, 0);
															
 
																 }
															
 
																 static int dmda_push_task(struct starpu_task *task)
															
 
																 {
															
 
																 	STARPU_ASSERT(task);
															
 
																-	return _dmda_push_task(task, 0, task->sched_ctx, 0, 0);
															
 
																+	return _dmda_push_task(task, 0, task->sched_ctx, 1, 0, 0);
															
 
																 }
															
 
																 static double dmda_simulate_push_task(struct starpu_task *task)
															
 
																 {
															
 
																 	STARPU_ASSERT(task);
															
 
																-	return _dmda_push_task(task, 0, task->sched_ctx, 1, 0);
															
 
																+	return _dmda_push_task(task, 0, task->sched_ctx, 1, 1, 0);
															
 
																 }
															
 
																 static double dmda_simulate_push_sorted_task(struct starpu_task *task)
															
 
																 {
															
 
																 	STARPU_ASSERT(task);
															
 
																-	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0);
															
 
																+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 0);
															
 
																 }
															
 
																 static double dmda_simulate_push_sorted_decision_task(struct starpu_task *task)
															
 
																 {
															
 
																 	STARPU_ASSERT(task);
															
 
																-	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1);
															
 
																+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 1);
															
 
																 }
															
 
																 #ifdef NOTIFY_READY_SOON
															
@@ -1092,7 +965,7 @@ static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
 
																 	starpu_worker_unlock_self();
															
 
																 }
															
 
																-static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
															
 
																+static void _dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id, int da)
															
 
																 {
															
 
																 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																 	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
															
@@ -1100,8 +973,11 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 
																 	/* Compute the expected penality */
															
 
																 	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
															
 
																 						       starpu_task_get_implementation(task));
															
 
																+	double predicted_transfer = NAN;
															
 
																+
															
 
																+	if (da)
															
 
																+		predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);
															
 
																-	double predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);
															
 
																 	double now = starpu_timing_now();
															
 
																 	/* Update the predictions */
															
@@ -1110,32 +986,35 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 
																 	fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
															
 
																 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
															
 
																-	/* If there is no prediction available, we consider the task has a null length */
															
 
																-	if (!isnan(predicted_transfer))
															
 
																+	if (da)
															
 
																 	{
															
 
																-		if (now + predicted_transfer < fifo->exp_end)
															
 
																+		/* If there is no transfer prediction available, we consider that the transfer takes no time */
															
 
																+		if (!isnan(predicted_transfer))
															
 
																 		{
															
 
																-			/* We may hope that the transfer will be finished by
															
 
																-			 * the start of the task. */
															
 
																-			predicted_transfer = 0;
															
 
																-		}
															
 
																-		else
															
 
																-		{
															
 
																-			/* The transfer will not be finished by then, take the
															
 
																-			 * remainder into account */
															
 
																-			predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
															
 
																-		}
															
 
																-		task->predicted_transfer = predicted_transfer;
															
 
																-		fifo->exp_end += predicted_transfer;
															
 
																-		fifo->exp_len += predicted_transfer;
															
 
																-		if(dt->num_priorities != -1)
															
 
																-		{
															
 
																-			int i;
															
 
																-			int task_prio = _starpu_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx);
															
 
																-			for(i = 0; i <= task_prio; i++)
															
 
																-				fifo->exp_len_per_priority[i] += predicted_transfer;
															
 
																-		}
															
 
																+			if (now + predicted_transfer < fifo->exp_end)
															
 
																+			{
															
 
																+				/* We may hope that the transfer will be finished by
															
 
																+				 * the start of the task. */
															
 
																+				predicted_transfer = 0;
															
 
																+			}
															
 
																+			else
															
 
																+			{
															
 
																+				/* The transfer will not be finished by then, take the
															
 
																+				 * remainder into account */
															
 
																+				predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
															
 
																+			}
															
 
																+			task->predicted_transfer = predicted_transfer;
															
 
																+			fifo->exp_end += predicted_transfer;
															
 
																+			fifo->exp_len += predicted_transfer;
															
 
																+			if(dt->num_priorities != -1)
															
 
																+			{
															
 
																+				int i;
															
 
																+				int task_prio = _starpu_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx);
															
 
																+				for(i = 0; i <= task_prio; i++)
															
 
																+					fifo->exp_len_per_priority[i] += predicted_transfer;
															
 
																+			}
															
 
																+		}
															
 
																 	}
															
 
																 	/* If there is no prediction available, we consider the task has a null length */
															
@@ -1166,6 +1045,16 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 
																 	starpu_worker_unlock(workerid);
															
 
																 }
															
 
																+static void dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
															
 
																+{
															
 
																+	_dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 0);
															
 
																+}
															
 
																+
															
 
																+static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
															
 
																+{
															
 
																+	_dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 1);
															
 
																+}
															
 
																+
															
 
																 static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id)
															
 
																 {
															
 
																 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
@@ -1183,7 +1072,8 @@ struct starpu_sched_policy _starpu_sched_dm_policy =
 
																 	.add_workers = dmda_add_workers ,
															
 
																 	.remove_workers = dmda_remove_workers,
															
 
																 	.push_task = dm_push_task,
															
 
																-	.simulate_push_task = NULL,
															
 
																+	.simulate_push_task = dm_simulate_push_task,
															
 
																+	.push_task_notify = dm_push_task_notify,
															
 
																 	.pop_task = dmda_pop_task,
															
 
																 	.pre_exec_hook = dmda_pre_exec_hook,
															
 
																 	.post_exec_hook = dmda_post_exec_hook,
															
--- a/tools/gdbinit
+++ b/tools/gdbinit
@@ -40,7 +40,7 @@ define starpu-print-task
 
																   set $job = (struct _starpu_job *)$task->starpu_private
															
 
																   set $status=0
															
 
																   if $task->status == 0
															
 
																-    set $status="STARPU_TASK_INVALID"
															
 
																+    set $status="STARPU_TASK_INIT"
															
 
																   end
															
 
																   if $task->status == 1
															
 
																     set $status="STARPU_TASK_BLOCKED"
															
@@ -98,10 +98,11 @@ define starpu-print-task
 
																       if ! $job->job_successors->done[$n]
															
 
																         set $cg = $job->job_successors->deps[$n]
															
 
																         set $m = 0
															
 
																+        printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps
															
 
																 	while $m < $cg->ndeps
															
 
																 	  if ! $cg->done[$m]
															
 
																 	    set $depj = (struct _starpu_job *) $cg->deps[$m]
															
 
																-            printf "\t\ttask %p\n", $depj->task
															
 
																+            printf "\t\t\ttask %p\n", $depj->task
															
 
																 	  end
															
 
																 	  set $m = $m + 1
															
 
																 	end
															
@@ -219,7 +220,31 @@ define starpu-print-tag
 
																   if $tag_struct->state == STARPU_DONE
															
 
																      set $status="STARPU_DONE"
															
 
																   end
															
 
																-  printf "tag %d state %s\n", $arg0, $status
															
 
																+  printf "tag %d ((struct _starpu_tag *) %p)\n", $arg0, $tag_struct
															
 
																+  printf "\tstate %s\n", $status
															
 
																+  printf "\tdeps %d\n", $tag_struct->tag_successors.ndeps
															
 
																+  printf "\tcompleted %d\n", $tag_struct->tag_successors.ndeps_completed
															
 
																+  printf "\tndeps_remaining:\t\t<%u>\n", $tag_struct->tag_successors->ndeps - $tag_struct->tag_successors->ndeps_completed
															
 
																+  if _starpu_debug
															
 
																+    set $n = 0
															
 
																+    while $n < $tag_struct->tag_successors->ndeps
															
 
																+      if ! $tag_struct->tag_successors->done[$n]
															
 
																+        set $cg = $tag_struct->tag_successors->deps[$n]
															
 
																+        set $m = 0
															
 
																+        printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps
															
 
																+	while $m < $cg->ndeps
															
 
																+	  if ! $cg->done[$m]
															
 
																+	    set $dept = (starpu_tag_t) $cg->deps[$m]
															
 
																+            printf "\t\t\ttag %u\n", $dept
															
 
																+	  end
															
 
																+	  set $m = $m + 1
															
 
																+	end
															
 
																+      end
															
 
																+      set $n = $n + 1
															
 
																+    end
															
 
																+  end
															
 
																+  printf "\tndeps_completed:\t\t<%u>\n", $tag_struct->tag_successors->ndeps_completed
															
 
																+  printf "\tnsuccs:\t\t\t\t<%u>\n", $tag_struct->tag_successors->nsuccs
															
 
																 end
															
 
																 define starpu-tags