Parcourir la source

Merge branch 'master' into fpga

Nathalie Furmento il y a 5 ans
Parent
commit
1849722415

+ 14 - 4
ChangeLog

@@ -44,11 +44,16 @@ New features:
   * Add starpu_data_dup_ro().
   * Add starpu_data_dup_ro().
 
 
 Small changes:
 Small changes:
-  * Use the S4U interface of Simgrid instead of xbt and MSG.
   * Add a synthetic energy efficiency testcase.
   * Add a synthetic energy efficiency testcase.
 
 
-StarPU 1.3.4 (git revision xxx)
-==============================================
+StarPU 1.3.5 (git revision xxx)
+====================================================================
+
+Small changes:
+  * Move MPI cache functions into the public API
+
+StarPU 1.3.4 (git revision c37a5d024cd997596da41f765557c58099baf896)
+====================================================================
 
 
 Small features:
 Small features:
   * New environment variables STARPU_BUS_STATS_FILE and
   * New environment variables STARPU_BUS_STATS_FILE and
@@ -69,12 +74,17 @@ Small features:
   * Add field starpu_conf::precedence_over_environment_variables to ignore
   * Add field starpu_conf::precedence_over_environment_variables to ignore
     environment variables when parameters are set directly in starpu_conf
     environment variables when parameters are set directly in starpu_conf
   * Add starpu_data_get_coordinates_array
   * Add starpu_data_get_coordinates_array
+  * MPI: new functions starpu_mpi_interface_datatype_register() and
+    starpu_mpi_interface_datatype_unregister() which take an enum
+    starpu_data_interface_id instead of a starpu_data_handle_t
+  * New script starpu_env to set up StarPU environment variables
   * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to set the
   * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to set the
     exponential backoff limits of the number of cycles to pause while drivers
     exponential backoff limits of the number of cycles to pause while drivers
     are spinning.
     are spinning.
   * Add STARPU_DISPLAY_BINDINGS environment variable and
   * Add STARPU_DISPLAY_BINDINGS environment variable and
     starpu_display_bindings() function to display all bindings on the machine by
     starpu_display_bindings() function to display all bindings on the machine by
     calling hwloc-ps
     calling hwloc-ps
+
 Small changes:
 Small changes:
   * New configure option --disable-build-doc-pdf
   * New configure option --disable-build-doc-pdf
 
 
@@ -116,7 +126,7 @@ Small features:
     STARPU_TASK_PROFILING_INFO
     STARPU_TASK_PROFILING_INFO
   * New function starpu_create_callback_task() which creates and
   * New function starpu_create_callback_task() which creates and
     submits an empty task with the specified callback
     submits an empty task with the specified callback
-
+  * Use the S4U interface of Simgrid instead of xbt and MSG.
 
 
 Small changes:
 Small changes:
    * Default modular worker queues to 2 tasks unless it's an heft
    * Default modular worker queues to 2 tasks unless it's an heft

+ 1 - 1
Makefile.am

@@ -199,7 +199,7 @@ ctags-local:
 # The pmccabe tool, see <http://www.parisc-linux.org/~bame/pmccabe/>.
 # The pmccabe tool, see <http://www.parisc-linux.org/~bame/pmccabe/>.
 PMCCABE = pmccabe
 PMCCABE = pmccabe
 
 
-VC_URL = "https://gforge.inria.fr/scm/viewvc.php/trunk/%FILENAME%?view=markup&root=starpu"
+VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%"
 
 
 # Generate a cyclomatic complexity report.  Note that examples and tests are
 # Generate a cyclomatic complexity report.  Note that examples and tests are
 # excluded because they're not particularly relevant, and more importantly
 # excluded because they're not particularly relevant, and more importantly

+ 3 - 7
README

@@ -87,15 +87,11 @@ advantage of their specificities in a portable fashion.
 || III. Getting StarPU ||
 || III. Getting StarPU ||
 ++=====================++
 ++=====================++
 
 
-StarPU is available on https://gforge.inria.fr/projects/starpu/.
+StarPU is available on https://gitlab.inria.fr/starpu/starpu
 
 
-The project's SVN repository can be checked out through anonymous
-access with the following command(s).
+The Git repository can be cloned with the following command.
 
 
-$ svn checkout svn://scm.gforge.inria.fr/svn/starpu/trunk
-$ svn checkout --username anonsvn https://scm.gforge.inria.fr/svn/starpu/trunk
-
-The password is 'anonsvn'
+$ git clone git@gitlab.inria.fr:starpu/starpu.git
 
 
 ++=============================++
 ++=============================++
 || IV. Building and Installing ||
 || IV. Building and Installing ||

+ 1 - 1
configure.ac

@@ -18,7 +18,7 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 #
-AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://starpu.gforge.inria.fr/])
+AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://gitlab.inria.fr/starpu/starpu])
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_AUX_DIR([build-aux])
 
 

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-basic

@@ -24,7 +24,7 @@ pipeline
 	// Trigger the build
 	// Trigger the build
 	triggers
 	triggers
 	{
 	{
-		// Poll gforge explicitly every hour
+		// Poll SCM explicitly every hour
 		pollSCM('0 * * * *')
 		pollSCM('0 * * * *')
 	}
 	}
 
 

+ 1 - 1
contrib/ci.inria.fr/disabled/Jenkinsfile-bsd

@@ -24,7 +24,7 @@ pipeline
 	// Trigger the build
 	// Trigger the build
 	triggers
 	triggers
 	{
 	{
-		// Poll gforge explicitly every past-half hour
+		// Poll SCM explicitly every past-half hour
 		pollSCM('30 * * * *')
 		pollSCM('30 * * * *')
 	}
 	}
 
 

+ 3 - 3
doc/doxygen/Makefile.am

@@ -304,8 +304,8 @@ endif
 EXTRA_DIST += doxygen.cfg refman.tex \
 EXTRA_DIST += doxygen.cfg refman.tex \
 	      $(chapters) $(images)
 	      $(chapters) $(images)
 
 
-# Rule to update documentation on web server. Should only be used locally.
-PUBLISHHOST	?= gforge
+# Rule to update documentation on web server. Should only be called from benchmarks dalton directory
+PUBLISHDIR	?= /home/benchmarks/softs/starpu/starpu-scripts/mirror/files/doc
 update-web: $(DOX_PDF)
 update-web: $(DOX_PDF)
-	scp -pr starpu.pdf html $(PUBLISHHOST):/home/groups/starpu/htdocs/files/doc
+	cp -pr starpu.pdf html $(PUBLISHDIR)
 
 

+ 3 - 3
doc/doxygen/chapters/000_introduction.doxy

@@ -77,9 +77,9 @@ policies in a portable fashion (\ref HowToDefineANewSchedulingPolicy).
 The remainder of this section describes the main concepts used in StarPU.
 The remainder of this section describes the main concepts used in StarPU.
 
 
 A video is available on the StarPU website
 A video is available on the StarPU website
-http://starpu.gforge.inria.fr/ that presents these concepts in 26 minutes.
+https://starpu.gitlabpages.inria.fr/ that presents these concepts in 26 minutes.
 
 
-Some tutorials are also available on http://starpu.gforge.inria.fr/tutorials/
+Some tutorials are also available on https://starpu.gitlabpages.inria.fr/tutorials/
 
 
 // explain the notion of codelet and task (i.e. g(A, B)
 // explain the notion of codelet and task (i.e. g(A, B)
 
 
@@ -190,7 +190,7 @@ unregister it.
 \section ResearchPapers Research Papers
 \section ResearchPapers Research Papers
 
 
 Research papers about StarPU can be found at
 Research papers about StarPU can be found at
-http://starpu.gforge.inria.fr/publications/.
+https://starpu.gitlabpages.inria.fr/publications/.
 
 
 A good overview is available in the research report at
 A good overview is available in the research report at
 http://hal.archives-ouvertes.fr/inria-00467677.
 http://hal.archives-ouvertes.fr/inria-00467677.

+ 6 - 11
doc/doxygen/chapters/101_building.doxy

@@ -61,27 +61,22 @@ script <c>configure</c>.
 \subsection GettingSources Getting Sources
 \subsection GettingSources Getting Sources
 
 
 StarPU's sources can be obtained from the download page of
 StarPU's sources can be obtained from the download page of
-the StarPU website (http://starpu.gforge.inria.fr/files/).
+the StarPU website (https://starpu.gitlabpages.inria.fr/files/).
 
 
 All releases and the development tree of StarPU are freely available
 All releases and the development tree of StarPU are freely available
-on Inria's gforge under the LGPL license. Some releases are available
+on the StarPU SCM server under the LGPL license. Some releases are available
 under the BSD license.
 under the BSD license.
 
 
-The latest release can be downloaded from the Inria's gforge (http://gforge.inria.fr/frs/?group_id=1570) or
-directly from the StarPU download page (http://starpu.gforge.inria.fr/files/).
+The latest release can be downloaded from the StarPU download page (https://starpu.gitlabpages.inria.fr/files/).
 
 
-The latest nightly snapshot can be downloaded from the StarPU gforge website (http://starpu.gforge.inria.fr/testing/).
-
-\verbatim
-$ wget http://starpu.gforge.inria.fr/testing/starpu-nightly-latest.tar.gz
-\endverbatim
+The latest nightly snapshot can be downloaded from the StarPU website (https://starpu.gitlabpages.inria.fr/files/testing/).
 
 
 And finally, current development version is also accessible via git.
 And finally, current development version is also accessible via git.
 It should only be used if you need the very latest changes (i.e. less
 It should only be used if you need the very latest changes (i.e. less
 than a day old!).
 than a day old!).
 
 
 \verbatim
 \verbatim
-$ git clone https://scm.gforge.inria.fr/anonscm/git/starpu/starpu.git
+$ git clone git@gitlab.inria.fr:starpu/starpu.git
 \endverbatim
 \endverbatim
 
 
 \subsection ConfiguringStarPU Configuring StarPU
 \subsection ConfiguringStarPU Configuring StarPU
@@ -139,7 +134,7 @@ $ make
 Once everything is built, you may want to test the result. An
 Once everything is built, you may want to test the result. An
 extensive set of regression tests is provided with StarPU. Running the
 extensive set of regression tests is provided with StarPU. Running the
 tests is done by calling <c>make check</c>. These tests are run every night
 tests is done by calling <c>make check</c>. These tests are run every night
-and the result from the main profile is publicly available (http://starpu.gforge.inria.fr/testing/master/).
+and the result from the main profile is publicly available (https://starpu.gitlabpages.inria.fr/files/testing/master/).
 
 
 \verbatim
 \verbatim
 $ make check
 $ make check

+ 10 - 2
doc/doxygen/chapters/410_mpi_support.doxy

@@ -759,8 +759,16 @@ add fine-graph starpu_mpi_cache_flush() calls during the algorithm; the effect
 for the data deallocation will be the same, but it will additionally release some
 for the data deallocation will be the same, but it will additionally release some
 pressure from the StarPU-MPI cache hash table during task submission.
 pressure from the StarPU-MPI cache hash table during task submission.
 
 
-One can determine whether a piece of is cached with starpu_mpi_cached_receive()
-and starpu_mpi_cached_send().
+One can determine whether a piece of data is cached with
+starpu_mpi_cached_receive() and starpu_mpi_cached_send().
+
+Functions starpu_mpi_cached_receive_set() and
+starpu_mpi_cached_send_set() are automatically called by
+starpu_mpi_task_insert() but can also be called directly by the
+application. Functions starpu_mpi_cached_send_clear() and
+starpu_mpi_cached_receive_clear() must be called to clear data from
+the cache. They are also automatically called when using
+starpu_mpi_task_insert().
 
 
 The whole caching behavior can be disabled thanks to the \ref STARPU_MPI_CACHE
 The whole caching behavior can be disabled thanks to the \ref STARPU_MPI_CACHE
 environment variable. The variable \ref STARPU_MPI_CACHE_STATS can be set to <c>1</c>
 environment variable. The variable \ref STARPU_MPI_CACHE_STATS can be set to <c>1</c>

+ 3 - 3
doc/doxygen_dev/Makefile.am

@@ -245,8 +245,8 @@ endif
 EXTRA_DIST += doxygen.cfg refman.tex \
 EXTRA_DIST += doxygen.cfg refman.tex \
 	      $(chapters) $(images)
 	      $(chapters) $(images)
 
 
-# Rule to update documentation on web server. Should only be used locally.
-PUBLISHHOST	?= gforge
+# Rule to update documentation on web server. Should only be called from benchmarks dalton directory
+PUBLISHDIR	?= /home/benchmarks/softs/starpu/starpu-scripts/mirror/files/doc
 update-web: $(DOX_PDF)
 update-web: $(DOX_PDF)
-	scp -pr starpu_dev.pdf html_dev $(PUBLISHHOST):/home/groups/starpu/htdocs/files/doc
+	cp -pr starpu_dev.pdf html_dev $(PUBLISHDIR)
 
 

+ 4 - 3
include/starpu_task.h

@@ -151,8 +151,9 @@ enum starpu_codelet_type
 
 
 enum starpu_task_status
 enum starpu_task_status
 {
 {
-	STARPU_TASK_INVALID,     /**< The task has just been initialized. */
-#define STARPU_TASK_INVALID 0
+	STARPU_TASK_INIT,        /**< The task has just been initialized. */
+#define STARPU_TASK_INIT 0
+#define STARPU_TASK_INVALID STARPU_TASK_INIT  /**< old name for STARPU_TASK_INIT */
 	STARPU_TASK_BLOCKED,     /**< The task has just been
 	STARPU_TASK_BLOCKED,     /**< The task has just been
 				    submitted, and its dependencies have not been checked yet. */
 				    submitted, and its dependencies have not been checked yet. */
 	STARPU_TASK_READY,       /**< The task is ready for execution. */
 	STARPU_TASK_READY,       /**< The task is ready for execution. */
@@ -1295,7 +1296,7 @@ struct starpu_task
 	.detach = 1,					\
 	.detach = 1,					\
 	.destroy = 0,					\
 	.destroy = 0,					\
 	.regenerate = 0,				\
 	.regenerate = 0,				\
-	.status = STARPU_TASK_INVALID,			\
+	.status = STARPU_TASK_INIT,			\
 	.profiling_info = NULL,				\
 	.profiling_info = NULL,				\
 	.predicted = NAN,				\
 	.predicted = NAN,				\
 	.predicted_transfer = NAN,			\
 	.predicted_transfer = NAN,			\

+ 11 - 0
mpi/examples/Makefile.am

@@ -128,6 +128,17 @@ starpu_mpi_EXAMPLES	+=	\
 endif
 endif
 
 
 ##################
 ##################
+# Cache examples #
+##################
+examplebin_PROGRAMS +=		\
+	cache/cache		\
+	cache/cache_disable
+starpu_mpi_EXAMPLES +=		\
+	cache/cache		\
+	cache/cache_disable
+
+
+##################
 # MPI LU example #
 # MPI LU example #
 ##################
 ##################
 
 

+ 1 - 2
mpi/tests/cache.c

@@ -17,7 +17,6 @@
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
 #include <math.h>
 #include <math.h>
 #include "helper.h"
 #include "helper.h"
-#include <starpu_mpi_cache.h>
 
 
 void func_cpu(void *descr[], void *_args)
 void func_cpu(void *descr[], void *_args)
 {
 {
@@ -57,7 +56,7 @@ void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, sta
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 
 
-	cache = _starpu_mpi_cache_received_data_get(data);
+	cache = starpu_mpi_cached_receive(data);
 
 
 	if (rank == 1)
 	if (rank == 1)
 	{
 	{

+ 3 - 4
mpi/tests/cache_disable.c

@@ -17,7 +17,6 @@
 #include <starpu_mpi.h>
 #include <starpu_mpi.h>
 #include <math.h>
 #include <math.h>
 #include "helper.h"
 #include "helper.h"
-#include <starpu_mpi_cache.h>
 
 
 void func_cpu(void *descr[], void *_args)
 void func_cpu(void *descr[], void *_args)
 {
 {
@@ -63,7 +62,7 @@ int main(int argc, char **argv)
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 
 
-	in_cache = _starpu_mpi_cache_received_data_get(data);
+	in_cache = starpu_mpi_cached_receive(data);
 	if (rank == 1)
 	if (rank == 1)
 	{
 	{
 		STARPU_ASSERT_MSG(in_cache == 1, "Data should be in cache\n");
 		STARPU_ASSERT_MSG(in_cache == 1, "Data should be in cache\n");
@@ -73,7 +72,7 @@ int main(int argc, char **argv)
 	starpu_mpi_cache_set(0);
 	starpu_mpi_cache_set(0);
 
 
 	// We check the data is no longer in the cache
 	// We check the data is no longer in the cache
-	in_cache = _starpu_mpi_cache_received_data_get(data);
+	in_cache = starpu_mpi_cached_receive(data);
 	if (rank == 1)
 	if (rank == 1)
 	{
 	{
 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");
 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");
@@ -81,7 +80,7 @@ int main(int argc, char **argv)
 
 
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
-	in_cache = _starpu_mpi_cache_received_data_get(data);
+	in_cache = starpu_mpi_cached_receive(data);
 	if (rank == 1)
 	if (rank == 1)
 	{
 	{
 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");
 		STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n");

+ 27 - 0
mpi/include/starpu_mpi.h

@@ -422,12 +422,39 @@ void starpu_mpi_cache_flush_all_data(MPI_Comm comm);
 int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
 int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
 
 
 /**
 /**
+ * If \p data is already available in the reception cache, return 1
+ * If \p data is NOT available in the reception cache, add it to the
+ * cache and return 0
+ * Return 0 if the communication cache is not enabled
+ */
+int starpu_mpi_cached_receive_set(starpu_data_handle_t data);
+
+/**
+ * Remove \p data from the reception cache
+ */
+void starpu_mpi_cached_receive_clear(starpu_data_handle_t data);
+
+/**
    Test whether \p data_handle is cached for emission to node \p dest,
    Test whether \p data_handle is cached for emission to node \p dest,
    i.e. the value was previously sent to \p dest, and not flushed
    i.e. the value was previously sent to \p dest, and not flushed
    since then.
    since then.
 */
 */
 int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
 int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
 
 
+/**
+ * If \p data is already available in the emission cache for node
+ * \p dest, return 1
+ * If \p data is NOT available in the emission cache for node \p dest,
+ * add it to the cache and return 0
+ * Return 0 if the communication cache is not enabled
+ */
+int starpu_mpi_cached_send_set(starpu_data_handle_t data, int dest);
+
+/**
+ * Remove \p data from the emission cache
+ */
+void starpu_mpi_cached_send_clear(starpu_data_handle_t data);
+
 /** @} */
 /** @} */
 
 
 /**
 /**

+ 4 - 4
mpi/src/starpu_mpi.c

@@ -346,7 +346,7 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 	if (me == node)
 	if (me == node)
 	{
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
+		int already_received = starpu_mpi_cached_receive_set(data_handle);
 		if (already_received == 0)
 		if (already_received == 0)
 		{
 		{
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
@@ -356,7 +356,7 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 	else if (me == rank)
 	else if (me == rank)
 	{
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
+		int already_sent = starpu_mpi_cached_send_set(data_handle, node);
 		if (already_sent == 0)
 		if (already_sent == 0)
 		{
 		{
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
@@ -389,7 +389,7 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 	{
 	{
 		MPI_Status status;
 		MPI_Status status;
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
+		int already_received = starpu_mpi_cached_receive_set(data_handle);
 		if (already_received == 0)
 		if (already_received == 0)
 		{
 		{
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
@@ -399,7 +399,7 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 	else if (me == rank)
 	else if (me == rank)
 	{
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
+		int already_sent = starpu_mpi_cached_send_set(data_handle, node);
 		if (already_sent == 0)
 		if (already_sent == 0)
 		{
 		{
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);

+ 6 - 16
mpi/src/starpu_mpi_cache.c

@@ -172,7 +172,7 @@ static void _starpu_mpi_cache_data_remove_nolock(starpu_data_handle_t data_handl
 /**************************************
 /**************************************
  * Received cache
  * Received cache
  **************************************/
  **************************************/
-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data_handle)
+void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle)
 {
 {
 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
@@ -198,7 +198,7 @@ void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data_handle)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
 }
 }
 
 
-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data_handle)
+int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle)
 {
 {
 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
 	int mpi_rank = starpu_mpi_data_get_rank(data_handle);
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
@@ -226,7 +226,7 @@ int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data_handle)
 	return already_received;
 	return already_received;
 }
 }
 
 
-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data_handle)
+int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
 {
 {
 	int already_received;
 	int already_received;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
@@ -241,15 +241,10 @@ int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data_handle)
 	return already_received;
 	return already_received;
 }
 }
 
 
-int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
-{
-	return _starpu_mpi_cache_received_data_get(data_handle);
-}
-
 /**************************************
 /**************************************
  * Send cache
  * Send cache
  **************************************/
  **************************************/
-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
+void starpu_mpi_cached_send_clear(starpu_data_handle_t data_handle)
 {
 {
 	int n, size;
 	int n, size;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
@@ -271,7 +266,7 @@ void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
 }
 }
 
 
-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data_handle, int dest)
+int starpu_mpi_cached_send_set(starpu_data_handle_t data_handle, int dest)
 {
 {
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 
 
@@ -296,7 +291,7 @@ int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data_handle, int dest)
 	return already_sent;
 	return already_sent;
 }
 }
 
 
-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data_handle, int dest)
+int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
 {
 {
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	int already_sent;
 	int already_sent;
@@ -311,11 +306,6 @@ int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data_handle, int dest)
 	return already_sent;
 	return already_sent;
 }
 }
 
 
-int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
-{
-	return _starpu_mpi_cache_sent_data_get(data_handle, dest);
-}
-
 static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
 static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
 {
 {
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
 	struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;

+ 0 - 16
mpi/src/starpu_mpi_cache.h

@@ -32,22 +32,6 @@ void _starpu_mpi_cache_shutdown();
 void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle);
 void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle);
 void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle);
 void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle);
 
 
-/*
- * If the data is already available in the cache, return a pointer to the data
- * If the data is NOT available in the cache, add it to the cache and return NULL
- */
-int _starpu_mpi_cache_received_data_set(starpu_data_handle_t data);
-int _starpu_mpi_cache_received_data_get(starpu_data_handle_t data);
-void _starpu_mpi_cache_received_data_clear(starpu_data_handle_t data);
-
-/*
- * If the data is already available in the cache, return a pointer to the data
- * If the data is NOT available in the cache, add it to the cache and return NULL
- */
-int _starpu_mpi_cache_sent_data_set(starpu_data_handle_t data, int dest);
-int _starpu_mpi_cache_sent_data_get(starpu_data_handle_t data, int dest);
-void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data);
-
 void _starpu_mpi_cache_flush(starpu_data_handle_t data_handle);
 void _starpu_mpi_cache_flush(starpu_data_handle_t data_handle);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 4 - 4
mpi/src/starpu_mpi_task_insert.c

@@ -112,7 +112,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 		if (do_execute && mpi_rank != STARPU_MPI_PER_NODE && mpi_rank != me)
 		if (do_execute && mpi_rank != STARPU_MPI_PER_NODE && mpi_rank != me)
 		{
 		{
 			/* The node is going to execute the codelet, but it does not own the data, it needs to receive the data from the owner node */
 			/* The node is going to execute the codelet, but it does not own the data, it needs to receive the data from the owner node */
-			int already_received = _starpu_mpi_cache_received_data_set(data);
+			int already_received = starpu_mpi_cached_receive_set(data);
 			if (already_received == 0)
 			if (already_received == 0)
 			{
 			{
 				if (data_tag == -1)
 				if (data_tag == -1)
@@ -126,7 +126,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 		if (!do_execute && mpi_rank == me)
 		if (!do_execute && mpi_rank == me)
 		{
 		{
 			/* The node owns the data, but another node is going to execute the codelet, the node needs to send the data to the executee node. */
 			/* The node owns the data, but another node is going to execute the codelet, the node needs to send the data to the executee node. */
-			int already_sent = _starpu_mpi_cache_sent_data_set(data, xrank);
+			int already_sent = starpu_mpi_cached_send_set(data, xrank);
 			if (already_sent == 0)
 			if (already_sent == 0)
 			{
 			{
 				if (data_tag == -1)
 				if (data_tag == -1)
@@ -182,8 +182,8 @@ void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum star
 		if (mode & STARPU_W || mode & STARPU_REDUX)
 		if (mode & STARPU_W || mode & STARPU_REDUX)
 		{
 		{
 			/* The data has been modified, it MUST be removed from the cache */
 			/* The data has been modified, it MUST be removed from the cache */
-			_starpu_mpi_cache_sent_data_clear(data);
-			_starpu_mpi_cache_received_data_clear(data);
+			starpu_mpi_cached_send_clear(data);
+			starpu_mpi_cached_receive_clear(data);
 		}
 		}
 	}
 	}
 	else
 	else

+ 0 - 4
mpi/tests/Makefile.am

@@ -96,8 +96,6 @@ starpu_mpi_TESTS =
 
 
 starpu_mpi_TESTS +=				\
 starpu_mpi_TESTS +=				\
 	broadcast				\
 	broadcast				\
-	cache					\
-	cache_disable				\
 	callback				\
 	callback				\
 	driver					\
 	driver					\
 	early_request				\
 	early_request				\
@@ -192,8 +190,6 @@ noinst_PROGRAMS +=				\
 	block_interface_pinned			\
 	block_interface_pinned			\
 	attr					\
 	attr					\
 	broadcast				\
 	broadcast				\
-	cache					\
-	cache_disable				\
 	callback				\
 	callback				\
 	matrix					\
 	matrix					\
 	matrix2					\
 	matrix2					\

+ 21 - 0
src/core/dependencies/tags.c

@@ -63,10 +63,21 @@ static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
 
 
 	cg->ntags = ntags;
 	cg->ntags = ntags;
 	cg->remaining = ntags;
 	cg->remaining = ntags;
+#ifdef STARPU_DEBUG
+	cg->ndeps = ntags;
+	cg->deps = NULL;
+	cg->done = NULL;
+#endif
 	cg->cg_type = STARPU_CG_TAG;
 	cg->cg_type = STARPU_CG_TAG;
 
 
 	cg->succ.tag = tag;
 	cg->succ.tag = tag;
 	tag->tag_successors.ndeps++;
 	tag->tag_successors.ndeps++;
+#ifdef STARPU_DEBUG
+	_STARPU_REALLOC(tag->tag_successors.deps, tag->tag_successors.ndeps * sizeof(tag->tag_successors.deps[0]));
+	_STARPU_REALLOC(tag->tag_successors.done, tag->tag_successors.ndeps * sizeof(tag->tag_successors.done[0]));
+	tag->tag_successors.deps[tag->tag_successors.ndeps-1] = cg;
+	tag->tag_successors.done[tag->tag_successors.ndeps-1] = 0;
+#endif
 
 
 	return cg;
 	return cg;
 }
 }
@@ -364,10 +375,20 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
 	_starpu_spin_unlock(&tag_child->lock);
 	_starpu_spin_unlock(&tag_child->lock);
 
 
+#ifdef STARPU_DEBUG
+	_STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0]));
+	_STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0]));
+#endif
+
 	for (i = 0; i < ndeps; i++)
 	for (i = 0; i < ndeps; i++)
 	{
 	{
 		starpu_tag_t dep_id = array[i];
 		starpu_tag_t dep_id = array[i];
 
 
+#ifdef STARPU_DEBUG
+		cg->deps[i] = (void*) (uintptr_t) dep_id;
+		cg->done[i] = 0;
+#endif
+
 		/* id depends on dep_id
 		/* id depends on dep_id
 		 * so cg should be among dep_id's successors*/
 		 * so cg should be among dep_id's successors*/
 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
 		_STARPU_TRACE_TAG_DEPS(id, dep_id);

+ 1 - 1
src/core/simgrid.c

@@ -723,7 +723,7 @@ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_j
 	{
 	{
 		length = starpu_task_worker_expected_length(starpu_task, workerid, sched_ctx_id, j->nimpl);
 		length = starpu_task_worker_expected_length(starpu_task, workerid, sched_ctx_id, j->nimpl);
 		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
 		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
-				  "Codelet %s does not have a perfmodel (in directory %s), or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated",
+				  "Codelet %s does not have a perfmodel (in directory %s), or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables",
 				  _starpu_job_get_model_name(j), _starpu_get_perf_model_dir_codelet());
 				  _starpu_job_get_model_name(j), _starpu_get_perf_model_dir_codelet());
                 /* TODO: option to add variance according to performance model,
                 /* TODO: option to add variance according to performance model,
                  * to be able to easily check scheduling robustness */
                  * to be able to easily check scheduling robustness */

+ 6 - 6
src/core/task.c

@@ -288,8 +288,8 @@ void starpu_task_init(struct starpu_task *task)
 
 
 	task->detach = 1;
 	task->detach = 1;
 
 
-#if STARPU_TASK_INVALID != 0
-	task->status = STARPU_TASK_INVALID;
+#if STARPU_TASK_INIT != 0
+	task->status = STARPU_TASK_INIT;
 #endif
 #endif
 
 
 	task->predicted = NAN;
 	task->predicted = NAN;
@@ -766,9 +766,9 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 
 
 	if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED)
 	if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED)
-		task->status = STARPU_TASK_INVALID;
+		task->status = STARPU_TASK_INIT;
 	else
 	else
-		STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
+		STARPU_ASSERT(task->status == STARPU_TASK_INIT);
 
 
 	if (j->internal)
 	if (j->internal)
 	{
 	{
@@ -1067,7 +1067,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	_starpu_job_set_ordered_buffers(j);
 	_starpu_job_set_ordered_buffers(j);
 
 
-	STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
+	STARPU_ASSERT(task->status == STARPU_TASK_INIT);
 	task->status = STARPU_TASK_READY;
 	task->status = STARPU_TASK_READY;
 	_starpu_profiling_set_task_push_start_time(task);
 	_starpu_profiling_set_task_push_start_time(task);
 
 
@@ -1668,7 +1668,7 @@ struct starpu_task *starpu_task_ft_create_retry
 	new_task->failed = 0;
 	new_task->failed = 0;
 	new_task->scheduled = 0;
 	new_task->scheduled = 0;
 	new_task->prefetched = 0;
 	new_task->prefetched = 0;
-	new_task->status = STARPU_TASK_INVALID;
+	new_task->status = STARPU_TASK_INIT;
 	new_task->profiling_info = NULL;
 	new_task->profiling_info = NULL;
 	new_task->prev = NULL;
 	new_task->prev = NULL;
 	new_task->next = NULL;
 	new_task->next = NULL;

+ 1 - 1
src/core/task_bundle.c

@@ -50,7 +50,7 @@ int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *t
 		return -EPERM;
 		return -EPERM;
 	}
 	}
 
 
-	if (task->status != STARPU_TASK_INVALID)
+	if (task->status != STARPU_TASK_INIT)
 	{
 	{
 		/* The task has already been submitted, it's too late to put it
 		/* The task has already been submitted, it's too late to put it
 		 * into a bundle now. */
 		 * into a bundle now. */

+ 1 - 1
src/datawizard/user_interactions.c

@@ -227,7 +227,7 @@ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_dat
 			*pre_sync_jobid = pre_sync_job->job_id;
 			*pre_sync_jobid = pre_sync_job->job_id;
 
 
 		wrapper->post_sync_task = starpu_task_create();
 		wrapper->post_sync_task = starpu_task_create();
-		wrapper->post_sync_task->name = "_starpu_data_acquire_cb_post";
+		wrapper->post_sync_task->name = "_starpu_data_acquire_cb_release";
 		wrapper->post_sync_task->detach = 1;
 		wrapper->post_sync_task->detach = 1;
 		wrapper->post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
 		wrapper->post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE;
 		post_sync_job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
 		post_sync_job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);

+ 85 - 195
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -443,151 +443,6 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	return ret;
 	return ret;
 }
 }
 
 
-/* TODO: factorize with dmda!! */
-static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id)
-{
-	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int best = -1;
-
-	double best_exp_end_of_task = 0.0;
-	double model_best = 0.0;
-	double transfer_model_best = 0.0;
-
-	int ntasks_best = -1;
-	double ntasks_best_end = 0.0;
-	int calibrating = 0;
-
-	/* A priori, we know all estimations */
-	int unknown = 0;
-
-	unsigned best_impl = 0;
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-
-	struct starpu_sched_ctx_iterator it;
-
-	double now = starpu_timing_now();
-
-	workers->init_iterator_for_parallel_tasks(workers, &it, task);
-	while(workers->has_next(workers, &it))
-	{
-		unsigned nimpl;
-		unsigned impl_mask;
-		unsigned worker = workers->get_next(workers, &it);
-		struct _starpu_fifo_taskq *fifo  = &dt->queue_array[worker];
-		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id);
-
-		/* Sometimes workers didn't take the tasks as early as we expected */
-		double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
-
-		if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask))
-			continue;
-
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
-		{
-			if (!(impl_mask & (1U << nimpl)))
-			{
-				/* no one on that queue may execute this task */
-				continue;
-			}
-
-			double exp_end;
-			double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl);
-			double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker);
-			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
-
-			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
-
-			/*
-			 * This implements a default greedy scheduler for the
-			 * case of tasks which have no performance model, or
-			 * whose performance model is not calibrated yet.
-			 *
-			 * It simply uses the number of tasks already pushed to
-			 * the workers, divided by the relative performance of
-			 * a CPU and of a GPU.
-			 *
-			 * This is always computed, but the ntasks_best
-			 * selection is only really used if the task indeed has
-			 * no performance model, or is not calibrated yet.
-			 */
-			if (ntasks_best == -1
-
-			    /* Always compute the greedy decision, at least for
-			     * the tasks with no performance model. */
-			    || (!calibrating && ntasks_end < ntasks_best_end)
-
-			    /* The performance model of this task is not
-			     * calibrated on this worker, try to run it there
-			     * to calibrate it there. */
-			    || (!calibrating && isnan(local_length))
-
-			    /* the performance model of this task is not
-			     * calibrated on this worker either, rather run it
-			     * there if this one is low on scheduled tasks. */
-			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end)
-				)
-			{
-				ntasks_best_end = ntasks_end;
-				ntasks_best = worker;
-				best_impl = nimpl;
-			}
-
-			if (isnan(local_length))
-			{
-				/* we are calibrating, we want to speed-up calibration time
-				 * so we privilege non-calibrated tasks (but still
-				 * greedily distribute them to avoid dumb schedules) */
-				static int warned;
-				if (!warned)
-				{
-					warned = 1;
-					_STARPU_DISP("Warning: performance model for %s not finished calibrating on worker %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), worker);
-				}
-				calibrating = 1;
-			}
-
-			if (isnan(local_length) || _STARPU_IS_ZERO(local_length))
-				/* there is no prediction available for that task
-				 * with that arch yet, so switch to a greedy strategy */
-				unknown = 1;
-
-			if (unknown)
-				continue;
-
-			exp_end = exp_start + fifo->exp_len + local_length;
-
-			if (best == -1 || exp_end < best_exp_end_of_task)
-			{
-				/* a better solution was found */
-				best_exp_end_of_task = exp_end;
-				best = worker;
-				model_best = local_length;
-				transfer_model_best = local_penalty;
-				best_impl = nimpl;
-			}
-		}
-	}
-
-	if (unknown)
-	{
-		best = ntasks_best;
-		model_best = 0.0;
-		transfer_model_best = 0.0;
-#ifdef STARPU_VERBOSE
-		dt->eager_task_cnt++;
-#endif
-	}
-
-	//_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
-
-	starpu_task_set_implementation(task, best_impl);
-
-	starpu_sched_task_break(task);
-	/* we should now have the best worker in variable "best" */
-	return push_task_on_best_worker(task, best,
-					model_best, transfer_model_best, prio, sched_ctx_id);
-}
-
 /* TODO: factorise CPU computations, expensive with a lot of cores */
 /* TODO: factorise CPU computations, expensive with a lot of cores */
 static void compute_all_performance_predictions(struct starpu_task *task,
 static void compute_all_performance_predictions(struct starpu_task *task,
 						unsigned nworkers,
 						unsigned nworkers,
@@ -677,15 +532,19 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			{
 			{
 				/* TODO : conversion time */
 				/* TODO : conversion time */
 				local_task_length[worker_ctx][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl);
 				local_task_length[worker_ctx][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl);
-				local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node);
-				local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl);
+				if (local_data_penalty)
+					local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node);
+				if (local_energy)
+					local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl);
 
 
 			}
 			}
 			else
 			else
 			{
 			{
 				local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl);
 				local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl);
-				local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
-				local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
+				if (local_data_penalty)
+					local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
+				if (local_energy)
+					local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
 				if (conversion_time > 0.0)
 				if (conversion_time > 0.0)
 					local_task_length[worker_ctx][nimpl] += conversion_time;
 					local_task_length[worker_ctx][nimpl] += conversion_time;
@@ -742,7 +601,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			if (unknown)
 			if (unknown)
 				continue;
 				continue;
 
 
-			double task_starting_time = STARPU_MAX(exp_start + prev_exp_len, now + local_data_penalty[worker_ctx][nimpl]); 
+			double task_starting_time = exp_start + prev_exp_len;
+			if (local_data_penalty)
+				task_starting_time = STARPU_MAX(task_starting_time,
+					now + local_data_penalty[worker_ctx][nimpl]);
 
 
 			exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl];
 			exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl];
 
 
@@ -753,8 +615,9 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 				nimpl_best = nimpl;
 				nimpl_best = nimpl;
 			}
 			}
 
 
-			if (isnan(local_energy[worker_ctx][nimpl]))
-				local_energy[worker_ctx][nimpl] = 0.;
+			if (local_energy)
+				if (isnan(local_energy[worker_ctx][nimpl]))
+					local_energy[worker_ctx][nimpl] = 0.;
 
 
 		}
 		}
 		worker_ctx++;
 		worker_ctx++;
@@ -774,7 +637,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 	*max_exp_endp_of_workers = max_exp_end_of_workers;
 	*max_exp_endp_of_workers = max_exp_end_of_workers;
 }
 }
 
 
-static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned simulate, unsigned sorted_decision)
+static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned da, unsigned simulate, unsigned sorted_decision)
 {
 {
 	/* find the queue */
 	/* find the queue */
 	int best = -1, best_in_ctx = -1;
 	int best = -1, best_in_ctx = -1;
@@ -812,8 +675,8 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 					    exp_end,
 					    exp_end,
 					    &max_exp_end_of_workers,
 					    &max_exp_end_of_workers,
 					    &min_exp_end_of_task,
 					    &min_exp_end_of_task,
-					    local_data_penalty,
-					    local_energy,
+					    da ? local_data_penalty : NULL,
+					    da ? local_energy : NULL,
 					    &forced_best,
 					    &forced_best,
 					    &forced_impl, sched_ctx_id, sorted_decision);
 					    &forced_impl, sched_ctx_id, sorted_decision);
 
 
@@ -840,11 +703,14 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 					/* no one on that queue may execute this task */
 					/* no one on that queue may execute this task */
 					continue;
 					continue;
 				}
 				}
-				fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task)
-					+ dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl])
-					+ dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]);
+				if (da)
+					fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task)
+						+ dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl])
+						+ dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]);
+				else
+					fitness[worker_ctx][nimpl] = exp_end[worker_ctx][nimpl] - min_exp_end_of_task;
 
 
-				if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
+				if (da && exp_end[worker_ctx][nimpl] > max_exp_end_of_workers)
 				{
 				{
 					/* This placement will make the computation
 					/* This placement will make the computation
 					 * longer, take into account the idle
 					 * longer, take into account the idle
@@ -886,15 +752,17 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(best_in_ctx, sched_ctx_id);
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(best_in_ctx, sched_ctx_id);
 		unsigned memory_node = starpu_worker_get_memory_node(best);
 		unsigned memory_node = starpu_worker_get_memory_node(best);
 		model_best = starpu_task_expected_length(task, perf_arch, selected_impl);
 		model_best = starpu_task_expected_length(task, perf_arch, selected_impl);
-		transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task);
+		if (da)
+			transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task);
 	}
 	}
 	else
 	else
 	{
 	{
 		model_best = local_task_length[best_in_ctx][selected_impl];
 		model_best = local_task_length[best_in_ctx][selected_impl];
-		transfer_model_best = local_data_penalty[best_in_ctx][selected_impl];
+		if (da)
+			transfer_model_best = local_data_penalty[best_in_ctx][selected_impl];
 	}
 	}
 
 
-	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
+	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", selected_impl);
 	starpu_task_set_implementation(task, selected_impl);
 	starpu_task_set_implementation(task, selected_impl);
 
 
 	starpu_sched_task_break(task);
 	starpu_sched_task_break(task);
@@ -911,7 +779,7 @@ static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned
 
 
 static int dmda_push_sorted_decision_task(struct starpu_task *task)
 static int dmda_push_sorted_decision_task(struct starpu_task *task)
 {
 {
-	return _dmda_push_task(task, 1, task->sched_ctx, 0, 1);
+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 1);
 }
 }
 
 
 static int dmda_push_sorted_task(struct starpu_task *task)
 static int dmda_push_sorted_task(struct starpu_task *task)
@@ -919,35 +787,40 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #endif
 #endif
-	return _dmda_push_task(task, 1, task->sched_ctx, 0, 0);
+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 0);
 }
 }
 
 
 static int dm_push_task(struct starpu_task *task)
 static int dm_push_task(struct starpu_task *task)
 {
 {
-	return _dm_push_task(task, 0, task->sched_ctx);
+	return _dmda_push_task(task, 0, task->sched_ctx, 0, 0, 0);
+}
+
+static double dm_simulate_push_task(struct starpu_task *task)
+{
+	return _dmda_push_task(task, 0, task->sched_ctx, 0, 1, 0);
 }
 }
 
 
 static int dmda_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 {
 {
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 0, task->sched_ctx, 0, 0);
+	return _dmda_push_task(task, 0, task->sched_ctx, 1, 0, 0);
 }
 }
 static double dmda_simulate_push_task(struct starpu_task *task)
 static double dmda_simulate_push_task(struct starpu_task *task)
 {
 {
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 0, task->sched_ctx, 1, 0);
+	return _dmda_push_task(task, 0, task->sched_ctx, 1, 1, 0);
 }
 }
 
 
 static double dmda_simulate_push_sorted_task(struct starpu_task *task)
 static double dmda_simulate_push_sorted_task(struct starpu_task *task)
 {
 {
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 1, task->sched_ctx, 1, 0);
+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 0);
 }
 }
 
 
 static double dmda_simulate_push_sorted_decision_task(struct starpu_task *task)
 static double dmda_simulate_push_sorted_decision_task(struct starpu_task *task)
 {
 {
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1);
+	return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 1);
 }
 }
 
 
 #ifdef NOTIFY_READY_SOON
 #ifdef NOTIFY_READY_SOON
@@ -1092,7 +965,7 @@ static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
 	starpu_worker_unlock_self();
 	starpu_worker_unlock_self();
 }
 }
 
 
-static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
+static void _dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id, int da)
 {
 {
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
@@ -1100,8 +973,11 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 	/* Compute the expected penality */
 	/* Compute the expected penality */
 	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
 	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
 						       starpu_task_get_implementation(task));
 						       starpu_task_get_implementation(task));
+	double predicted_transfer = NAN;
+
+	if (da)
+		predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);
 
 
-	double predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);
 	double now = starpu_timing_now();
 	double now = starpu_timing_now();
 
 
 	/* Update the predictions */
 	/* Update the predictions */
@@ -1110,32 +986,35 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 	fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 	fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 
 
-	/* If there is no prediction available, we consider the task has a null length */
-	if (!isnan(predicted_transfer))
+	if (da)
 	{
 	{
-		if (now + predicted_transfer < fifo->exp_end)
+		/* If there is no prediction available, we consider the task has a null length */
+		if (!isnan(predicted_transfer))
 		{
 		{
-			/* We may hope that the transfer will be finished by
-			 * the start of the task. */
-			predicted_transfer = 0;
-		}
-		else
-		{
-			/* The transfer will not be finished by then, take the
-			 * remainder into account */
-			predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
-		}
-		task->predicted_transfer = predicted_transfer;
-		fifo->exp_end += predicted_transfer;
-		fifo->exp_len += predicted_transfer;
-		if(dt->num_priorities != -1)
-		{
-			int i;
-			int task_prio = _starpu_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx);
-			for(i = 0; i <= task_prio; i++)
-				fifo->exp_len_per_priority[i] += predicted_transfer;
-		}
+			if (now + predicted_transfer < fifo->exp_end)
+			{
+				/* We may hope that the transfer will be finished by
+				 * the start of the task. */
+				predicted_transfer = 0;
+			}
+			else
+			{
+				/* The transfer will not be finished by then, take the
+				 * remainder into account */
+				predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
+			}
+			task->predicted_transfer = predicted_transfer;
+			fifo->exp_end += predicted_transfer;
+			fifo->exp_len += predicted_transfer;
+			if(dt->num_priorities != -1)
+			{
+				int i;
+				int task_prio = _starpu_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx);
+				for(i = 0; i <= task_prio; i++)
+					fifo->exp_len_per_priority[i] += predicted_transfer;
+			}
 
 
+		}
 	}
 	}
 
 
 	/* If there is no prediction available, we consider the task has a null length */
 	/* If there is no prediction available, we consider the task has a null length */
@@ -1166,6 +1045,16 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 	starpu_worker_unlock(workerid);
 	starpu_worker_unlock(workerid);
 }
 }
 
 
+static void dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
+{
+	_dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 0);
+}
+
+static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
+{
+	_dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 1);
+}
+
 static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id)
 static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id)
 {
 {
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
@@ -1183,7 +1072,8 @@ struct starpu_sched_policy _starpu_sched_dm_policy =
 	.add_workers = dmda_add_workers ,
 	.add_workers = dmda_add_workers ,
 	.remove_workers = dmda_remove_workers,
 	.remove_workers = dmda_remove_workers,
 	.push_task = dm_push_task,
 	.push_task = dm_push_task,
-	.simulate_push_task = NULL,
+	.simulate_push_task = dm_simulate_push_task,
+	.push_task_notify = dm_push_task_notify,
 	.pop_task = dmda_pop_task,
 	.pop_task = dmda_pop_task,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.pre_exec_hook = dmda_pre_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,
 	.post_exec_hook = dmda_post_exec_hook,

+ 28 - 3
tools/gdbinit

@@ -40,7 +40,7 @@ define starpu-print-task
   set $job = (struct _starpu_job *)$task->starpu_private
   set $job = (struct _starpu_job *)$task->starpu_private
   set $status=0
   set $status=0
   if $task->status == 0
   if $task->status == 0
-    set $status="STARPU_TASK_INVALID"
+    set $status="STARPU_TASK_INIT"
   end
   end
   if $task->status == 1
   if $task->status == 1
     set $status="STARPU_TASK_BLOCKED"
     set $status="STARPU_TASK_BLOCKED"
@@ -98,10 +98,11 @@ define starpu-print-task
       if ! $job->job_successors->done[$n]
       if ! $job->job_successors->done[$n]
         set $cg = $job->job_successors->deps[$n]
         set $cg = $job->job_successors->deps[$n]
         set $m = 0
         set $m = 0
+        printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps
 	while $m < $cg->ndeps
 	while $m < $cg->ndeps
 	  if ! $cg->done[$m]
 	  if ! $cg->done[$m]
 	    set $depj = (struct _starpu_job *) $cg->deps[$m]
 	    set $depj = (struct _starpu_job *) $cg->deps[$m]
-            printf "\t\ttask %p\n", $depj->task
+            printf "\t\t\ttask %p\n", $depj->task
 	  end
 	  end
 	  set $m = $m + 1
 	  set $m = $m + 1
 	end
 	end
@@ -219,7 +220,31 @@ define starpu-print-tag
   if $tag_struct->state == STARPU_DONE
   if $tag_struct->state == STARPU_DONE
      set $status="STARPU_DONE"
      set $status="STARPU_DONE"
   end
   end
-  printf "tag %d state %s\n", $arg0, $status
+  printf "tag %d ((struct _starpu_tag *) %p)\n", $arg0, $tag_struct
+  printf "\tstate %s\n", $status
+  printf "\tdeps %d\n", $tag_struct->tag_successors.ndeps
+  printf "\tcompleted %d\n", $tag_struct->tag_successors.ndeps_completed
+  printf "\tndeps_remaining:\t\t<%u>\n", $tag_struct->tag_successors->ndeps - $tag_struct->tag_successors->ndeps_completed
+  if _starpu_debug
+    set $n = 0
+    while $n < $tag_struct->tag_successors->ndeps
+      if ! $tag_struct->tag_successors->done[$n]
+        set $cg = $tag_struct->tag_successors->deps[$n]
+        set $m = 0
+        printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps
+	while $m < $cg->ndeps
+	  if ! $cg->done[$m]
+	    set $dept = (starpu_tag_t) $cg->deps[$m]
+            printf "\t\t\ttag %u\n", $dept
+	  end
+	  set $m = $m + 1
+	end
+      end
+      set $n = $n + 1
+    end
+  end
+  printf "\tndeps_completed:\t\t<%u>\n", $tag_struct->tag_successors->ndeps_completed
+  printf "\tnsuccs:\t\t\t\t<%u>\n", $tag_struct->tag_successors->nsuccs
 end
 end
 
 
 define starpu-tags
 define starpu-tags