浏览代码

merge trunk

Samuel Thibault 8 年之前
父节点
当前提交
ea45438a79
共有 48 个文件被更改,包括 2897 次插入467 次删除
  1. 5 2
      configure.ac
  2. 8 1
      examples/sched_ctx/parallel_tasks_with_cluster_api.c
  3. 2 2
      include/starpu_clusters_util.h
  4. 2 0
      include/starpu_sched_ctx.h
  5. 4 3
      mpi/Makefile.am
  6. 9 3
      mpi/examples/Makefile.am
  7. 286 0
      mpi/examples/stencil/stencil5_lb.c
  8. 4 1
      mpi/include/starpu_mpi.h
  9. 41 0
      mpi/include/starpu_mpi_lb.h
  10. 11 3
      mpi/src/Makefile.am
  11. 156 0
      mpi/src/load_balancer/load_balancer.c
  12. 280 0
      mpi/src/load_balancer/policy/data_movements_interface.c
  13. 48 0
      mpi/src/load_balancer/policy/data_movements_interface.h
  14. 52 0
      mpi/src/load_balancer/policy/load_balancer_policy.h
  15. 268 0
      mpi/src/load_balancer/policy/load_data_interface.c
  16. 70 0
      mpi/src/load_balancer/policy/load_data_interface.h
  17. 640 0
      mpi/src/load_balancer/policy/load_heat_propagation.c
  18. 1 1
      mpi/src/starpu_mpi.c
  19. 24 1
      mpi/src/starpu_mpi_task_insert.c
  20. 7 3
      mpi/tests/Makefile.am
  21. 73 0
      mpi/tests/load_balancer.c
  22. 6 7
      src/common/fxt.c
  23. 8 2
      src/common/fxt.h
  24. 21 1
      src/common/list.h
  25. 32 21
      src/common/utils.c
  26. 16 13
      src/core/disk_ops/disk_leveldb.cpp
  27. 2 1
      src/core/perfmodel/perfmodel_bus.c
  28. 114 239
      src/core/sched_ctx.c
  29. 6 5
      src/core/sched_ctx.h
  30. 4 0
      src/core/sched_policy.c
  31. 21 1
      src/core/simgrid.c
  32. 10 5
      src/core/topology.c
  33. 3 0
      src/core/topology.h
  34. 13 6
      src/datawizard/filters.c
  35. 1 1
      src/datawizard/malloc.c
  36. 4 0
      src/datawizard/memory_manager.c
  37. 116 68
      src/debug/traces/starpu_fxt.c
  38. 142 46
      src/debug/traces/starpu_fxt_mpi.c
  39. 13 3
      src/debug/traces/starpu_paje.c
  40. 44 22
      src/util/starpu_clusters_create.c
  41. 1 1
      starpufft/src/Makefile.am
  42. 8 1
      tests/datawizard/acquire_cb_insert.c
  43. 1 0
      tests/loader.c
  44. 2 2
      tests/sched_ctx/sched_ctx_hierarchy.c
  45. 5 0
      tools/Makefile.am
  46. 3 2
      tools/cppcheck/suppressions.txt
  47. 143 0
      tools/valgrind/hwloc.suppr
  48. 167 0
      tools/valgrind/openmp.suppr

+ 5 - 2
configure.ac

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+# Copyright (C) 2009-2017  Université de Bordeaux
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2011  Télécom-SudParis
 # Copyright (C) 2011  Télécom-SudParis
 # Copyright (C) 2011, 2012, 2014-2016  INRIA
 # Copyright (C) 2011, 2012, 2014-2016  INRIA
 #
 #
@@ -170,6 +170,7 @@ if test x$enable_simgrid = xyes ; then
 	)
 	)
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
+	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
    	AC_CHECK_FUNCS([MSG_process_join MSG_process_attach MSG_get_as_by_name MSG_environment_get_routing_root MSG_host_get_speed xbt_mutex_try_acquire smpi_process_set_user_data sg_link_name])
    	AC_CHECK_FUNCS([MSG_process_join MSG_process_attach MSG_get_as_by_name MSG_environment_get_routing_root MSG_host_get_speed xbt_mutex_try_acquire smpi_process_set_user_data sg_link_name])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
@@ -421,6 +422,8 @@ if test "$svncommand" != "" ; then
       svndir=1
       svndir=1
    fi
    fi
 fi
 fi
+AC_MSG_CHECKING(if $srcdir is a subversion directory)
+AC_MSG_RESULT($svndir)
 
 
 # use svnversion to record the current repository revision only if
 # use svnversion to record the current repository revision only if
 # subversion is installed and we are in a working copy
 # subversion is installed and we are in a working copy

+ 8 - 1
examples/sched_ctx/parallel_tasks_with_cluster_api.c

@@ -101,7 +101,14 @@ int main(int argc, char **argv)
 				    STARPU_VALUE,&size,sizeof(int),
 				    STARPU_VALUE,&size,sizeof(int),
 				    0);
 				    0);
 		t->destroy = 1;
 		t->destroy = 1;
-		t->possibly_parallel = 1;
+		/* For two tasks, try out the case when the task isn't parallel and expect
+			 the configuration to be sequential due to this, then automatically changed
+			 back to the parallel one */
+		if (i<=4 || i > 6)
+			t->possibly_parallel = 1;
+		/* Note that this mode requires that you put a prologue callback managing
+			 this on all tasks to be taken into account. */
+		t->prologue_callback_pop_func = &starpu_openmp_prologue;
 
 
 		ret=starpu_task_submit(t);
 		ret=starpu_task_submit(t);
 		if (ret == -ENODEV)
 		if (ret == -ENODEV)

+ 2 - 2
include/starpu_clusters_util.h

@@ -72,10 +72,10 @@ int starpu_uncluster_machine(struct starpu_cluster_machine* clusters);
 int starpu_cluster_print(struct starpu_cluster_machine* clusters);
 int starpu_cluster_print(struct starpu_cluster_machine* clusters);
 
 
 /* Prologue functions */
 /* Prologue functions */
-void starpu_openmp_prologue(void * sched_ctx_id);
+void starpu_openmp_prologue(void*);
 #define starpu_intel_openmp_mkl_prologue starpu_openmp_prologue
 #define starpu_intel_openmp_mkl_prologue starpu_openmp_prologue
 #ifdef STARPU_MKL
 #ifdef STARPU_MKL
-void starpu_gnu_openmp_mkl_prologue(void * sched_ctx_id);
+void starpu_gnu_openmp_mkl_prologue(void*);
 #endif /* STARPU_MKL */
 #endif /* STARPU_MKL */
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 2 - 0
include/starpu_sched_ctx.h

@@ -119,6 +119,8 @@ void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data);
 
 
 void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 
 
+struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id);
+
 void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
 void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
 
 
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id);
 int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id);

+ 4 - 3
mpi/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2009-2013, 2015  Université de Bordeaux
 # Copyright (C) 2009-2013, 2015  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013  CNRS
+# Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
 # Copyright (C) 2016  Inria
 # Copyright (C) 2016  Inria
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -22,8 +22,9 @@ pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc starpumpi-1.1.pc starpumpi-1.2
 
 
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 					\
 versinclude_HEADERS = 					\
-	include/fstarpu_mpi_mod.f90			\
-	include/starpu_mpi.h
+	include/starpu_mpi.h				\
+	include/starpu_mpi_lb.h				\
+	include/fstarpu_mpi_mod.f90
 
 
 showcheck:
 showcheck:
 	RET=0 ; \
 	RET=0 ; \

+ 9 - 3
mpi/examples/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2009-2013, 2015-2016  Université de Bordeaux
 # Copyright (C) 2009-2013, 2015-2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 # Copyright (C) 2016  Inria
 # Copyright (C) 2016  Inria
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
@@ -108,13 +108,19 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(ST
 ###################
 ###################
 if BUILD_EXAMPLES
 if BUILD_EXAMPLES
 examplebin_PROGRAMS +=				\
 examplebin_PROGRAMS +=				\
-	stencil/stencil5
+	stencil/stencil5			\
+	stencil/stencil5_lb
 
 
 stencil_stencil5_LDADD =		\
 stencil_stencil5_LDADD =		\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la -lm
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la -lm
 
 
+stencil_stencil5_lb_LDADD =		\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la -lm
+
 starpu_mpi_EXAMPLES	+=	\
 starpu_mpi_EXAMPLES	+=	\
-	stencil/stencil5
+	stencil/stencil5	\
+	stencil/stencil5_lb
+
 endif
 endif
 
 
 ##################
 ##################

+ 286 - 0
mpi/examples/stencil/stencil5_lb.c

@@ -0,0 +1,286 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2013, 2015-2016              Université Bordeaux
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_lb.h>
+#include <math.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
+    						int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank);       \
+                                                fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
+                                                fflush(ofile); }} while(0);
+
+void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
+{
+	float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
+	float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]);
+	float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]);
+	float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]);
+
+//	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
+	*xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
+//	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
+}
+
+struct starpu_codelet stencil5_cl =
+{
+	.cpu_funcs = {stencil5_cpu},
+	.nbuffers = 5,
+	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
+};
+
+#ifdef STARPU_QUICK_CHECK
+#  define NITER_DEF	10
+#  define X         	2
+#  define Y         	2
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER_DEF	10
+#  define X         	5
+#  define Y         	5
+#else
+#  define NITER_DEF	100
+#  define X         	20
+#  define Y         	20
+#endif
+
+int display = 0;
+int niter = NITER_DEF;
+
+/* Returns the MPI node number where data indexes index is */
+int my_distrib(int x, int y, int nb_nodes)
+{
+	/* Block distrib */
+	return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes;
+}
+
+static void parse_args(int argc, char **argv)
+{
+	int i;
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
+			char *argptr;
+			niter = strtol(argv[++i], &argptr, 10);
+		}
+		if (strcmp(argv[i], "-display") == 0)
+		{
+			display = 1;
+		}
+	}
+}
+
+void get_neighbors(int **neighbor_ids, int *nneighbors)
+{
+	int rank, size;
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size <= 2)
+	{
+		*nneighbors = 1;
+		*neighbor_ids = malloc(sizeof(int));
+		*neighbor_ids[0] = rank==size-1?0:rank+1;
+		fprintf(stderr, "rank %d has neighbor %d\n", rank, *neighbor_ids[0]);
+	}
+	else
+	{
+		*nneighbors = 2;
+		*neighbor_ids = malloc(2*sizeof(int));
+		(*neighbor_ids)[0] = rank==size-1?0:rank+1;
+		(*neighbor_ids)[1] = rank==0?size-1:rank-1;
+		fprintf(stderr, "rank %d has neighbor %d and %d\n", rank, (*neighbor_ids)[0], (*neighbor_ids)[1]);
+	}
+}
+
+struct data_node
+{
+	starpu_data_handle_t data_handle;
+	int node;
+};
+
+struct data_node data_nodes[X][Y];
+
+void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node)
+{
+	int rank, x, y;
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	fprintf(stderr, "Looking to move data from %d to %d\n", rank, dst_node);
+	for(x = 0; x < X; x++)
+	{
+		for (y = 0; y < Y; y++)
+		{
+			if (data_nodes[x][y].node == rank)
+			{
+				*handle_unit = malloc(sizeof(starpu_data_handle_t));
+				*handle_unit[0] = data_nodes[x][y].data_handle;
+				*nhandles = 1;
+				data_nodes[x][y].node = dst_node;
+				return;
+			}
+		}
+	}
+	*nhandles = 0;
+}
+
+int main(int argc, char **argv)
+{
+	int my_rank, size, x, y, loop;
+	float mean=0;
+	float matrix[X][Y];
+	struct starpu_mpi_lb_conf itf;
+
+	itf.get_neighbors = get_neighbors;
+	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
+
+	int ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size > 2)
+	{
+		FPRINTF(stderr, "Only works with 2 nodes\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return 77;
+	}
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return 77;
+	}
+
+	setenv("LB_HEAT_SLEEP_THRESHOLD", "5", 1);
+	starpu_mpi_lb_init("heat", &itf);
+
+	parse_args(argc, argv);
+
+	/* Initial data values */
+	starpu_srand48((long int)time(NULL));
+	for(x = 0; x < X; x++)
+	{
+		for (y = 0; y < Y; y++)
+		{
+			matrix[x][y] = (float)starpu_drand48();
+			mean += matrix[x][y];
+		}
+	}
+	mean /= (X*Y);
+
+	if (display)
+	{
+		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
+		for(x = 0; x < X; x++)
+		{
+			fprintf(stdout, "[%d] ", my_rank);
+			for (y = 0; y < Y; y++)
+			{
+				fprintf(stdout, "%2.2f ", matrix[x][y]);
+			}
+			fprintf(stdout, "\n");
+		}
+	}
+
+	/* Initial distribution */
+	for(x = 0; x < X; x++)
+	{
+		for (y = 0; y < Y; y++)
+		{
+			data_nodes[x][y].node = my_distrib(x, y, size);
+			if (data_nodes[x][y].node == my_rank)
+			{
+				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
+				starpu_variable_data_register(&data_nodes[x][y].data_handle, 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
+			}
+			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
+				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
+			{
+				/* I don't own that index, but will need it for my computations */
+				//FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
+				starpu_variable_data_register(&data_nodes[x][y].data_handle, -1, (uintptr_t)NULL, sizeof(float));
+			}
+			else
+			{
+				/* I know it's useless to allocate anything for this */
+				data_nodes[x][y].data_handle = NULL;
+			}
+			if (data_nodes[x][y].data_handle)
+			{
+				starpu_mpi_data_register(data_nodes[x][y].data_handle, (y*X)+x, data_nodes[x][y].node);
+			}
+		}
+	}
+
+	/* First computation with initial distribution */
+	for(loop=0 ; loop<niter; loop++)
+	{
+		for (x = 1; x < X-1; x++)
+		{
+			for (y = 1; y < Y-1; y++)
+			{
+				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_nodes[x][y].data_handle,
+						       STARPU_R, data_nodes[x-1][y].data_handle, STARPU_R, data_nodes[x+1][y].data_handle,
+						       STARPU_R, data_nodes[x][y-1].data_handle, STARPU_R, data_nodes[x][y+1].data_handle,
+						       STARPU_TAG_ONLY, ((starpu_tag_t)X)*x + y,
+						       0);
+			}
+		}
+	}
+	FPRINTF(stderr, "Waiting ...\n");
+	starpu_task_wait_for_all();
+
+	// The load balancer needs to be shutdown before unregistering data as it needs access to them
+	starpu_mpi_lb_shutdown();
+
+	/* Unregister data */
+	for(x = 0; x < X; x++)
+	{
+		for (y = 0; y < Y; y++)
+		{
+			if (data_nodes[x][y].data_handle)
+			{
+				starpu_data_unregister(data_nodes[x][y].data_handle);
+			}
+		}
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	if (display)
+	{
+		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
+		for(x = 0; x < X; x++)
+		{
+			FPRINTF(stdout, "[%d] ", my_rank);
+			for (y = 0; y < Y; y++)
+			{
+				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
+			}
+			FPRINTF(stdout, "\n");
+		}
+	}
+
+	return 0;
+}

+ 4 - 1
mpi/include/starpu_mpi.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2012, 2014-2016  Université de Bordeaux
  * Copyright (C) 2009-2012, 2014-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2016  Inria
  * Copyright (C) 2016  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -119,6 +119,9 @@ typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *);
 int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
 int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
 int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
 int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
 
 
+int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *));
+int starpu_mpi_pre_submit_hook_unregister();
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 41 - 0
mpi/include/starpu_mpi_lb.h

@@ -0,0 +1,41 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_LOAD_BALANCER_H__
+#define __STARPU_MPI_LOAD_BALANCER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct starpu_mpi_lb_conf
+{
+	void (*get_neighbors)(int **neighbor_ids, int *nneighbors);
+	void (*get_data_unit_to_migrate)(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node);
+};
+
+/* Inits the load balancer's environment with the load policy provided by the
+ * user
+ */
+void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *);
+void starpu_mpi_lb_shutdown();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STARPU_MPI_LOAD_BALANCER_H__

+ 11 - 3
mpi/src/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2009-2012  Université de Bordeaux
 # Copyright (C) 2009-2012  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -69,7 +69,10 @@ noinst_HEADERS =					\
 	starpu_mpi_sync_data.h				\
 	starpu_mpi_sync_data.h				\
 	starpu_mpi_comm.h				\
 	starpu_mpi_comm.h				\
 	starpu_mpi_tag.h				\
 	starpu_mpi_tag.h				\
-	starpu_mpi_task_insert.h
+	starpu_mpi_task_insert.h			\
+	load_balancer/policy/data_movements_interface.h	\
+	load_balancer/policy/load_data_interface.h	\
+	load_balancer/policy/load_balancer_policy.h
 
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi.c					\
 	starpu_mpi.c					\
@@ -88,7 +91,12 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_comm.c				\
 	starpu_mpi_comm.c				\
 	starpu_mpi_tag.c				\
 	starpu_mpi_tag.c				\
 	starpu_mpi_fortran.c				\
 	starpu_mpi_fortran.c				\
-	starpu_mpi_task_insert_fortran.c
+	starpu_mpi_task_insert_fortran.c		\
+	load_balancer/policy/data_movements_interface.c	\
+	load_balancer/policy/load_data_interface.c	\
+	load_balancer/policy/load_heat_propagation.c	\
+	load_balancer/load_balancer.c
+
 
 
 showcheck:
 showcheck:
 	-cat /dev/null
 	-cat /dev/null

+ 156 - 0
mpi/src/load_balancer/load_balancer.c

@@ -0,0 +1,156 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include <starpu_mpi.h>
+#include <starpu_scheduler.h>
+#include <common/utils.h>
+
+#include <starpu_mpi_lb.h>
+#include "policy/load_balancer_policy.h"
+
+static struct load_balancer_policy *defined_policy = NULL;
+typedef void (*_post_exec_hook_func_t)(struct starpu_task *task, unsigned sched_ctx_id);
+static _post_exec_hook_func_t saved_post_exec_hook[STARPU_NMAX_SCHED_CTXS];
+
+static void post_exec_hook_wrapper(struct starpu_task *task, unsigned sched_ctx_id)
+{
+	//fprintf(stderr,"I am called ! \n");
+	if (defined_policy && defined_policy->finished_task_entry_point)
+		defined_policy->finished_task_entry_point();
+	if (saved_post_exec_hook[sched_ctx_id])
+		saved_post_exec_hook[sched_ctx_id](task, sched_ctx_id);
+}
+
+static struct load_balancer_policy *predefined_policies[] =
+{
+	&load_heat_propagation_policy,
+	NULL
+};
+
+void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *itf)
+{
+	int ret;
+
+	const char *policy_name = starpu_getenv("STARPU_MPI_LB");
+	if (!policy_name)
+		policy_name = lb_policy_name;
+
+	if (!policy_name || (strcmp(policy_name, "help") == 0))
+	{
+		_STARPU_MSG("Warning : load balancing is disabled for this run.\n");
+		_STARPU_MSG("Use the STARPU_MPI_LB = <name> environment variable to use a load balancer.\n");
+		_STARPU_MSG("Available load balancers :\n");
+		struct load_balancer_policy **policy;
+		for(policy=predefined_policies ; *policy!=NULL ; policy++)
+		{
+			struct load_balancer_policy *p = *policy;
+			fprintf(stderr," - %s\n", p->policy_name);
+		}
+		return;
+	}
+
+	if (policy_name)
+	{
+		struct load_balancer_policy **policy;
+		for(policy=predefined_policies ; *policy!=NULL ; policy++)
+		{
+			struct load_balancer_policy *p = *policy;
+			if (p->policy_name)
+			{
+				if (strcmp(policy_name, p->policy_name) == 0)
+				{
+					/* we found a policy with the requested name */
+					defined_policy = p;
+					break;
+				}
+			}
+		}
+	}
+
+	if (!defined_policy)
+	{
+		_STARPU_MSG("Error : no load balancer with the name %s. Load balancing will be disabled for this run.\n", policy_name);
+		return;
+	}
+
+	ret = defined_policy->init(itf);
+	if (ret != 0)
+	{
+		_STARPU_MSG("Error (%d) in %s->init: invalid starpu_mpi_lb_conf. Load balancing will be disabled for this run.\n", ret, defined_policy->policy_name);
+		return;
+	}
+
+	/* starpu_register_hook(submitted_task, defined_policy->submitted_task_entry_point); */
+	if (defined_policy->submitted_task_entry_point)
+		starpu_mpi_pre_submit_hook_register(defined_policy->submitted_task_entry_point);
+
+	/* starpu_register_hook(finished_task, defined_policy->finished_task_entry_point); */
+	if (defined_policy->finished_task_entry_point)
+	{
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		{
+			struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
+			if (sched_policy)
+			{
+				_STARPU_DEBUG("Setting post_exec_hook for scheduling context %d %s (%d)\n", i, sched_policy->policy_name, STARPU_NMAX_SCHED_CTXS);
+				saved_post_exec_hook[i] = sched_policy->post_exec_hook;
+				sched_policy->post_exec_hook = post_exec_hook_wrapper;
+			}
+			else
+				saved_post_exec_hook[i] = NULL;
+		}
+	}
+
+	return;
+}
+
+void starpu_mpi_lb_shutdown()
+{
+	if (!defined_policy)
+		return;
+
+	int ret = defined_policy->deinit();
+	if (ret != 0)
+	{
+		_STARPU_MSG("Error (%d) in %s->deinit\n", ret, defined_policy->policy_name);
+		return;
+	}
+
+	/* starpu_unregister_hook(submitted_task, defined_policy->submitted_task_entry_point); */
+	if (defined_policy->submitted_task_entry_point)
+		starpu_mpi_pre_submit_hook_unregister();
+
+	/* starpu_unregister_hook(finished_task, defined_policy->finished_task_entry_point); */
+	if (defined_policy->finished_task_entry_point)
+	{
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+		{
+			if (saved_post_exec_hook[i])
+			{
+				struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i);
+				sched_policy->post_exec_hook = saved_post_exec_hook[i];
+				saved_post_exec_hook[i] = NULL;
+			}
+		}
+	}
+	defined_policy = NULL;
+}

+ 280 - 0
mpi/src/load_balancer/policy/data_movements_interface.c

@@ -0,0 +1,280 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+
+#include "data_movements_interface.h"
+
+int **data_movements_get_ref_tags_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	if (dm_interface->tags)
+		return &dm_interface->tags;
+	else
+		return NULL;
+}
+
+int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	if (dm_interface->ranks)
+		return &dm_interface->ranks;
+	else
+		return NULL;
+}
+
+int *data_movements_get_tags_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->tags;
+}
+
+int *data_movements_get_ranks_table(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->ranks;
+}
+
+int data_movements_get_size_tables(starpu_data_handle_t handle)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return dm_interface->size;
+}
+
+int data_movements_reallocate_tables(starpu_data_handle_t handle, int size)
+{
+	struct data_movements_interface *dm_interface =
+		(struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	if (dm_interface->size)
+	{
+		STARPU_ASSERT(dm_interface->tags);
+		free(dm_interface->tags);
+		dm_interface->tags = NULL;
+
+		STARPU_ASSERT(dm_interface->ranks);
+		free(dm_interface->ranks);
+		dm_interface->ranks = NULL;
+	}
+	else
+	{
+		STARPU_ASSERT(!dm_interface->tags);
+		STARPU_ASSERT(!dm_interface->ranks);
+	}
+
+	dm_interface->size = size;
+
+	if (dm_interface->size)
+	{
+		dm_interface->tags = malloc(size*sizeof(int));
+		dm_interface->ranks = malloc(size*sizeof(int));
+	}
+
+	return 0 ;
+}
+
+static void data_movements_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct data_movements_interface *local_interface = (struct data_movements_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		local_interface->size = dm_interface->size;
+		if (node == home_node)
+		{
+			local_interface->tags = dm_interface->tags;
+			local_interface->ranks = dm_interface->ranks;
+		}
+		else
+		{
+			local_interface->tags = NULL;
+			local_interface->ranks = NULL;
+		}
+	}
+}
+
+static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+
+	int *addr_tags = NULL;
+	int *addr_ranks = NULL;
+	starpu_ssize_t requested_memory = dm_interface->size * sizeof(int);
+
+	addr_tags = (int*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_tags)
+		goto fail_tags;
+	addr_ranks = (int*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_ranks)
+		goto fail_ranks;
+
+	/* update the data properly in consequence */
+	dm_interface->tags = addr_tags;
+	dm_interface->ranks = addr_ranks;
+
+	return 2*requested_memory;
+
+fail_ranks:
+	starpu_free_on_node(node, (uintptr_t) addr_tags, requested_memory);
+fail_tags:
+	return -ENOMEM;
+}
+
+static void data_movements_free_data_on_node(void *data_interface, unsigned node)
+{
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface;
+	starpu_ssize_t requested_memory = dm_interface->size * sizeof(int);
+
+	starpu_free_on_node(node, (uintptr_t) dm_interface->tags, requested_memory);
+	starpu_free_on_node(node, (uintptr_t) dm_interface->ranks, requested_memory);
+}
+
+static size_t data_movements_get_size(starpu_data_handle_t handle)
+{
+	size_t size;
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	size = (dm_interface->size * 2 * sizeof(int)) + sizeof(int);
+	return size;
+}
+
+static uint32_t data_movements_footprint(starpu_data_handle_t handle)
+{
+	return starpu_hash_crc32c_be(data_movements_get_size(handle), 0);
+}
+
+static int data_movements_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = data_movements_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data;
+		starpu_malloc_flags((void**) &data, *count, 0);
+		assert(data);
+		*ptr = data;
+		memcpy(data, &dm_interface->size, sizeof(int));
+		if (dm_interface->size)
+		{
+			memcpy(data+sizeof(int), dm_interface->tags, (dm_interface->size*sizeof(int)));
+			memcpy(data+sizeof(int)+(dm_interface->size*sizeof(int)), dm_interface->ranks, dm_interface->size*sizeof(int));
+		}
+	}
+
+	return 0;
+}
+
+static int data_movements_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct data_movements_interface *dm_interface = (struct data_movements_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	int size = 0;
+	memcpy(&size, data, sizeof(int));
+	STARPU_ASSERT(count == (2 * size * sizeof(int)) + sizeof(int));
+
+	data_movements_reallocate_tables(handle, size);
+
+	if (dm_interface->size)
+	{
+		memcpy(dm_interface->tags, data+sizeof(int), dm_interface->size*sizeof(int));
+		memcpy(dm_interface->ranks, data+sizeof(int)+(dm_interface->size*sizeof(int)), dm_interface->size*sizeof(int));
+	}
+
+    return 0;
+}
+
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	struct data_movements_interface *src_data_movements = src_interface;
+	struct data_movements_interface *dst_data_movements = dst_interface;
+	int ret = 0;
+
+	if (starpu_interface_copy((uintptr_t) src_data_movements->tags, 0, src_node,
+				    (uintptr_t) dst_data_movements->tags, 0, dst_node,
+				     src_data_movements->size*sizeof(int),
+				     async_data))
+		ret = -EAGAIN;
+	if (starpu_interface_copy((uintptr_t) src_data_movements->ranks, 0, src_node,
+				    (uintptr_t) dst_data_movements->ranks, 0, dst_node,
+				     src_data_movements->size*sizeof(int),
+				     async_data))
+		ret = -EAGAIN;
+	return ret;
+}
+
+static const struct starpu_data_copy_methods data_movements_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+
+static struct starpu_data_interface_ops interface_data_movements_ops =
+{
+	.register_data_handle = data_movements_register_data_handle,
+	.allocate_data_on_node = data_movements_allocate_data_on_node,
+	.free_data_on_node = data_movements_free_data_on_node,
+	.copy_methods = &data_movements_copy_methods,
+	.get_size = data_movements_get_size,
+	.footprint = data_movements_footprint,
+	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
+	.interface_size = sizeof(struct data_movements_interface),
+	.handle_to_pointer = NULL,
+	.pack_data = data_movements_pack_data,
+	.unpack_data = data_movements_unpack_data,
+	.describe = NULL
+};
+
+void data_movements_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *tags, int *ranks, int size)
+{
+	struct data_movements_interface data_movements =
+	{
+		.tags = tags,
+		.ranks = ranks,
+		.size = size
+	};
+
+	if (interface_data_movements_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
+	{
+		interface_data_movements_ops.interfaceid = starpu_data_interface_get_next_id();
+	}
+
+	starpu_data_register(handleptr, home_node, &data_movements, &interface_data_movements_ops);
+}

+ 48 - 0
mpi/src/load_balancer/policy/data_movements_interface.h

@@ -0,0 +1,48 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#ifndef __DATA_MOVEMENTS_INTERFACE_H
+#define __DATA_MOVEMENTS_INTERFACE_H
+
+/* interface for data_movements */
+struct data_movements_interface
+{
+	/* Data tags table */
+	int *tags;
+	/* Ranks table (where to move the corresponding data) */
+	int *ranks;
+	/* Size of the tables */
+	int size;
+};
+
+void data_movements_data_register(starpu_data_handle_t *handle, unsigned home_node, int *ranks, int *tags, int size);
+
+int **data_movements_get_ref_tags_table(starpu_data_handle_t handle);
+int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle);
+int data_movements_reallocate_tables(starpu_data_handle_t handle, int size);
+
+int *data_movements_get_tags_table(starpu_data_handle_t handle);
+int *data_movements_get_ranks_table(starpu_data_handle_t handle);
+int data_movements_get_size_tables(starpu_data_handle_t handle);
+
+#define DATA_MOVEMENTS_GET_SIZE_TABLES(interface)	(((struct data_movements_interface *)(interface))->size)
+#define DATA_MOVEMENTS_GET_TAGS_TABLE(interface)	(((struct data_movements_interface *)(interface))->tags)
+#define DATA_MOVEMENTS_GET_RANKS_TABLE(interface)	(((struct data_movements_interface *)(interface))->ranks)
+
+#endif /* __DATA_MOVEMENTS_INTERFACE_H */

+ 52 - 0
mpi/src/load_balancer/policy/load_balancer_policy.h

@@ -0,0 +1,52 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __LOAD_BALANCER_POLICY_H__
+#define __LOAD_BALANCER_POLICY_H__
+
+#include <starpu_mpi_lb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A load balancer consists in a collection of operations on a data
+ * representing the load of the application (in terms of computation, memory,
+ * whatever). StarPU allows several entry points for the user. The load
+ * balancer allows the user to give its load balancing methods to be used on
+ * these entry points of the runtime system. */
+struct load_balancer_policy
+{
+	int (*init)(struct starpu_mpi_lb_conf *);
+	int (*deinit)();
+	void (*submitted_task_entry_point)();
+	void (*finished_task_entry_point)();
+
+	/* Name of the load balancing policy. The selection of the load balancer is
+	 * performed through the use of the STARPU_MPI_LB=name environment
+	 * variable.
+	 */
+	const char *policy_name;
+};
+
+extern struct load_balancer_policy load_heat_propagation_policy;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __LOAD_BALANCER_POLICY_H__

+ 268 - 0
mpi/src/load_balancer/policy/load_data_interface.c

@@ -0,0 +1,268 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+
+#include "load_data_interface.h"
+
+int load_data_get_sleep_threshold(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->sleep_task_threshold;
+}
+
+int load_data_get_wakeup_threshold(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->wakeup_task_threshold;
+}
+
+int load_data_get_current_phase(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->phase;
+}
+
+int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->nsubmitted_tasks;
+}
+
+int load_data_get_nfinished_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->nfinished_tasks;
+}
+
+int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	(ld_interface->nsubmitted_tasks)++;
+
+	return 0;
+}
+
+int load_data_inc_nfinished_tasks(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	(ld_interface->nfinished_tasks)++;
+
+	return 0;
+}
+
+int load_data_next_phase(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	ld_interface->phase++;
+
+	return 0;
+}
+
+int load_data_update_elapsed_time(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	ld_interface->elapsed_time = starpu_timing_now() - ld_interface->start;
+
+	return 0;
+}
+
+double load_data_get_elapsed_time(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ld_interface->elapsed_time;
+}
+
+int load_data_update_wakeup_cond(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	int previous_threshold = ld_interface->wakeup_task_threshold;
+	ld_interface->wakeup_task_threshold += (ld_interface->nsubmitted_tasks - previous_threshold) * ld_interface->wakeup_ratio;
+
+	return 0;
+}
+
+int load_data_wakeup_cond(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+
+	return ((ld_interface->wakeup_task_threshold > 0) && (ld_interface->nfinished_tasks == ld_interface->wakeup_task_threshold));
+}
+
+static void load_data_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
+{
+	struct load_data_interface *ld_interface = (struct load_data_interface *) data_interface;
+
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct load_data_interface *local_interface = (struct load_data_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		local_interface->start = ld_interface->start;
+		local_interface->elapsed_time = ld_interface->elapsed_time;
+		local_interface->phase = ld_interface->phase;
+		local_interface->nsubmitted_tasks = ld_interface->nsubmitted_tasks;
+		local_interface->nfinished_tasks = ld_interface->nsubmitted_tasks;
+		local_interface->wakeup_task_threshold = ld_interface->wakeup_task_threshold;
+		local_interface->wakeup_ratio = ld_interface->wakeup_ratio;
+		local_interface->sleep_task_threshold = ld_interface->sleep_task_threshold;
+	}
+}
+
+static starpu_ssize_t load_data_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	(void) data_interface;
+	(void) node;
+
+	return 0;
+}
+
+static void load_data_free_data_on_node(void *data_interface, unsigned node)
+{
+	(void) data_interface;
+	(void) node;
+}
+
+static size_t load_data_get_size(starpu_data_handle_t handle)
+{
+	(void) handle;
+	return (sizeof(struct load_data_interface));
+}
+
+static uint32_t load_data_footprint(starpu_data_handle_t handle)
+{
+	struct load_data_interface *ld_interface =
+		(struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	return starpu_hash_crc32c_be(ld_interface->start,
+				     starpu_hash_crc32c_be(ld_interface->elapsed_time,
+							   starpu_hash_crc32c_be(ld_interface->nsubmitted_tasks,
+										 starpu_hash_crc32c_be(ld_interface->sleep_task_threshold, ld_interface->wakeup_task_threshold))));
+}
+
+static int load_data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct load_data_interface *ld_interface = (struct load_data_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = load_data_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data;
+		starpu_malloc_flags((void**) &data, *count, 0);
+		*ptr = data;
+		memcpy(data, ld_interface, *count);
+	}
+
+	return 0;
+}
+
+static int load_data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct load_data_interface *ld_interface = (struct load_data_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	STARPU_ASSERT(count == sizeof(struct load_data_interface));
+	memcpy(ld_interface, data, count);
+
+	return 0;
+}
+
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	(void) src_interface;
+	(void) dst_interface;
+	(void) src_node;
+	(void) dst_node;
+	(void) async_data;
+
+	return 0;
+}
+
+static const struct starpu_data_copy_methods load_data_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+
+static struct starpu_data_interface_ops interface_load_data_ops =
+{
+	.register_data_handle = load_data_register_data_handle,
+	.allocate_data_on_node = load_data_allocate_data_on_node,
+	.free_data_on_node = load_data_free_data_on_node,
+	.copy_methods = &load_data_copy_methods,
+	.get_size = load_data_get_size,
+	.footprint = load_data_footprint,
+	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
+	.interface_size = sizeof(struct load_data_interface),
+	.handle_to_pointer = NULL,
+	.pack_data = load_data_pack_data,
+	.unpack_data = load_data_unpack_data,
+	.describe = NULL
+};
+
+void load_data_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int sleep_task_threshold, double wakeup_ratio)
+{
+	struct load_data_interface load_data =
+	{
+		.start = starpu_timing_now(),
+		.elapsed_time = 0,
+		.phase = 0,
+		.nsubmitted_tasks = 0,
+		.nfinished_tasks = 0,
+		.sleep_task_threshold = sleep_task_threshold,
+		.wakeup_task_threshold = 0,
+		.wakeup_ratio = wakeup_ratio
+	};
+
+	if (interface_load_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
+	{
+		interface_load_data_ops.interfaceid = starpu_data_interface_get_next_id();
+	}
+
+	starpu_data_register(handleptr, home_node, &load_data, &interface_load_data_ops);
+}

+ 70 - 0
mpi/src/load_balancer/policy/load_data_interface.h

@@ -0,0 +1,70 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#ifndef __LOAD_DATA_INTERFACE_H
+#define __LOAD_DATA_INTERFACE_H
+
+/* interface for load_data */
+struct load_data_interface
+{
+	/* Starting time of the execution */
+	double start;
+	/* Elapsed time until the start time and the time when event "launch a load
+	 * balancing phase" is triggered */
+	double elapsed_time;
+	/* Current submission phase, i.e how many balanced steps have already
+	 * happened so far. */
+	int phase;
+	/* Number of currently submitted tasks */
+	int nsubmitted_tasks;
+	/* Number of currently finished tasks */
+	int nfinished_tasks;
+	/* Task threshold to sleep the submission thread */
+	int sleep_task_threshold;
+	/* Task threshold to wake-up the submission thread */
+	int wakeup_task_threshold;
+	/* Ratio of submitted tasks to wait for completion before waking up the
+	 * submission thread */
+	double wakeup_ratio;
+};
+
+void load_data_data_register(starpu_data_handle_t *handle, unsigned home_node, int sleep_task_threshold, double wakeup_ratio);
+
+int load_data_get_sleep_threshold(starpu_data_handle_t handle);
+int load_data_get_wakeup_threshold(starpu_data_handle_t handle);
+int load_data_get_current_phase(starpu_data_handle_t handle);
+int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle);
+int load_data_get_nfinished_tasks(starpu_data_handle_t handle);
+
+int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle);
+int load_data_inc_nfinished_tasks(starpu_data_handle_t handle);
+
+int load_data_next_phase(starpu_data_handle_t handle);
+
+int load_data_update_elapsed_time(starpu_data_handle_t handle);
+double load_data_get_elapsed_time(starpu_data_handle_t handle);
+
+int load_data_update_wakeup_cond(starpu_data_handle_t handle);
+int load_data_wakeup_cond(starpu_data_handle_t handle);
+
+#define LOAD_DATA_GET_NSUBMITTED_TASKS(interface)	(((struct load_data_interface *)(interface))->nsubmitted_tasks)
+#define LOAD_DATA_GET_SLEEP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->sleep_task_threshold)
+#define LOAD_DATA_GET_WAKEUP_THRESHOLD(interface)	(((struct load_data_interface *)(interface))->wakeup_task_threshold)
+
+#endif /* __LOAD_DATA_INTERFACE_H */

+ 640 - 0
mpi/src/load_balancer/policy/load_heat_propagation.c

@@ -0,0 +1,640 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  Inria
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_tag.h>
+#include <common/uthash.h>
+#include <common/utils.h>
+#include <math.h>
+
+#include "load_balancer_policy.h"
+#include "data_movements_interface.h"
+#include "load_data_interface.h"
+
+static int TAG_LOAD(int n)
+{
+	return ((n+1) << 24);
+}
+
+static int TAG_MOV(int n)
+{
+	return ((n+1) << 20);
+}
+
+/* Hash table of local pieces of data that has been moved out of the local MPI
+ * node by the load balancer. All of these pieces of data must be migrated back
+ * to the local node at the end of the execution. */
+struct moved_data_entry
+{
+	UT_hash_handle hh;
+	starpu_data_handle_t handle;
+};
+
+static struct moved_data_entry *mdh = NULL;
+
+static starpu_pthread_mutex_t load_data_mutex;
+static starpu_pthread_cond_t load_data_cond;
+
+/* MPI infos */
+static int my_rank;
+static int world_size;
+
+/* Number of neighbours of the local MPI node and their IDs. These are given by
+ * the get_neighbors() method, and thus can be easily changed. */
+static int *neighbor_ids = NULL;
+static int nneighbors = 0;
+
+/* Local load data */
+static starpu_data_handle_t *load_data_handle = NULL;
+static starpu_data_handle_t *load_data_handle_cpy = NULL;
+/* Load data of neighbours */
+static starpu_data_handle_t *neighbor_load_data_handles = NULL;
+
+/* Table which contains a data_movements_handle for each MPI node of
+ * MPI_COMM_WORLD. Since all the MPI nodes must be advised of any data
+ * movement, this table will be used to perform communications of data
+ * movements handles following an all-to-all model. */
+static starpu_data_handle_t *data_movements_handles = NULL;
+
+/* Load balancer interface which contains the application-specific methods for
+ * the load balancer to use. */
+static struct starpu_mpi_lb_conf *user_itf = NULL;
+
+static double time_threshold = 20000;
+
+/******************************************************************************
+ *                              Balancing                                     *
+ *****************************************************************************/
+
+
+/* Decides which data has to move where, and fills the
+ * data_movements_handles[my_rank] data handle from that.
+ * In data :
+ *  - local load_data_handle
+ *  - nneighbors
+ *  - neighbor_ids[nneighbors]
+ *  - neighbor_load_data_handles[nneighbors]
+ * Out data :
+ *  - data_movements_handles[my_rank]
+ */
+
+static void balance(starpu_data_handle_t load_data_cpy)
+{
+	int less_loaded = -1;
+	int n;
+	double elapsed_time, ref_elapsed_time;
+	double my_elapsed_time = load_data_get_elapsed_time(load_data_cpy);
+
+	/* Search for the less loaded neighbor */
+	ref_elapsed_time = my_elapsed_time;
+	for (n = 0; n < nneighbors; n++)
+	{
+		elapsed_time = load_data_get_elapsed_time(neighbor_load_data_handles[n]);
+		if (ref_elapsed_time > elapsed_time)
+		{
+			//fprintf(stderr,"Node%d: ref local time %lf vs neighbour%d time %lf\n", my_rank, ref_elapsed_time, neighbor_ids[n], elapsed_time);
+			less_loaded = neighbor_ids[n];
+			ref_elapsed_time = elapsed_time;
+		}
+	}
+
+	/* We found it */
+	if (less_loaded >= 0)
+	{
+		_STARPU_DEBUG("Less loaded found on node %d : %d\n", my_rank, less_loaded);
+		double diff_time = my_elapsed_time - ref_elapsed_time;
+		/* If the difference is higher than a time threshold, we move
+		 * one data to the less loaded neighbour. */
+		/* TODO: How to decide the time threshold ? */
+		if ((time_threshold > 0) && (diff_time >= time_threshold))
+		{
+			starpu_data_handle_t *handles = NULL;
+			int nhandles = 0;
+			user_itf->get_data_unit_to_migrate(&handles, &nhandles, less_loaded);
+
+			data_movements_reallocate_tables(data_movements_handles[my_rank], nhandles);
+
+			if (nhandles)
+			{
+				int *tags = data_movements_get_tags_table(data_movements_handles[my_rank]);
+				int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]);
+
+				for (n = 0; n < nhandles; n++)
+				{
+					tags[n] = starpu_mpi_data_get_tag(handles[n]);
+					ranks[n] = less_loaded;
+				}
+
+				free(handles);
+			}
+		}
+		else
+			data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
+	}
+	else
+		data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
+}
+
+static void exchange_load_data_infos(starpu_data_handle_t load_data_cpy)
+{
+	int i;
+
+	/* Allocate all requests and status for point-to-point communications */
+	starpu_mpi_req load_send_req[nneighbors];
+	starpu_mpi_req load_recv_req[nneighbors];
+
+	MPI_Status load_send_status[nneighbors];
+	MPI_Status load_recv_status[nneighbors];
+
+	int flag;
+
+	/* Send the local load data to neighbour nodes, and receive the remote load
+	 * data from neighbour nodes */
+	for (i = 0; i < nneighbors; i++)
+	{
+		//_STARPU_DEBUG("[node %d] sending and receiving with %i-th neighbor %i\n", my_rank, i, neighbor_ids[i]);
+		starpu_mpi_isend(load_data_cpy, &load_send_req[i], neighbor_ids[i], TAG_LOAD(my_rank), MPI_COMM_WORLD);
+		starpu_mpi_irecv(neighbor_load_data_handles[i], &load_recv_req[i], neighbor_ids[i], TAG_LOAD(neighbor_ids[i]), MPI_COMM_WORLD);
+	}
+
+	/* Wait for completion of all send requests */
+	for (i = 0; i < nneighbors; i++)
+	{
+		flag = 0;
+		while (!flag)
+			starpu_mpi_test(&load_send_req[i], &flag, &load_send_status[i]);
+	}
+
+	/* Wait for completion of all receive requests */
+	for (i = 0; i < nneighbors; i++)
+	{
+		flag = 0;
+		while (!flag)
+			starpu_mpi_test(&load_recv_req[i], &flag, &load_recv_status[i]);
+	}
+}
+
+static void exchange_data_movements_infos()
+{
+	int i;
+
+	/* Allocate all requests and status for point-to-point communications */
+	starpu_mpi_req data_movements_send_req[world_size];
+	starpu_mpi_req data_movements_recv_req[world_size];
+
+	MPI_Status data_movements_send_status[world_size];
+	MPI_Status data_movements_recv_status[world_size];
+
+	int flag;
+
+	/* Send the new ranks of local data to all other nodes, and receive the new
+	 * ranks of all remote data from all other nodes */
+	for (i = 0; i < world_size; i++)
+	{
+		if (i != my_rank)
+		{
+			//_STARPU_DEBUG("[node %d] Send and receive data movement with %d\n", my_rank, i);
+			starpu_mpi_isend(data_movements_handles[my_rank], &data_movements_send_req[i], i, TAG_MOV(my_rank), MPI_COMM_WORLD);
+			starpu_mpi_irecv(data_movements_handles[i], &data_movements_recv_req[i], i, TAG_MOV(i), MPI_COMM_WORLD);
+		}
+	}
+
+	/* Wait for completion of all send requests */
+	for (i = 0; i < world_size; i++)
+	{
+		if (i != my_rank)
+		{
+			//fprintf(stderr,"Wait for sending data movement of %d to %d\n", my_rank, i);
+			flag = 0;
+			while (!flag)
+				starpu_mpi_test(&data_movements_send_req[i], &flag, &data_movements_send_status[i]);
+		}
+	}
+
+	/* Wait for completion of all receive requests */
+	for (i = 0; i < world_size; i++)
+	{
+		if (i != my_rank)
+		{
+			//fprintf(stderr,"Wait for recieving data movement from %d on %d\n", i, my_rank);
+			flag = 0;
+			while (!flag)
+				starpu_mpi_test(&data_movements_recv_req[i], &flag, &data_movements_recv_status[i]);
+		}
+	}
+}
+
+static void update_data_ranks()
+{
+	int i,j;
+
+	/* Update the new ranks for all concerned data */
+	for (i = 0; i < world_size; i++)
+	{
+		int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]);
+		if (ndata_to_update)
+		{
+			//fprintf(stderr,"Update %d data from table %d on node %d\n", ndata_to_update, i, my_rank);
+
+			for (j = 0; j < ndata_to_update; j++)
+			{
+				starpu_data_handle_t handle = _starpu_mpi_data_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]);
+				STARPU_ASSERT(handle);
+				int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j];
+
+				/* Save the fact that the data has been moved out of this node */
+				if (i == my_rank)
+				{
+					struct moved_data_entry *md = (struct moved_data_entry *)malloc(sizeof(struct moved_data_entry));
+					md->handle = handle;
+					HASH_ADD_PTR(mdh, handle, md);
+				}
+				else if (dst_rank == my_rank)
+				{
+					/* The data has been moved out, and now is moved back, so
+					 * update the state of the moved_data hash table to reflect
+					 * this change */
+					struct moved_data_entry *md = NULL;
+					HASH_FIND_PTR(mdh, &handle, md);
+					if (md)
+					{
+						HASH_DEL(mdh, md);
+						free(md);
+					}
+				}
+
+				//if (i == my_rank)
+				//{
+				//    if (dst_rank != my_rank)
+				//        fprintf(stderr,"Move data %p (tag %d) from node %d to node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], my_rank, dst_rank);
+				//    else
+				//        fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank);
+				//}
+
+				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
+
+				/* Migrate the data handle */
+				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
+
+				_STARPU_DEBUG("New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
+				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
+			}
+		}
+	}
+}
+
+static void clean_balance()
+{
+	int i;
+	starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy);
+	for (i = 0; i < nneighbors; i++)
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]);
+	for (i = 0; i < world_size; i++)
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]);
+}
+
+/* Core function of the load balancer. Computes from the load_data_cpy handle a
+ * load balancing of the work to come (if needed), perform the necessary data
+ * communications and negociate with the other nodes the rebalancing. */
+static void heat_balance(starpu_data_handle_t load_data_cpy)
+{
+	/* Exchange load data handles with neighboring nodes */
+	exchange_load_data_infos(load_data_cpy);
+
+	/* Determine if this node should sent data to other nodes :
+	 * which ones, how much data */
+	balance(load_data_cpy);
+
+	/* Exchange data movements with neighboring nodes */
+	exchange_data_movements_infos();
+
+	/* Perform data movements */
+	update_data_ranks();
+
+	/* Clean the data handles to properly launch the next balance phase */
+	clean_balance();
+}
+
+/******************************************************************************
+ *                      Heat Load Balancer Entry Points                       *
+ *****************************************************************************/
+
+static void submitted_task_heat(struct starpu_task *task)
+{
+	load_data_inc_nsubmitted_tasks(*load_data_handle);
+	//if (load_data_get_nsubmitted_tasks(*load_data_handle) > task->tag_id)
+	//{
+	//    fprintf(stderr,"Error : nsubmitted_tasks (%d) > tag_id (%lld) ! \n", load_data_get_nsubmitted_tasks(*load_data_handle), (long long int)task->tag_id);
+	//    STARPU_ASSERT(0);
+	//}
+
+	int phase = load_data_get_current_phase(*load_data_handle);
+	/* Numbering of tasks in StarPU-MPI should be given by the application with
+	 * the STARPU_TAG_ONLY insert task option for now. */
+	/* TODO: Properly implement a solution for numbering tasks in StarPU-MPI */
+	if ((task->tag_id / load_data_get_sleep_threshold(*load_data_handle)) > phase)
+	{
+		STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex);
+		load_data_update_wakeup_cond(*load_data_handle);
+		//fprintf(stderr,"Node %d sleep on tag %lld\n", my_rank, (long long int)task->tag_id);
+		//if (load_data_get_nsubmitted_tasks(*load_data_handle) < load_data_get_wakeup_threshold(*load_data_handle))
+		//{
+		//    fprintf(stderr,"Error : nsubmitted_tasks (%d) lower than wakeup_threshold (%d) !\n", load_data_get_nsubmitted_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle));
+		//    STARPU_ASSERT(0);
+		//}
+
+		if (load_data_get_wakeup_threshold(*load_data_handle) > load_data_get_nfinished_tasks(*load_data_handle))
+			STARPU_PTHREAD_COND_WAIT(&load_data_cond, &load_data_mutex);
+
+		load_data_next_phase(*load_data_handle);
+
+		/* Register a copy of the load data at this moment, to allow to compute
+		 * the heat balance while not locking the load data during the whole
+		 * balance step, which could cause all the workers to wait on the lock
+		 * to update the data. */
+		struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle);
+		void* itf_src = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM);
+		void* itf_dst = starpu_data_get_interface_on_node(*load_data_handle_cpy, STARPU_MAIN_RAM);
+		memcpy(itf_dst, itf_src, itf_load_data->interface_size);
+
+		_STARPU_DEBUG("[node %d] Balance phase %d\n", my_rank, load_data_get_current_phase(*load_data_handle));
+		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
+
+		heat_balance(*load_data_handle_cpy);
+	}
+}
+
+static void finished_task_heat()
+{
+	//fprintf(stderr,"Try to decrement nsubmitted_tasks...");
+	STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex);
+
+	load_data_inc_nfinished_tasks(*load_data_handle);
+	//fprintf(stderr,"Decrement nsubmitted_tasks, now %d\n", load_data_get_nsubmitted_tasks(*load_data_handle));
+	if (load_data_wakeup_cond(*load_data_handle))
+	{
+		//fprintf(stderr,"Wakeup ! nfinished_tasks = %d, wakeup_threshold = %d\n", load_data_get_nfinished_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle));
+		load_data_update_elapsed_time(*load_data_handle);
+		STARPU_PTHREAD_COND_SIGNAL(&load_data_cond);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
+	}
+	else
+		STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex);
+}
+
+/******************************************************************************
+ *                  Initialization / Deinitialization                         *
+ *****************************************************************************/
+
+static int init_heat(struct starpu_mpi_lb_conf *itf)
+{
+	int i;
+	int sleep_task_threshold;
+	double wakeup_ratio;
+
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
+
+	/* Immediately return if the starpu_mpi_lb_conf is invalid. */
+	if (!(itf && itf->get_neighbors && itf->get_data_unit_to_migrate))
+	{
+		_STARPU_MSG("Error: struct starpu_mpi_lb_conf %p invalid\n", itf);
+		return 1;
+	}
+
+	user_itf = malloc(sizeof(struct starpu_mpi_lb_conf));
+	memcpy(user_itf, itf, sizeof(struct starpu_mpi_lb_conf));;
+
+	/* Get the neighbors of the local MPI node */
+	user_itf->get_neighbors(&neighbor_ids, &nneighbors);
+	if (nneighbors == 0)
+	{
+		_STARPU_MSG("Error: Function get_neighbors returning 0 neighbor\n");
+		free(user_itf);
+		user_itf = NULL;
+		return 2;
+	}
+
+	/* The sleep threshold is deducted from the numbering of tasks by the
+	 * application. For example, with this threshold, the submission thread
+	 * will stop when a task for which the numbering is 2000 or above will be
+	 * submitted to StarPU-MPI. However, much less tasks can be really
+	 * submitted to the local MPI node: the sleeping of the submission threads
+	 * checks the numbering of the tasks, not how many tasks have been
+	 * submitted to the local MPI node, which are two different things. */
+	char *sleep_env = starpu_getenv("LB_HEAT_SLEEP_THRESHOLD");
+	if (sleep_env)
+		sleep_task_threshold = atoi(sleep_env);
+	else
+		sleep_task_threshold = 2000;
+
+	char *wakeup_env = starpu_getenv("LB_HEAT_WAKEUP_RATIO");
+	if (wakeup_env)
+		wakeup_ratio = atof(wakeup_env);
+	else
+		wakeup_ratio = 0.5;
+
+	char *time_env = starpu_getenv("LB_HEAT_TIME_THRESHOLD");
+	if (time_env)
+		time_threshold = atoi(time_env);
+	else
+		time_threshold = 2000;
+
+	STARPU_PTHREAD_MUTEX_INIT(&load_data_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT(&load_data_cond, NULL);
+
+	/* Allocate, initialize and register all the data handles that will be
+	 * needed for the load balancer, to not reallocate them at each balance
+	 * step. */
+
+	/* Local load data */
+	load_data_handle = malloc(sizeof(starpu_data_handle_t));
+	memset(load_data_handle, 0, sizeof(starpu_data_handle_t));
+	load_data_data_register(load_data_handle, STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio);
+
+	/* Copy of the local load data to enable parallel update of the load data
+	 * with communications to neighbor nodes */
+	load_data_handle_cpy = malloc(sizeof(starpu_data_handle_t));
+	memset(load_data_handle_cpy, 0, sizeof(starpu_data_handle_t));
+	void *local_interface = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM);
+	struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle);
+	starpu_data_register(load_data_handle_cpy, STARPU_MAIN_RAM, local_interface, itf_load_data);
+	starpu_mpi_data_register(*load_data_handle_cpy, TAG_LOAD(my_rank), my_rank);
+
+	/* Remote load data */
+	neighbor_load_data_handles = malloc(nneighbors*sizeof(starpu_data_handle_t));
+	memset(neighbor_load_data_handles, 0, nneighbors*sizeof(starpu_data_handle_t));
+	for (i = 0; i < nneighbors; i++)
+	{
+		load_data_data_register(&neighbor_load_data_handles[i], STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio);
+		starpu_mpi_data_register(neighbor_load_data_handles[i], TAG_LOAD(neighbor_ids[i]), neighbor_ids[i]);
+	}
+
+	/* Data movements handles */
+	data_movements_handles = malloc(world_size*sizeof(starpu_data_handle_t));
+	for (i = 0; i < world_size; i++)
+	{
+		data_movements_data_register(&data_movements_handles[i], STARPU_MAIN_RAM, NULL, NULL, 0);
+		starpu_mpi_data_register(data_movements_handles[i], TAG_MOV(i), i);
+	}
+
+	/* Hash table of moved data that will be brought back on the node at
+	 * termination time */
+	mdh = NULL;
+
+	return 0;
+}
+
+/* Move back all the data that has been migrated out of this node at
+ * denitialization time of the load balancer, to ensure the consistency with
+ * the ranks of data originally registered by the application. */
+static void move_back_data()
+{
+	int i,j;
+
+	/* Update the new ranks for all concerned data */
+	for (i = 0; i < world_size; i++)
+	{
+		/* In this case, each data_movements_handles contains the handles to move back on the specific node */
+		int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]);
+		if (ndata_to_update)
+		{
+			_STARPU_DEBUG("Move back %d data from table %d on node %d\n", ndata_to_update, i, my_rank);
+
+			for (j = 0; j < ndata_to_update; j++)
+			{
+				starpu_data_handle_t handle = _starpu_mpi_data_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]);
+				STARPU_ASSERT(handle);
+
+				int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j];
+				STARPU_ASSERT(i == dst_rank);
+
+				if (i == my_rank)
+				{
+					/* The data is moved back, so update the state of the
+					 * moved_data hash table to reflect this change */
+					struct moved_data_entry *md = NULL;
+					HASH_FIND_PTR(mdh, &handle, md);
+					if (md)
+					{
+						HASH_DEL(mdh, md);
+						free(md);
+					}
+				}
+
+				//fprintf(stderr,"Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
+
+				/* Migrate the data handle */
+				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
+
+				//fprintf(stderr,"New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
+				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
+			}
+		}
+	}
+}
+
+static int deinit_heat()
+{
+	int i;
+
+	if ((!user_itf) || (nneighbors == 0))
+		return 1;
+
+	_STARPU_DEBUG("Shutting down heat lb policy\n");
+
+	unsigned int ndata_to_move_back = HASH_COUNT(mdh);
+
+	if (ndata_to_move_back)
+	{
+		_STARPU_DEBUG("Move back %u data on node %d ..\n", ndata_to_move_back, my_rank);
+		data_movements_reallocate_tables(data_movements_handles[my_rank], ndata_to_move_back);
+
+		int *tags = data_movements_get_tags_table(data_movements_handles[my_rank]);
+		int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]);
+
+		int n = 0;
+		struct moved_data_entry *md, *tmp;
+		HASH_ITER(hh, mdh, md, tmp)
+		{
+			tags[n] = starpu_mpi_data_get_tag(md->handle);
+			ranks[n] = my_rank;
+			n++;
+		}
+	}
+	else
+		data_movements_reallocate_tables(data_movements_handles[my_rank], 0);
+
+	exchange_data_movements_infos();
+	move_back_data();
+
+	/* This assert ensures that all nodes have properly gotten back all the
+	 * data that has been moven out of the node. */
+	STARPU_ASSERT(HASH_COUNT(mdh) == 0);
+	free(mdh);
+	mdh = NULL;
+
+	starpu_data_unregister(*load_data_handle);
+	free(load_data_handle);
+	load_data_handle = NULL;
+
+	starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy);
+	starpu_data_unregister(*load_data_handle_cpy);
+	free(load_data_handle_cpy);
+	load_data_handle_cpy = NULL;
+
+	for (i = 0; i < nneighbors; i++)
+	{
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]);
+		starpu_data_unregister(neighbor_load_data_handles[i]);
+	}
+	free(neighbor_load_data_handles);
+	neighbor_load_data_handles = NULL;
+
+	nneighbors = 0;
+	free(neighbor_ids);
+	neighbor_ids = NULL;
+
+	for (i = 0; i < world_size; i++)
+	{
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]);
+		data_movements_reallocate_tables(data_movements_handles[i], 0);
+		starpu_data_unregister(data_movements_handles[i]);
+	}
+	free(data_movements_handles);
+	data_movements_handles = NULL;
+
+	STARPU_PTHREAD_MUTEX_DESTROY(&load_data_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&load_data_cond);
+	free(user_itf);
+	user_itf = NULL;
+
+	return 0;
+}
+
+/******************************************************************************
+ *                                  Policy                                    *
+ *****************************************************************************/
+
+struct load_balancer_policy load_heat_propagation_policy =
+{
+	.init = init_heat,
+	.deinit = deinit_heat,
+	.submitted_task_entry_point = submitted_task_heat,
+	.finished_task_entry_point = finished_task_heat,
+	.policy_name = "heat"
+};

+ 1 - 1
mpi/src/starpu_mpi.c

@@ -449,7 +449,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 	_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
 	_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 #endif
 
 
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, 0);
+	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle));
 
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
 	/* somebody is perhaps waiting for the MPI request to be posted */
 	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);

+ 24 - 1
mpi/src/starpu_mpi_task_insert.c

@@ -36,6 +36,22 @@
 	else								\
 	else								\
 		starpu_mpi_isend_detached(data, dest, data_tag, comm, callback, arg);
 		starpu_mpi_isend_detached(data, dest, data_tag, comm, callback, arg);
 
 
+static void (*pre_submit_hook)(struct starpu_task *task) = NULL;
+
+int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *))
+{
+	if (pre_submit_hook)
+		_STARPU_MSG("Warning: a pre_submit_hook has already been registered. Please check if you really want to erase the previously registered hook.\n");
+	pre_submit_hook = f;
+	return 0;
+}
+
+int starpu_mpi_pre_submit_hook_unregister()
+{
+	pre_submit_hook = NULL;
+	return 0;
+}
+
 int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank)
 int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank)
 {
 {
 	if (mode & STARPU_W)
 	if (mode & STARPU_W)
@@ -472,6 +488,7 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 		va_copy(varg_list_copy, varg_list);
 		va_copy(varg_list_copy, varg_list);
 		_starpu_task_insert_create(codelet, task, varg_list_copy);
 		_starpu_task_insert_create(codelet, task, varg_list_copy);
 		va_end(varg_list_copy);
 		va_end(varg_list_copy);
+
 		return 0;
 		return 0;
 	}
 	}
 }
 }
@@ -526,7 +543,13 @@ int _starpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, va_
 			starpu_task_destroy(task);
 			starpu_task_destroy(task);
 		}
 		}
 	}
 	}
-	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
+
+	int val = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
+
+	if (ret == 0 && pre_submit_hook)
+		pre_submit_hook(task);
+
+	return val;
 }
 }
 
 
 int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)

+ 7 - 3
mpi/tests/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2009-2012, 2015-2016  Université de Bordeaux
 # Copyright (C) 2009-2012, 2015-2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -138,7 +138,8 @@ starpu_mpi_TESTS =				\
 	policy_selection			\
 	policy_selection			\
 	policy_selection2			\
 	policy_selection2			\
 	early_request				\
 	early_request				\
-	starpu_redefine
+	starpu_redefine				\
+	load_balancer
 
 
 noinst_PROGRAMS =				\
 noinst_PROGRAMS =				\
 	datatypes				\
 	datatypes				\
@@ -191,7 +192,8 @@ noinst_PROGRAMS =				\
 	policy_selection			\
 	policy_selection			\
 	policy_selection2			\
 	policy_selection2			\
 	early_request				\
 	early_request				\
-	starpu_redefine
+	starpu_redefine				\
+	load_balancer
 
 
 
 
 XFAIL_TESTS=					\
 XFAIL_TESTS=					\
@@ -301,6 +303,8 @@ early_request_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 starpu_redefine_LDADD =					\
 starpu_redefine_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+load_balancer_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 
 
 ring_SOURCES = ring.c
 ring_SOURCES = ring.c
 ring_sync_SOURCES = ring_sync.c
 ring_sync_SOURCES = ring_sync.c

+ 73 - 0
mpi/tests/load_balancer.c

@@ -0,0 +1,73 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_lb.h>
+#include "helper.h"
+
+#if !defined(STARPU_HAVE_UNSETENV)
+
+#warning unsetenv is not defined. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+void get_neighbors(int **neighbor_ids, int *nneighbors)
+{
+	int ret, rank, size;
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	*nneighbors = 1;
+	*neighbor_ids = malloc(sizeof(int));
+	*neighbor_ids[0] = rank==size-1?0:rank+1;
+}
+
+void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node)
+{
+	*nhandles = 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	struct starpu_mpi_lb_conf itf;
+
+	itf.get_neighbors = get_neighbors;
+	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
+
+	MPI_Init(&argc, &argv);
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	unsetenv("STARPU_MPI_LB");
+	starpu_mpi_lb_init(NULL, NULL);
+	starpu_mpi_lb_shutdown();
+
+	starpu_mpi_lb_init("heat", &itf);
+	starpu_mpi_lb_shutdown();
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}
+
+#endif

+ 6 - 7
src/common/fxt.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -138,12 +138,11 @@ void _starpu_fxt_init_profiling(unsigned trace_buffer_size)
 		return;
 		return;
 
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex);
-	if (!_starpu_fxt_started)
-	{
-		_starpu_fxt_started = 1;
-		_starpu_written = 0;
-		_starpu_profile_set_tracefile();
-	}
+	STARPU_ASSERT(!_starpu_fxt_started);
+
+	_starpu_fxt_started = 1;
+	_starpu_written = 0;
+	_starpu_profile_set_tracefile();
 
 
 #ifdef HAVE_FUT_SET_FILENAME
 #ifdef HAVE_FUT_SET_FILENAME
 	fut_set_filename(_STARPU_PROF_FILE_USER);
 	fut_set_filename(_STARPU_PROF_FILE_USER);

+ 8 - 2
src/common/fxt.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
  * Copyright (C) 2016  Inria
  * Copyright (C) 2016  Inria
  *
  *
@@ -95,6 +95,8 @@
 #define	_STARPU_FUT_START_ALLOC_REUSE	0x5129
 #define	_STARPU_FUT_START_ALLOC_REUSE	0x5129
 #define	_STARPU_FUT_END_ALLOC_REUSE	0x5130
 #define	_STARPU_FUT_END_ALLOC_REUSE	0x5130
 
 
+#define	_STARPU_FUT_USED_MEM	0x512a
+
 #define	_STARPU_FUT_START_MEMRECLAIM	0x5131
 #define	_STARPU_FUT_START_MEMRECLAIM	0x5131
 #define	_STARPU_FUT_END_MEMRECLAIM	0x5132
 #define	_STARPU_FUT_END_MEMRECLAIM	0x5132
 
 
@@ -493,7 +495,7 @@ do {									\
 		}							\
 		}							\
 		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
-		FUT_DO_PROBE7(_STARPU_FUT_CODELET_DETAILS, (job), ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->tag_id, workerid, ((job)->job_id)); \
+		FUT_DO_PROBE7(_STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000, (job)->task->tag_id, workerid, ((job)->job_id)); \
 	}								\
 	}								\
 } while(0);
 } while(0);
 
 
@@ -700,6 +702,9 @@ do {										\
 #define _STARPU_TRACE_END_WRITEBACK(memnode)		\
 #define _STARPU_TRACE_END_WRITEBACK(memnode)		\
 	FUT_DO_PROBE2(_STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid());
 	FUT_DO_PROBE2(_STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid());
 
 
+#define _STARPU_TRACE_USED_MEM(memnode,used)		\
+	FUT_DO_PROBE3(_STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid());
+	
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
 	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	
 	
@@ -1006,6 +1011,7 @@ do {										\
 #define _STARPU_TRACE_END_FREE(memnode)		do {} while(0)
 #define _STARPU_TRACE_END_FREE(memnode)		do {} while(0)
 #define _STARPU_TRACE_START_WRITEBACK(memnode)	do {} while(0)
 #define _STARPU_TRACE_START_WRITEBACK(memnode)	do {} while(0)
 #define _STARPU_TRACE_END_WRITEBACK(memnode)		do {} while(0)
 #define _STARPU_TRACE_END_WRITEBACK(memnode)		do {} while(0)
+#define _STARPU_TRACE_USED_MEM(memnode,used)		do {} while (0)
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode)	do {} while(0)
 #define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode)	do {} while(0)

+ 21 - 1
src/common/list.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012, 2015-2016  Université de Bordeaux
+ * Copyright (C) 2009-2012, 2015-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2016  CNRS
  * Copyright (C) 2010, 2011, 2016  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -73,6 +73,12 @@
  *   struct FOO*	FOO_list_end(struct FOO_list*);
  *   struct FOO*	FOO_list_end(struct FOO_list*);
  *     * retourne l'élément suivant de la liste
  *     * retourne l'élément suivant de la liste
  *   struct FOO*	FOO_list_next(struct FOO*)
  *   struct FOO*	FOO_list_next(struct FOO*)
+ *     * retourne le dernier élément de la liste
+ *   struct FOO*	FOO_list_last(struct FOO_list*);
+ *     * retourne la valeur à tester en début de liste
+ *   struct FOO*	FOO_list_alpha(struct FOO_list*);
+ *     * retourne l'élément précédent de la liste
+ *   struct FOO*	FOO_list_prev(struct FOO*)
  *     * retourne la taille de la liste
  *     * retourne la taille de la liste
  *   int		FOO_list_size(struct FOO_list*)
  *   int		FOO_list_size(struct FOO_list*)
  *     * retourne la position de l'élément dans la liste (indexé à partir de 0)
  *     * retourne la position de l'élément dans la liste (indexé à partir de 0)
@@ -108,6 +114,14 @@
  *  {
  *  {
  *    printf("a=%d; b=%d\n", i->a, i->b);
  *    printf("a=%d; b=%d\n", i->a, i->b);
  *  }
  *  }
+ *  - itérateur de liste :
+ *  struct ma_structure * i;
+ *  for(i  = ma_structure_list_last(l);
+ *      i != ma_structure_list_alpha(l);
+ *      i  = ma_structure_list_prev(i))
+ *  {
+ *    printf("a=%d; b=%d\n", i->a, i->b);
+ *  }
  * *********************************************************
  * *********************************************************
  */
  */
 
 
@@ -185,6 +199,12 @@
     { return NULL; } \
     { return NULL; } \
   /** @internal */static inline struct ENAME *ENAME##_list_next(const struct ENAME *i) \
   /** @internal */static inline struct ENAME *ENAME##_list_next(const struct ENAME *i) \
     { return i->_next; } \
     { return i->_next; } \
+  /** @internal */static inline struct ENAME *ENAME##_list_last(const struct ENAME##_list *l) \
+    { return l->_tail; } \
+  /** @internal */static inline struct ENAME *ENAME##_list_alpha(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \
+    { return NULL; } \
+  /** @internal */static inline struct ENAME *ENAME##_list_prev(const struct ENAME *i) \
+    { return i->_prev; } \
   /** @internal */static inline int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \
   /** @internal */static inline int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \
     { struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \
     { struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \
   /** @internal */static inline int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \
   /** @internal */static inline int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \

+ 32 - 21
src/common/utils.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2012-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -271,6 +271,33 @@ int _starpu_fftruncate(FILE *file, size_t length)
 	return ftruncate(fileno(file), length);
 	return ftruncate(fileno(file), length);
 }
 }
 
 
+static int _starpu_warn_nolock(int err)
+{
+	if (0
+#ifdef ENOLCK
+		|| err == ENOLCK
+#endif
+#ifdef ENOTSUP
+		|| err == ENOTSUP
+#endif
+#ifdef EOPNOTSUPP
+		|| err == EOPNOTSUPP
+#endif
+#ifdef EROFS
+		|| err == EROFS
+#endif
+		)
+	{
+		static int warn;
+		if (!warn) {
+			warn = 1;
+			_STARPU_DISP("warning: Couldn't lock performance file, StarPU home (%s, coming from $HOME or $STARPU_HOME) is probably on some network filesystem like NFS which does not support locking.\n", _starpu_get_home_path());
+		}
+		return 1;
+	}
+	return 0;
+}
+
 int _starpu_frdlock(FILE *file)
 int _starpu_frdlock(FILE *file)
 {
 {
 	int ret;
 	int ret;
@@ -290,17 +317,8 @@ int _starpu_frdlock(FILE *file)
 	};
 	};
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 #endif
 #endif
-#ifdef ENOLCK
-	if (ret != 0 && errno == ENOLCK)
-	{
-		static int warn;
-		if (!warn) {
-			warn = 1;
-			_STARPU_DISP("warning: Couldn't lock performance file, StarPU home is probably on NFS which does not support locking.\n");
-		}
+	if (ret != 0 && _starpu_warn_nolock(errno))
 		return -1;
 		return -1;
-	}
-#endif
 	STARPU_ASSERT(ret == 0);
 	STARPU_ASSERT(ret == 0);
 	return ret;
 	return ret;
 }
 }
@@ -325,6 +343,8 @@ int _starpu_frdunlock(FILE *file)
 	};
 	};
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 #endif
 #endif
+	if (ret != 0 && _starpu_warn_nolock(errno))
+		return -1;
 	STARPU_ASSERT(ret == 0);
 	STARPU_ASSERT(ret == 0);
 	return ret;
 	return ret;
 }
 }
@@ -351,17 +371,8 @@ int _starpu_fwrlock(FILE *file)
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 	ret = fcntl(fileno(file), F_SETLKW, &lock);
 #endif
 #endif
 
 
-#ifdef ENOLCK
-	if (ret != 0 && errno == ENOLCK)
-	{
-		static int warn;
-		if (!warn) {
-			warn = 1;
-			_STARPU_DISP("warning: Couldn't lock performance file, StarPU home is probably on NFS which does not support locking.\n");
-		}
+	if (ret != 0 && _starpu_warn_nolock(errno))
 		return -1;
 		return -1;
-	}
-#endif
 	STARPU_ASSERT(ret == 0);
 	STARPU_ASSERT(ret == 0);
 	return ret;
 	return ret;
 }
 }

+ 16 - 13
src/core/disk_ops/disk_leveldb.cpp

@@ -50,14 +50,14 @@ struct starpu_leveldb_base
 static void *starpu_leveldb_alloc(void *base, size_t size STARPU_ATTRIBUTE_UNUSED)
 static void *starpu_leveldb_alloc(void *base, size_t size STARPU_ATTRIBUTE_UNUSED)
 {
 {
 	struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base;
 	struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base;
-	struct starpu_leveldb_obj *obj;
-	_STARPU_MALLOC(obj, sizeof(struct starpu_leveldb_obj));
+	struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct starpu_leveldb_obj));
+	STARPU_ASSERT(obj);
 
 
         STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL);
 
 
 	size_t len = 6 + 1 + 2+sizeof(void*)*2 + 1;
 	size_t len = 6 + 1 + 2+sizeof(void*)*2 + 1;
-	char *key;
-	_STARPU_MALLOC(key, len*sizeof(char));
+	char *key = (char *)malloc(len*sizeof(char));
+	STARPU_ASSERT(key);
 	snprintf(key, len, "STARPU-%p", obj);
 	snprintf(key, len, "STARPU-%p", obj);
 
 
 	/* create and add a key with a small memory */
 	/* create and add a key with a small memory */
@@ -88,8 +88,8 @@ static void starpu_leveldb_free(void *base , void *obj, size_t size STARPU_ATTRI
 /* open an existing memory on disk */
 /* open an existing memory on disk */
 static void *starpu_leveldb_open(void *base STARPU_ATTRIBUTE_UNUSED, void *pos, size_t size)
 static void *starpu_leveldb_open(void *base STARPU_ATTRIBUTE_UNUSED, void *pos, size_t size)
 {
 {
-	struct starpu_leveldb_obj *obj;
-	_STARPU_MALLOC(obj, sizeof(struct starpu_leveldb_obj));
+	struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct starpu_leveldb_obj));
+	STARPU_ASSERT(obj);
 
 
         STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL);
         STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL);
 
 
@@ -149,7 +149,8 @@ static int starpu_leveldb_full_read(void *base, void *obj, void **ptr, size_t *s
 	STARPU_ASSERT(s.ok());
 	STARPU_ASSERT(s.ok());
 
 
 	*size = value.length();
 	*size = value.length();
-	_STARPU_MALLOC(*ptr, *size);
+	*ptr = malloc(*size);
+	STARPU_ASSERT(*ptr);
 
 
 	/* use buffer */
 	/* use buffer */
 	memcpy(*ptr, value.c_str(), *size);
 	memcpy(*ptr, value.c_str(), *size);
@@ -177,7 +178,8 @@ static int starpu_leveldb_write(void *base, void *obj, const void *buf, off_t of
 	else
 	else
 	{
 	{
 		uintptr_t buf_tmp = (uintptr_t) buf;
 		uintptr_t buf_tmp = (uintptr_t) buf;
-		_STARPU_MALLOC(buffer, (tmp->size > (offset + size)) ? tmp->size : (offset + size));
+		buffer = malloc((tmp->size > (offset + size)) ? tmp->size : (offset + size));
+		STARPU_ASSERT(buffer);
 
 
 		/* we read the data */
 		/* we read the data */
 		std::string value;
 		std::string value;
@@ -224,8 +226,8 @@ static int starpu_leveldb_full_write(void *base, void *obj, void *ptr, size_t si
 /* create a new copy of parameter == base */
 /* create a new copy of parameter == base */
 static void *starpu_leveldb_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED)
 static void *starpu_leveldb_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED)
 {
 {
-	struct starpu_leveldb_base *tmp;
-	_STARPU_MALLOC(tmp, sizeof(struct starpu_leveldb_base));
+	struct starpu_leveldb_base *tmp = (struct starpu_leveldb_base *)malloc(sizeof(struct starpu_leveldb_base));
+	STARPU_ASSERT(tmp);
 
 
 	leveldb::Status status;
 	leveldb::Status status;
 	leveldb::DB *db;
 	leveldb::DB *db;
@@ -268,8 +270,8 @@ static int get_leveldb_bandwidth_between_disk_and_main_ram(unsigned node)
 	double end;
 	double end;
 
 
 	srand(time (NULL));
 	srand(time (NULL));
-	char *buf;
-	_STARPU_MALLOC(buf, SIZE_DISK_MIN*sizeof(char));
+	char *buf = (char *)malloc(SIZE_DISK_MIN*sizeof(char));
+	STARPU_ASSERT(buf);
 
 
 	/* allocate memory */
 	/* allocate memory */
 	void *mem = _starpu_disk_alloc(node, SIZE_DISK_MIN);
 	void *mem = _starpu_disk_alloc(node, SIZE_DISK_MIN);
@@ -293,7 +295,8 @@ static int get_leveldb_bandwidth_between_disk_and_main_ram(unsigned node)
 	/* free memory */
 	/* free memory */
 	free(buf);
 	free(buf);
 
 
-	_STARPU_MALLOC(buf, sizeof(char));
+	buf = (char *)malloc(sizeof(char));
+	STARPU_ASSERT(buf);
 
 
 	/* Measure latency */
 	/* Measure latency */
 	start = starpu_timing_now();
 	start = starpu_timing_now();

+ 2 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -35,6 +35,7 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/simgrid.h>
 #include <core/simgrid.h>
+#include <core/topology.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <drivers/mpi/driver_mpi_common.h>
 #include <drivers/mpi/driver_mpi_common.h>
 
 
@@ -2489,7 +2490,7 @@ static void write_bus_platform_file_content(int version)
 	{
 	{
 		hwloc_topology_t topology;
 		hwloc_topology_t topology;
 		hwloc_topology_init(&topology);
 		hwloc_topology_init(&topology);
-		hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
+		_starpu_topology_filter(topology);
 		hwloc_topology_load(topology);
 		hwloc_topology_load(topology);
 
 
 		/* First find paths and record measured bandwidth along the path */
 		/* First find paths and record measured bandwidth along the path */

+ 114 - 239
src/core/sched_ctx.c

@@ -37,10 +37,11 @@ static int nobind;
 static int occupied_sms = 0;
 static int occupied_sms = 0;
 
 
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
 static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config);
-static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master);
-static void _starpu_sched_ctx_wake_these_workers_up(unsigned sched_ctx_id, int *workerids, int nworkers);
-static int _starpu_sched_ctx_find_master(unsigned sched_ctx_id, int *workerids, int nworkers);
-static void _starpu_sched_ctx_set_master(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers, int master);
+
+static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id);
+static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id);
+static void _starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id);
+static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id);
 
 
 static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker)
 static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker)
 {
 {
@@ -155,8 +156,6 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 	if (!nworkers_to_add)
 	if (!nworkers_to_add)
 		return;
 		return;
 	int workers_to_add[nworkers_to_add];
 	int workers_to_add[nworkers_to_add];
-	int cpu_workers[nworkers_to_add];
-	int ncpu_workers = 0;
 
 
 	struct starpu_perfmodel_device devices[nworkers_to_add];
 	struct starpu_perfmodel_device devices[nworkers_to_add];
 	int ndevices = 0;
 	int ndevices = 0;
@@ -236,13 +235,6 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 			else
 			else
 				found = 0;
 				found = 0;
 		}
 		}
-
-		if (!sched_ctx->sched_policy)
-		{
-			struct _starpu_worker *worker_str = _starpu_get_worker_struct(wa[i]);
-			if (worker_str->arch == STARPU_CPU_WORKER)
-				cpu_workers[ncpu_workers++] = wa[i];
-		}
 	}
 	}
 
 
 	if(ndevices > 0)
 	if(ndevices > 0)
@@ -311,24 +303,10 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 		}
 		}
 	}
 	}
 
 
-	if(!sched_ctx->sched_policy)
-	{
-		if(!sched_ctx->awake_workers)
-		{
-			if(sched_ctx->main_master == -1)
-				sched_ctx->main_master = starpu_sched_ctx_book_workers_for_task(sched_ctx->id, cpu_workers, ncpu_workers);
-			else
-			{
-				_starpu_sched_ctx_add_workers_to_master(sched_ctx->id, cpu_workers, ncpu_workers, sched_ctx->main_master);
-			}
-		}
-		else
-		{
-			sched_ctx->main_master = _starpu_sched_ctx_find_master(sched_ctx->id, cpu_workers, ncpu_workers);
-			_starpu_sched_ctx_set_master(sched_ctx, cpu_workers, ncpu_workers, sched_ctx->main_master);
-		}
-	}
-	else if(sched_ctx->sched_policy->add_workers)
+
+	_starpu_sched_ctx_update_parallel_workers_with(sched_ctx->id);
+
+	if(sched_ctx->sched_policy && sched_ctx->sched_policy->add_workers)
 	{
 	{
 		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		if(added_workers)
 		if(added_workers)
@@ -412,13 +390,7 @@ static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sche
 		sched_ctx->perf_arch.devices[dev].ncores = devices[dev].ncores;
 		sched_ctx->perf_arch.devices[dev].ncores = devices[dev].ncores;
 	}
 	}
 
 
-	if(!sched_ctx->sched_policy)
-	{
-		if(!sched_ctx->awake_workers)
-		{
-			_starpu_sched_ctx_wake_these_workers_up(sched_ctx->id, removed_workers, *n_removed_workers);
-		}
-	}
+	_starpu_sched_ctx_update_parallel_workers_without(sched_ctx->id);
 
 
 	return;
 	return;
 }
 }
@@ -561,13 +533,13 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 		STARPU_PTHREAD_COND_INIT(&sched_ctx->parallel_sect_cond_busy[w], NULL);
 		STARPU_PTHREAD_COND_INIT(&sched_ctx->parallel_sect_cond_busy[w], NULL);
 		sched_ctx->busy[w] = 0;
 		sched_ctx->busy[w] = 0;
 
 
-		sched_ctx->master[w] = -1;
 		sched_ctx->parallel_sect[w] = 0;
 		sched_ctx->parallel_sect[w] = 0;
 		sched_ctx->sleeping[w] = 0;
 		sched_ctx->sleeping[w] = 0;
 	}
 	}
 
 
+	sched_ctx->parallel_view = 0;
 
 
-        /*init the strategy structs and the worker_collection of the ressources of the context */
+  /*init the strategy structs and the worker_collection of the ressources of the context */
 	if(policy)
 	if(policy)
 	{
 	{
 		_starpu_init_sched_policy(config, sched_ctx, policy);
 		_starpu_init_sched_policy(config, sched_ctx, policy);
@@ -1151,7 +1123,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
 	if(!_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
 	{
 	{
 		if(!sched_ctx->sched_policy)
 		if(!sched_ctx->sched_policy)
-			starpu_sched_ctx_unbook_workers_for_task(sched_ctx->id, sched_ctx->main_master);
+			_starpu_sched_ctx_wake_up_workers(sched_ctx_id);
 		/*if btw the mutex release & the mutex lock the context has changed take care to free all
 		/*if btw the mutex release & the mutex lock the context has changed take care to free all
 		  scheduling data before deleting the context */
 		  scheduling data before deleting the context */
 		_starpu_update_workers_without_ctx(workerids, nworkers_ctx, sched_ctx_id, 1);
 		_starpu_update_workers_without_ctx(workerids, nworkers_ctx, sched_ctx_id, 1);
@@ -1719,6 +1691,12 @@ void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id)
 	return sched_ctx->policy_data;
 	return sched_ctx->policy_data;
 }
 }
 
 
+struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id)
+{
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+	return sched_ctx->sched_policy;
+}
+
 struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type  worker_collection_type)
 struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type  worker_collection_type)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
@@ -2359,45 +2337,13 @@ static unsigned _worker_sleeping_in_other_ctx(unsigned sched_ctx_id, int workeri
 
 
 }
 }
 
 
-static void _starpu_sched_ctx_get_workers_to_sleep(unsigned sched_ctx_id, int *workerids, int nworkers, int master)
-{
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	int current_worker_id = starpu_worker_get_id();
-	unsigned sleeping[nworkers];
-	int w;
-	for(w = 0; w < nworkers; w++)
-	{
-		if(current_worker_id == -1 || workerids[w] != current_worker_id)
-			STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->parallel_sect_mutex[workerids[w]]);
-		sleeping[w] = _worker_sleeping_in_other_ctx(sched_ctx_id, workerids[w]);
-		sched_ctx->master[workerids[w]] = master;
-		sched_ctx->parallel_sect[workerids[w]] = 1;
-		if(current_worker_id == -1 || workerids[w] != current_worker_id)
-			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->parallel_sect_mutex[workerids[w]]);
-#ifndef STARPU_NON_BLOCKING_DRIVERS
-		starpu_wake_worker(workerids[w]);
-#endif
-	}
-
-	for(w = 0; w < nworkers; w++)
-	{
-		int workerid = workerids[w];
-		if((current_worker_id == -1 || workerid != current_worker_id) && !sleeping[w])
-		{
-			sem_wait(&sched_ctx->fall_asleep_sem[master]);
-		}
-	}
-	return;
-}
-
 void _starpu_sched_ctx_signal_worker_blocked(unsigned sched_ctx_id, int workerid)
 void _starpu_sched_ctx_signal_worker_blocked(unsigned sched_ctx_id, int workerid)
 {
 {
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	worker->blocked = 1;
 	worker->blocked = 1;
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	sched_ctx->sleeping[workerid] = 1;
 	sched_ctx->sleeping[workerid] = 1;
-	int master = sched_ctx->master[workerid];
-	sem_post(&sched_ctx->fall_asleep_sem[master]);
+	sem_post(&sched_ctx->fall_asleep_sem[sched_ctx->main_master]);
 
 
 	return;
 	return;
 }
 }
@@ -2405,30 +2351,74 @@ void _starpu_sched_ctx_signal_worker_blocked(unsigned sched_ctx_id, int workerid
 void _starpu_sched_ctx_signal_worker_woke_up(unsigned sched_ctx_id, int workerid)
 void _starpu_sched_ctx_signal_worker_woke_up(unsigned sched_ctx_id, int workerid)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	int master = sched_ctx->master[workerid];
-	sem_post(&sched_ctx->wake_up_sem[master]);
+	sem_post(&sched_ctx->wake_up_sem[sched_ctx->main_master]);
 	sched_ctx->sleeping[workerid] = 0;
 	sched_ctx->sleeping[workerid] = 0;
-	sched_ctx->master[workerid] = -1;
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
 	worker->blocked = 0;
 	worker->blocked = 0;
 
 
 	return;
 	return;
 }
 }
 
 
-static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id, int master)
+static void _starpu_sched_ctx_put_workers_to_sleep(unsigned sched_ctx_id)
+{
+    struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
+    int current_worker_id = starpu_worker_get_id();
+		int master = sched_ctx->main_master;
+    struct starpu_worker_collection *workers = sched_ctx->workers;
+    struct starpu_sched_ctx_iterator it;
+    unsigned sleeping[sched_ctx->workers->nworkers];
+
+	if (master == -1)
+		return;
+
+    workers->init_iterator(workers, &it);
+    while(workers->has_next(workers, &it))
+    {
+        int workerid = workers->get_next(workers, &it);
+        sleeping[workerid] = _worker_sleeping_in_other_ctx(sched_ctx_id, workerid);
+
+        if(!sched_ctx->parallel_sect[workerid] && workerid != master)
+        {
+            if (current_worker_id == -1 || workerid != current_worker_id)
+            {
+                STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->parallel_sect_mutex[workerid]);
+                sched_ctx->parallel_sect[workerid] = 1;
+                STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->parallel_sect_mutex[workerid]);
+            }
+        }
+    }
+
+    workers->init_iterator(workers, &it);
+    while(workers->has_next(workers, &it))
+    {
+            int workerid = workers->get_next(workers, &it);
+            if(workerid != master
+               && (current_worker_id == -1 || workerid != current_worker_id)
+               && !sleeping[workerid])
+            {
+                    sem_wait(&sched_ctx->fall_asleep_sem[master]);
+            }
+    }
+
+    return;
+}
+
+static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	int current_worker_id = starpu_worker_get_id();
 	int current_worker_id = starpu_worker_get_id();
+	int master = sched_ctx->main_master;
 	struct starpu_worker_collection *workers = sched_ctx->workers;
 	struct starpu_worker_collection *workers = sched_ctx->workers;
-
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
 
 
+	if (master == -1)
+		return;
+
 	workers->init_iterator(workers, &it);
 	workers->init_iterator(workers, &it);
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
 		int workerid = workers->get_next(workers, &it);
 		int workerid = workers->get_next(workers, &it);
-		int curr_master = sched_ctx->master[workerid];
-		if(curr_master == master && sched_ctx->parallel_sect[workerid])
+		if(sched_ctx->parallel_sect[workerid] && workerid != master)
 		{
 		{
 			if((current_worker_id == -1 || workerid != current_worker_id) && sched_ctx->sleeping[workerid])
 			if((current_worker_id == -1 || workerid != current_worker_id) && sched_ctx->sleeping[workerid])
 			{
 			{
@@ -2447,197 +2437,82 @@ static void _starpu_sched_ctx_wake_up_workers(unsigned sched_ctx_id, int master)
 
 
 void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, unsigned sched_ctx_id)
 void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, unsigned sched_ctx_id)
 {
 {
-	int *workerids = NULL;
-	int nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);
-	_starpu_sched_ctx_get_workers_to_sleep(sched_ctx_id, workerids, nworkers, workerids[nworkers-1]);
-
-	/* execute parallel code */
-	void* ret = func(param);
+    _starpu_sched_ctx_put_workers_to_sleep(sched_ctx_id);
 
 
-	/* wake up starpu workers */
-	_starpu_sched_ctx_wake_up_workers(sched_ctx_id, workerids[nworkers-1]);
+    /* execute parallel code */
+    void* ret = func(param);
 
 
-	free(workerids);
-	return ret;
+    /* wake up starpu workers */
+    _starpu_sched_ctx_wake_up_workers(sched_ctx_id);
+    return ret;
 }
 }
 
 
-void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids)
+static void _starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id)
 {
 {
-	int current_worker_id = starpu_worker_get_id();
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	struct starpu_worker_collection *workers = sched_ctx->workers;
-	_STARPU_MALLOC((*cpuids), workers->nworkers*sizeof(int));
-	int w = 0;
+    struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
 
-	struct starpu_sched_ctx_iterator it;
+	if(sched_ctx->sched_policy)
+		return;
 
 
-	workers->init_iterator(workers, &it);
 
 
-	while(workers->has_next(workers, &it))
+	_starpu_sched_ctx_put_new_master(sched_ctx_id);
+
+	if(!sched_ctx->awake_workers)
 	{
 	{
-		int workerid = workers->get_next(workers, &it);
-		int master = sched_ctx->master[workerid];
-		if(master == current_worker_id || workerid == current_worker_id || current_worker_id == -1)
-		{
-			(*cpuids)[w++] = starpu_worker_get_bindid(workerid);
-		}
+		_starpu_sched_ctx_put_workers_to_sleep(sched_ctx_id);
 	}
 	}
-	*ncpuids = w;
-	return;
 }
 }
 
 
-static void _starpu_sched_ctx_wake_these_workers_up(unsigned sched_ctx_id, int *workerids, int nworkers)
+static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id)
 {
 {
-	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	int current_worker_id = starpu_worker_get_id();
+    struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 
 
-	int masters[nworkers];
-	int w;
-	for(w = 0; w < nworkers; w++)
-	{
-		int workerid = workerids[w];
-		masters[w] = sched_ctx->master[workerid];
-		if(current_worker_id == -1 || workerid != current_worker_id)
-		{
-			STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->parallel_sect_mutex[workerid]);
-			STARPU_PTHREAD_COND_SIGNAL(&sched_ctx->parallel_sect_cond[workerid]);
-			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->parallel_sect_mutex[workerid]);
-		}
-		else
-			sched_ctx->parallel_sect[workerid] = 0;
-		sched_ctx->master[workerid] = -1;
-	}
+	if(sched_ctx->sched_policy)
+		return;
 
 
-	for(w = 0; w < nworkers; w++)
+
+	_starpu_sched_ctx_put_new_master(sched_ctx_id);
+
+	if(!sched_ctx->awake_workers)
 	{
 	{
-		int workerid = workerids[w];
-		if(masters[w] != -1)
-		{
-			int master = sched_ctx->master[workerid];
-			if(current_worker_id == -1 || workerid != current_worker_id)
-				sem_wait(&sched_ctx->wake_up_sem[master]);
-		}
+		_starpu_sched_ctx_wake_up_workers(sched_ctx_id);
 	}
 	}
-
-	return;
 }
 }
 
 
-static int _starpu_sched_ctx_find_master(unsigned sched_ctx_id, int *workerids, int nworkers)
+void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids)
 {
 {
+	int current_worker_id = starpu_worker_get_id();
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	int new_master = workerids[nworkers-1];
-        int current_worker_id = starpu_worker_get_id();
-        int current_is_in_section = 0;
-        int npotential_masters = 0;
-        int nawake_workers = 0;
-        int ntrue_masters = 0;
-        int potential_masters[nworkers];
-        int awake_workers[nworkers];
-        int true_masters[nworkers];
-
-        int i,w;
-        for(w = 0 ; w < nworkers ; w++)
-        {
-                if (current_worker_id == workerids[w])
-                        current_is_in_section = 1;
+	struct starpu_worker_collection *workers = sched_ctx->workers;
+	_STARPU_MALLOC((*cpuids), workers->nworkers*sizeof(int));
+	int w = 0;
 
 
-		int master = sched_ctx->master[workerids[w]];
-                if (master > -1)
-		{
-                        int already_seen = 0;
-                        //Could create a function for this. Basically searching an element in an array.
-                        for (i = 0 ; i < npotential_masters; i++)
-                        {
-                                if (potential_masters[i] == master)
-				{
-                                        already_seen = 1;
-                                        break;
-				}
-                        }
-                        if (!already_seen)
-				potential_masters[npotential_masters++] = master;
-                }
-                else if (master == -1)
-                        awake_workers[nawake_workers++] = workerids[w];
-        }
+	struct starpu_sched_ctx_iterator it;
 
 
-        for (i = 0 ; i < npotential_masters ; i++)
-	{
-		int master_is_in_section = 0;
-		//Could create a function for this. Basically searching an element in an array.
-		for (w = 0 ; w < nworkers ; w++)
-		{
-			if (workerids[w] == potential_masters[i])
-			{
-				master_is_in_section = 1;
-				break;
-			}
-		}
-                if (master_is_in_section)
-			true_masters[ntrue_masters++] = potential_masters[i];
-        }
+	workers->init_iterator(workers, &it);
 
 
-        if (current_is_in_section)
-                new_master = current_worker_id;
-        else
-        {
-                if (ntrue_masters > 1)
+	while(workers->has_next(workers, &it))
+	{
+		int workerid = workers->get_next(workers, &it);
+		int master = sched_ctx->main_master;
+		if(master == current_worker_id || workerid == current_worker_id || current_worker_id == -1)
 		{
 		{
-                        if (nawake_workers > 0)
-                                new_master = awake_workers[nawake_workers - 1];
-                        else
-                                new_master = true_masters[ntrue_masters - 1];
+			(*cpuids)[w++] = starpu_worker_get_bindid(workerid);
 		}
 		}
 	}
 	}
-	return new_master;
+	*ncpuids = w;
+	return;
 }
 }
 
 
-static void _starpu_sched_ctx_add_workers_to_master(unsigned sched_ctx_id, int *workerids, int nworkers, int new_master)
+static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id)
 {
 {
+	int *workerids;
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	int w;
-	int nput_to_sleep = 0;
-	int nwake_up = 0;
-	int put_to_sleep[nworkers];
-	int wake_up[nworkers];
-
-	for(w = 0 ; w < nworkers ; w++)
-	{
-		int master = sched_ctx->master[workerids[w]];
-		if (master == -1 && workerids[w] != new_master)
-			put_to_sleep[nput_to_sleep++] = workerids[w];
-		else if(master != -1 && workerids[w] == new_master)
-			wake_up[nwake_up++] = workerids[w];
-	}
-
-	if(nwake_up > 0)
-		_starpu_sched_ctx_wake_these_workers_up(sched_ctx_id, wake_up, nwake_up);
-	if(nput_to_sleep > 0)
-		_starpu_sched_ctx_get_workers_to_sleep(sched_ctx_id, put_to_sleep, nput_to_sleep, new_master);
-
-}
-
-static void _starpu_sched_ctx_set_master(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers, int master)
-{
-	int i;
-	for(i = 0; i < nworkers; i++)
-	{
-		if(workerids[i] != master)
-			sched_ctx->master[workerids[i]] = master;
-	}
-}
+	unsigned nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);
 
 
-int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers)
-{
-	int new_master = _starpu_sched_ctx_find_master(sched_ctx_id, workerids, nworkers);
-	_starpu_sched_ctx_add_workers_to_master(sched_ctx_id, workerids, nworkers, new_master);
-	return new_master;
-}
+	sched_ctx->main_master = workerids[nworkers-1];
 
 
-void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master)
-{
-	/* wake up starpu workers */
-	_starpu_sched_ctx_wake_up_workers(sched_ctx_id, master);
+	free(workerids);
 }
 }
 
 
 struct starpu_perfmodel_arch * _starpu_sched_ctx_get_perf_archtype(unsigned sched_ctx_id)
 struct starpu_perfmodel_arch * _starpu_sched_ctx_get_perf_archtype(unsigned sched_ctx_id)

+ 6 - 5
src/core/sched_ctx.h

@@ -148,17 +148,14 @@ struct _starpu_sched_ctx
 	   parallel sections to be executed on their allocated resources */
 	   parallel sections to be executed on their allocated resources */
 	unsigned parallel_sect[STARPU_NMAXWORKERS];
 	unsigned parallel_sect[STARPU_NMAXWORKERS];
 
 
-	/* id of the master worker */
-	int master[STARPU_NMAXWORKERS];
-
-	/* semaphore that block appl thread until starpu threads are 
+	/* semaphore that block appl thread until starpu threads are
 	   all blocked and ready to exec the parallel code */
 	   all blocked and ready to exec the parallel code */
 	sem_t fall_asleep_sem[STARPU_NMAXWORKERS];
 	sem_t fall_asleep_sem[STARPU_NMAXWORKERS];
 
 
 	/* semaphore that block appl thread until starpu threads are 
 	/* semaphore that block appl thread until starpu threads are 
 	   all woke up and ready continue appl */
 	   all woke up and ready continue appl */
 	sem_t wake_up_sem[STARPU_NMAXWORKERS];
 	sem_t wake_up_sem[STARPU_NMAXWORKERS];
-       
+
 	/* bool indicating if the workers is sleeping in this ctx */
 	/* bool indicating if the workers is sleeping in this ctx */
 	unsigned sleeping[STARPU_NMAXWORKERS];
 	unsigned sleeping[STARPU_NMAXWORKERS];
 
 
@@ -168,6 +165,10 @@ struct _starpu_sched_ctx
 	/* perf model for the device comb of the ctx */
 	/* perf model for the device comb of the ctx */
 	struct starpu_perfmodel_arch perf_arch;
 	struct starpu_perfmodel_arch perf_arch;
 
 
+	/* For parallel workers, say whether it is viewed as sequential or not. This
+		 is a helper for the prologue code. */
+	unsigned parallel_view;
+
 	/* for ctxs without policy: flag to indicate that we want to get
 	/* for ctxs without policy: flag to indicate that we want to get
 	   the threads to sleep in order to replace them with other threads or leave
 	   the threads to sleep in order to replace them with other threads or leave
 	   them awake & use them in the parallel code*/
 	   them awake & use them in the parallel code*/

+ 4 - 0
src/core/sched_policy.c

@@ -989,7 +989,11 @@ profiling:
 	}
 	}
 
 
 	if(task->prologue_callback_pop_func)
 	if(task->prologue_callback_pop_func)
+	{
+		_starpu_set_current_task(task);
 		task->prologue_callback_pop_func(task->prologue_callback_pop_arg);
 		task->prologue_callback_pop_func(task->prologue_callback_pop_arg);
+		_starpu_set_current_task(NULL);
+	}
 
 
 	return task;
 	return task;
 }
 }

+ 21 - 1
src/core/simgrid.c

@@ -233,6 +233,9 @@ static int main_ret;
 
 
 int do_starpu_main(int argc, char *argv[])
 int do_starpu_main(int argc, char *argv[])
 {
 {
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	MSG_process_sleep(0.000001);
+
 	main_ret = starpu_main(argc, argv);
 	main_ret = starpu_main(argc, argv);
 	return main_ret;
 	return main_ret;
 }
 }
@@ -342,6 +345,9 @@ static struct task *last_task[STARPU_NMAXWORKERS];
 /* Actually execute the task.  */
 /* Actually execute the task.  */
 static int task_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
 static int task_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
 {
 {
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	MSG_process_sleep(0.000001);
+
 	struct task *task = starpu_pthread_getspecific(0);
 	struct task *task = starpu_pthread_getspecific(0);
 	_STARPU_DEBUG("task %p started\n", task);
 	_STARPU_DEBUG("task %p started\n", task);
 	MSG_task_execute(task->task);
 	MSG_task_execute(task->task);
@@ -530,6 +536,9 @@ static int transfers_are_sequential(struct transfer *new_transfer, struct transf
 /* Actually execute the transfer, and then start transfers waiting for this one.  */
 /* Actually execute the transfer, and then start transfers waiting for this one.  */
 static int transfer_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
 static int transfer_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
 {
 {
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	MSG_process_sleep(0.000001);
+
 	struct transfer *transfer = starpu_pthread_getspecific(0);
 	struct transfer *transfer = starpu_pthread_getspecific(0);
 	unsigned i;
 	unsigned i;
 	_STARPU_DEBUG("transfer %p started\n", transfer);
 	_STARPU_DEBUG("transfer %p started\n", transfer);
@@ -690,6 +699,9 @@ _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
 	void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16);
 	void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16);
 	void *arg = (void*) (uintptr_t) strtol(argv[1], NULL, 16);
 	void *arg = (void*) (uintptr_t) strtol(argv[1], NULL, 16);
 
 
+	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+	MSG_process_sleep(0.000001);
+
 	/* _args is freed with process context */
 	/* _args is freed with process context */
 	f(arg);
 	f(arg);
 	return 0;
 	return 0;
@@ -813,7 +825,15 @@ typedef struct{
 
 
 static int _starpu_simgrid_xbt_thread_create_wrapper(int argc, char *argv[])
 static int _starpu_simgrid_xbt_thread_create_wrapper(int argc, char *argv[])
 {
 {
-  smx_process_t self = SIMIX_process_self();
+  /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
+  MSG_process_sleep(0.000001);
+
+#ifdef HAVE_SMX_ACTOR_T
+  smx_actor_t
+#else
+  smx_process_t
+#endif
+	  self = SIMIX_process_self();
   thread_data_t *t = SIMIX_process_self_get_data(self);
   thread_data_t *t = SIMIX_process_self_get_data(self);
   simcall_process_set_data(self, t->father_data);
   simcall_process_set_data(self, t->father_data);
   t->code(t->userparam);
   t->code(t->userparam);

+ 10 - 5
src/core/topology.c

@@ -557,11 +557,7 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 #ifndef STARPU_SIMGRID
 #ifndef STARPU_SIMGRID
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_topology_init(&topology->hwtopology);
 	hwloc_topology_init(&topology->hwtopology);
-#if HWLOC_API_VERSION >= 0x20000
-	hwloc_topology_set_io_types_filter(topology->hwtopology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
-#else
-	hwloc_topology_set_flags(topology->hwtopology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
-#endif
+	_starpu_topology_filter(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
 	_starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
 	_starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
 #endif
 #endif
@@ -837,6 +833,15 @@ _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
 	return config->topology.nhwpus;
 	return config->topology.nhwpus;
 }
 }
 
 
+void _starpu_topology_filter(hwloc_topology_t topology)
+{
+#if HWLOC_API_VERSION >= 0x20000
+	hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
+#else
+	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
+#endif
+}
+
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
 static void
 static void
 _starpu_init_mic_config (struct _starpu_machine_config *config,
 _starpu_init_mic_config (struct _starpu_machine_config *config,

+ 3 - 0
src/core/topology.h

@@ -51,6 +51,9 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config);
 /* returns the number of logical cpus */
 /* returns the number of logical cpus */
 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
 unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
 
 
+/* Small convenient function to filter hwloc topology depending on HWLOC API version */
+void _starpu_topology_filter(hwloc_topology_t topology);
+
 #define STARPU_NOWORKERID -1
 #define STARPU_NOWORKERID -1
 /* Bind the current thread on the CPU logically identified by "cpuid". The
 /* Bind the current thread on the CPU logically identified by "cpuid". The
  * logical ordering of the processors is either that of hwloc (if available),
  * logical ordering of the processors is either that of hwloc (if available),

+ 13 - 6
src/datawizard/filters.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010-2016  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2012, 2016  Inria
  * Copyright (C) 2012, 2016  Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -417,15 +417,22 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 
 
 		sizes[child] = _starpu_data_get_size(child_handle);
 		sizes[child] = _starpu_data_get_size(child_handle);
 
 
+		if (child_handle->unregister_hook)
+		{
+			child_handle->unregister_hook(child_handle);
+		}
+
 		_starpu_data_unregister_ram_pointer(child_handle);
 		_starpu_data_unregister_ram_pointer(child_handle);
 
 
 		if (child_handle->per_worker)
 		if (child_handle->per_worker)
-		for (worker = 0; worker < nworkers; worker++)
 		{
 		{
-			struct _starpu_data_replicate *local = &child_handle->per_worker[worker];
-			STARPU_ASSERT(local->state == STARPU_INVALID);
-			if (local->allocated && local->automatically_allocated)
-				_starpu_request_mem_chunk_removal(child_handle, local, starpu_worker_get_memory_node(worker), sizes[child]);
+			for (worker = 0; worker < nworkers; worker++)
+			{
+				struct _starpu_data_replicate *local = &child_handle->per_worker[worker];
+				STARPU_ASSERT(local->state == STARPU_INVALID);
+				if (local->allocated && local->automatically_allocated)
+					_starpu_request_mem_chunk_removal(child_handle, local, starpu_worker_get_memory_node(worker), sizes[child]);
+			}
 		}
 		}
 
 
 		_starpu_memory_stats_free(child_handle);
 		_starpu_memory_stats_free(child_handle);

+ 1 - 1
src/datawizard/malloc.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2010, 2012-2016  Université de Bordeaux
  * Copyright (C) 2009-2010, 2012-2016  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by

+ 4 - 0
src/datawizard/memory_manager.c

@@ -17,6 +17,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <common/thread.h>
 #include <common/thread.h>
+#include <common/fxt.h>
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_manager.h>
 #include <starpu_stdlib.h>
 #include <starpu_stdlib.h>
 
 
@@ -90,6 +91,7 @@ int starpu_memory_allocate(unsigned node, size_t size, int flags)
 
 
 		/* And take it */
 		/* And take it */
 		used_size[node] += size;
 		used_size[node] += size;
+		_STARPU_TRACE_USED_MEM(node, used_size[node]);
 		ret = 0;
 		ret = 0;
 	}
 	}
 	else if (flags & STARPU_MEMORY_OVERFLOW
 	else if (flags & STARPU_MEMORY_OVERFLOW
@@ -97,6 +99,7 @@ int starpu_memory_allocate(unsigned node, size_t size, int flags)
 			|| used_size[node] + size <= global_size[node])
 			|| used_size[node] + size <= global_size[node])
 	{
 	{
 		used_size[node] += size;
 		used_size[node] += size;
+		_STARPU_TRACE_USED_MEM(node, used_size[node]);
 		ret = 0;
 		ret = 0;
 	}
 	}
 	else
 	else
@@ -112,6 +115,7 @@ void starpu_memory_deallocate(unsigned node, size_t size)
 	STARPU_PTHREAD_MUTEX_LOCK(&lock_nodes[node]);
 	STARPU_PTHREAD_MUTEX_LOCK(&lock_nodes[node]);
 
 
 	used_size[node] -= size;
 	used_size[node] -= size;
+	_STARPU_TRACE_USED_MEM(node, used_size[node]);
 
 
 	/* If there's now room for waiters, wake them */
 	/* If there's now room for waiters, wake them */
 	if (waiting_size[node] &&
 	if (waiting_size[node] &&

+ 116 - 68
src/debug/traces/starpu_fxt.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -94,6 +94,7 @@ struct task_info {
 	double start_time;
 	double start_time;
 	double end_time;
 	double end_time;
 	unsigned long footprint;
 	unsigned long footprint;
+	unsigned long kflops;
 	char *parameters;
 	char *parameters;
 	unsigned int ndeps;
 	unsigned int ndeps;
 	unsigned long *dependencies;
 	unsigned long *dependencies;
@@ -387,6 +388,53 @@ static double get_event_time_stamp(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 	return (((double)(ev->time-options->file_offset))/1000000.0);
 	return (((double)(ev->time-options->file_offset))/1000000.0);
 }
 }
 
 
+/*
+ *      Auxiliary functions for poti handling names
+ */
+#ifdef STARPU_HAVE_POTI
+static char *memnode_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
+{
+	snprintf(output, len, "%smn%lu", prefix, memnodeid);
+	return output;
+}
+
+static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
+{
+	snprintf(output, len, "%smm%lu", prefix, memnodeid);
+	return output;
+}
+
+static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid)
+{
+	snprintf(output, len, "%st%lu", prefix, threadid);
+	return output;
+}
+
+static char *worker_container_alias(char *output, int len, const char *prefix, long unsigned int workerid)
+{
+	snprintf(output, len, "%sw%lu", prefix, workerid);
+	return output;
+}
+
+static char *mpicommthread_container_alias(char *output, int len, const char *prefix)
+{
+	snprintf(output, len, "%smpict", prefix);
+	return output;
+}
+
+static char *program_container_alias(char *output, int len, const char *prefix)
+{
+	snprintf(output, len, "%sp", prefix);
+	return output;
+}
+
+static char *scheduler_container_alias(char *output, int len, const char *prefix)
+{
+	snprintf(output, len, "%ssched", prefix);
+	return output;
+}
+#endif
+
 static int nworkers = 0;
 static int nworkers = 0;
 
 
 struct worker_entry
 struct worker_entry
@@ -492,53 +540,6 @@ static void update_accumulated_time(int worker, double sleep_time, double exec_t
 	}
 	}
 }
 }
 
 
-/*
- *      Auxiliary functions for poti handling names
- */
-#ifdef STARPU_HAVE_POTI
-static char *memnode_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
-{
-	snprintf(output, len, "%smn%lu", prefix, memnodeid);
-	return output;
-}
-
-static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
-{
-	snprintf(output, len, "%smm%lu", prefix, memnodeid);
-	return output;
-}
-
-static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid)
-{
-	snprintf(output, len, "%st%lu", prefix, threadid);
-	return output;
-}
-
-static char *worker_container_alias(char *output, int len, const char *prefix, long unsigned int workerid)
-{
-	snprintf(output, len, "%sw%lu", prefix, workerid);
-	return output;
-}
-
-static char *mpicommthread_container_alias(char *output, int len, const char *prefix)
-{
-	snprintf(output, len, "%smpict", prefix);
-	return output;
-}
-
-static char *program_container_alias(char *output, int len, const char *prefix)
-{
-	snprintf(output, len, "%sp", prefix);
-	return output;
-}
-
-static char *scheduler_container_alias(char *output, int len, const char *prefix)
-{
-	snprintf(output, len, "%ssched", prefix);
-	return output;
-}
-#endif
-
 static void memnode_set_state(double time, const char *prefix, unsigned int memnodeid, const char *name)
 static void memnode_set_state(double time, const char *prefix, unsigned int memnodeid, const char *name)
 {
 {
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
@@ -625,7 +626,7 @@ static void user_thread_push_state(double time, const char *prefix, long unsigne
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
 	thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid);
 	thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid);
-	poti_SetState(time, container, "US", name);
+	poti_PushState(time, container, "US", name);
 #else
 #else
 	fprintf(out_paje_file, "11	%.9f	%st%lu	US	%s\n", time, prefix, threadid, name);
 	fprintf(out_paje_file, "11	%.9f	%st%lu	US	%s\n", time, prefix, threadid, name);
 #endif
 #endif
@@ -637,7 +638,7 @@ static void user_thread_pop_state(double time, const char *prefix, long unsigned
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
 	thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid);
 	thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid);
-	poti_SetState(time, container, "US", name);
+	poti_PopState(time, container, "US");
 #else
 #else
 	fprintf(out_paje_file, "12	%.9f	%st%lu	US\n", time, prefix, threadid);
 	fprintf(out_paje_file, "12	%.9f	%st%lu	US\n", time, prefix, threadid);
 #endif
 #endif
@@ -703,7 +704,7 @@ static void mpicommthread_push_state(double time, const char *prefix, const char
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
 	mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix);
 	mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix);
-	poti_SetState(time, container, "CtS", name);
+	poti_PushState(time, container, "CtS", name);
 #else
 #else
 	fprintf(out_paje_file, "11	%.9f	%smpict	CtS 	%s\n", time, prefix, name);
 	fprintf(out_paje_file, "11	%.9f	%smpict	CtS 	%s\n", time, prefix, name);
 #endif
 #endif
@@ -714,7 +715,7 @@ static void mpicommthread_pop_state(double time, const char *prefix)
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
 	mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix);
 	mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix);
-	poti_SetState(time, container, "CtS", name);
+	poti_PopState(time, container, "CtS");
 #else
 #else
 	fprintf(out_paje_file, "12	%.9f	%smpict	CtS\n", time, prefix);
 	fprintf(out_paje_file, "12	%.9f	%smpict	CtS\n", time, prefix);
 #endif
 #endif
@@ -845,11 +846,17 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 #endif
 #endif
 
 
 		if (!options->no_bus)
 		if (!options->no_bus)
+		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
-			poti_SetVariable(get_event_time_stamp(ev, options), new_memmanager_container_alias, "bw", get_event_time_stamp(ev, options));
+			poti_SetVariable(get_event_time_stamp(ev, options), new_memmanager_container_alias, "use", 0.0);
+			poti_SetVariable(get_event_time_stamp(ev, options), new_memmanager_container_alias, "bwi", 0.0);
+			poti_SetVariable(get_event_time_stamp(ev, options), new_memmanager_container_alias, "bwo", 0.0);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	bw	0.0\n", get_event_time_stamp(ev, options), prefix, ev->param[0]);
+			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	use	0.0\n", get_event_time_stamp(ev, options), prefix, ev->param[0]);
+			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	bwi	0.0\n", get_event_time_stamp(ev, options), prefix, ev->param[0]);
+			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	bwo	0.0\n", get_event_time_stamp(ev, options), prefix, ev->param[0]);
 #endif
 #endif
+		}
 	}
 	}
 }
 }
 
 
@@ -946,12 +953,15 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 		if (new_thread)
 		if (new_thread)
 			poti_CreateContainer(get_event_time_stamp(ev, options), new_thread_container_alias, "T", memnode_container, new_thread_container_name);
 			poti_CreateContainer(get_event_time_stamp(ev, options), new_thread_container_alias, "T", memnode_container, new_thread_container_name);
 		poti_CreateContainer(get_event_time_stamp(ev, options), new_worker_container_alias, "W", new_thread_container_alias, new_worker_container_name);
 		poti_CreateContainer(get_event_time_stamp(ev, options), new_worker_container_alias, "W", new_thread_container_alias, new_worker_container_name);
+		poti_SetVariable(get_event_time_stamp(ev, options), new_worker_container_alias, "gf", 0.0);
 #else
 #else
 		if (new_thread)
 		if (new_thread)
 			fprintf(out_paje_file, "7	%.9f	%st%lu	T	%smn%d	%s%d\n",
 			fprintf(out_paje_file, "7	%.9f	%st%lu	T	%smn%d	%s%d\n",
 				get_event_time_stamp(ev, options), prefix, threadid, prefix, nodeid, prefix, bindid);
 				get_event_time_stamp(ev, options), prefix, threadid, prefix, nodeid, prefix, bindid);
 		fprintf(out_paje_file, "7	%.9f	%sw%d	W	%st%lu	%s%s%d\n",
 		fprintf(out_paje_file, "7	%.9f	%sw%d	W	%st%lu	%s%s%d\n",
 			get_event_time_stamp(ev, options), prefix, workerid, prefix, threadid, prefix, kindstr, devid);
 			get_event_time_stamp(ev, options), prefix, workerid, prefix, threadid, prefix, kindstr, devid);
+		fprintf(out_paje_file, "13	%.9f	%sw%d	gf	0.0\n",
+			get_event_time_stamp(ev, options), prefix, workerid);
 #endif
 #endif
 	}
 	}
 
 
@@ -1286,7 +1296,8 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
 
 	struct task_info *task = get_task(job_id, options->file_rank);
 	struct task_info *task = get_task(job_id, options->file_rank);
 	task->parameters = strdup(parameters);
 	task->parameters = strdup(parameters);
-	task->footprint = ev->param[3];
+	task->footprint = ev->param[2];
+	task->kflops = ev->param[3];
 	task->tag = ev->param[4];
 	task->tag = ev->param[4];
 
 
 	if (out_paje_file)
 	if (out_paje_file)
@@ -1294,9 +1305,9 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
 
 #ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
 #ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
 		char *prefix = options->file_prefix;
 		char *prefix = options->file_prefix;
-		unsigned sched_ctx = ev->param[1];
+		unsigned sched_ctx = ev->param[0];
 
 
-		worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[2], parameters, ev->param[3], ev->param[4], job_id);
+		worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id);
 		if (sched_ctx != 0)
 		if (sched_ctx != 0)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
@@ -1306,7 +1317,7 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 			worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, worker);
 			worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, worker);
 			poti_SetState(last_codelet_start[worker], container, ctx, _starpu_last_codelet_symbol[worker]);
 			poti_SetState(last_codelet_start[worker], container, ctx, _starpu_last_codelet_symbol[worker]);
 #else
 #else
-			fprintf(out_paje_file, "20	%.9f	%sw%d	Ctx%u	%s	%ld	%s	%08lx	%016lx	%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[2], parameters,  ev->param[3], ev->param[4], job_id);
+			fprintf(out_paje_file, "20	%.9f	%sw%d	Ctx%u	%s	%ld	%s	%08lx	%016lx	%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[1], parameters,  ev->param[2], ev->param[4], job_id);
 #endif
 #endif
 		}
 		}
 #endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
 #endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
@@ -1335,11 +1346,25 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 		recfmt_worker_set_state(end_codelet_time, worker, "I", "Other");
 		recfmt_worker_set_state(end_codelet_time, worker, "I", "Other");
 
 
 	double codelet_length = (end_codelet_time - last_codelet_start[worker]);
 	double codelet_length = (end_codelet_time - last_codelet_start[worker]);
+	struct task_info *task = get_task(ev->param[0], options->file_rank);
+	double gflops = (((double)task->kflops) / 1000000) / (codelet_length / 1000);
 
 
 	get_task(ev->param[0], options->file_rank)->end_time = end_codelet_time;
 	get_task(ev->param[0], options->file_rank)->end_time = end_codelet_time;
 
 
 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
 
 
+#ifdef STARPU_HAVE_POTI
+	char container[STARPU_POTI_STR_LEN];
+	worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, worker);
+	poti_SetVariable(task->start_time, container, "gf", gflops);
+	poti_SetVariable(end_codelet_time, container, "gf", 0);
+#else
+	fprintf(out_paje_file, "13	%.9f	%sw%d	gf	%f\n",
+			task->start_time, prefix, worker, gflops);
+	fprintf(out_paje_file, "13	%.9f	%sw%d	gf	%f\n",
+			end_codelet_time, prefix, worker, 0.);
+#endif
+
 	if (distrib_time)
 	if (distrib_time)
 	     fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%.9f\n", _starpu_last_codelet_symbol[worker],
 	     fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%.9f\n", _starpu_last_codelet_symbol[worker],
 		     prefix, worker, (unsigned long) codelet_size, codelet_hash, codelet_length);
 		     prefix, worker, (unsigned long) codelet_size, codelet_hash, codelet_length);
@@ -1655,7 +1680,7 @@ static void handle_data_invalidate(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 		char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN];
 		char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN];
 		memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node);
 		memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node);
 		snprintf(paje_value, STARPU_POTI_STR_LEN, "%lx", handle);
 		snprintf(paje_value, STARPU_POTI_STR_LEN, "%lx", handle);
-		poti_NewEvent(get_event_time_stamp(ev, options), container, "user_event", paje_value);
+		poti_NewEvent(get_event_time_stamp(ev, options), memnode_container, "user_event", paje_value);
 #else
 #else
 		fprintf(out_paje_file, "9	%.9f	invalidate	%smm%u	%lx\n", get_event_time_stamp(ev, options), prefix, node, handle);
 		fprintf(out_paje_file, "9	%.9f	invalidate	%smm%u	%lx\n", get_event_time_stamp(ev, options), prefix, node, handle);
 #endif
 #endif
@@ -1782,6 +1807,9 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 		}
 		}
 
 
 		/* look for a data transfer to match */
 		/* look for a data transfer to match */
+#ifdef STARPU_DEVEL
+#warning FIXME: use hash table instead
+#endif
 		struct _starpu_communication *itor;
 		struct _starpu_communication *itor;
 		for (itor = _starpu_communication_list_begin(&communication_list);
 		for (itor = _starpu_communication_list_begin(&communication_list);
 			itor != _starpu_communication_list_end(&communication_list);
 			itor != _starpu_communication_list_end(&communication_list);
@@ -1841,6 +1869,23 @@ static void handle_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		memnode_set_state(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr);
 		memnode_set_state(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr);
 }
 }
 
 
+static void handle_used_mem(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+	unsigned memnode = ev->param[0];
+
+	if (out_paje_file)
+	{
+#ifdef STARPU_HAVE_POTI
+		char memnode_container[STARPU_POTI_STR_LEN];
+		memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, options->file_prefix, memnode);
+		poti_SetVariable(get_event_time_stamp(ev, options), memnode_container, "use", (double)ev->param[1] / (1<<20));
+#else
+		fprintf(out_paje_file, "13	%.9f	%smm%u	use	%f\n",
+			get_event_time_stamp(ev, options), options->file_prefix, memnode, (double)ev->param[1] / (1<<20));
+#endif
+	}
+}
+
 static void handle_task_submit_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, unsigned long tid, const char *eventstr)
 static void handle_task_submit_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, unsigned long tid, const char *eventstr)
 {
 {
 	int workerid = find_worker_id(tid);
 	int workerid = find_worker_id(tid);
@@ -2426,7 +2471,8 @@ static void handle_thread_event(struct fxt_ev_64 *ev, struct starpu_fxt_options
 static
 static
 void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 {
 {
-	float current_bandwidth_per_node[STARPU_MAXNODES] = {0.0};
+	float current_bandwidth_in_per_node[STARPU_MAXNODES] = {0.0};
+	float current_bandwidth_out_per_node[STARPU_MAXNODES] = {0.0};
 
 
 	char *prefix = options->file_prefix;
 	char *prefix = options->file_prefix;
 
 
@@ -2435,29 +2481,29 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 		itor != _starpu_communication_list_end(&communication_list);
 		itor != _starpu_communication_list_end(&communication_list);
 		itor = _starpu_communication_list_next(itor))
 		itor = _starpu_communication_list_next(itor))
 	{
 	{
-		current_bandwidth_per_node[itor->src_node] +=  itor->bandwidth;
+		current_bandwidth_out_per_node[itor->src_node] +=  itor->bandwidth;
 		if (out_paje_file)
 		if (out_paje_file)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 			char src_memnode_container[STARPU_POTI_STR_LEN];
 			char src_memnode_container[STARPU_POTI_STR_LEN];
 			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
 			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
-			poti_SetVariable(itor->comm_start, src_memnode_container, "bw", current_bandwidth_per_node[itor->src_node]);
+			poti_SetVariable(itor->comm_start, src_memnode_container, "bwo", current_bandwidth_out_per_node[itor->src_node]);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
-				itor->comm_start, prefix, itor->src_node, current_bandwidth_per_node[itor->src_node]);
+			fprintf(out_paje_file, "13	%.9f	%smm%u	bwo	%f\n",
+				itor->comm_start, prefix, itor->src_node, current_bandwidth_out_per_node[itor->src_node]);
 #endif
 #endif
 		}
 		}
 
 
-		current_bandwidth_per_node[itor->dst_node] +=  itor->bandwidth;
+		current_bandwidth_in_per_node[itor->dst_node] +=  itor->bandwidth;
 		if (out_paje_file)
 		if (out_paje_file)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 			char dst_memnode_container[STARPU_POTI_STR_LEN];
 			char dst_memnode_container[STARPU_POTI_STR_LEN];
 			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
 			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
-			poti_SetVariable(itor->comm_start, dst_memnode_container, "bw", current_bandwidth_per_node[itor->dst_node]);
+			poti_SetVariable(itor->comm_start, dst_memnode_container, "bwi", current_bandwidth_in_per_node[itor->dst_node]);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
-				itor->comm_start, prefix, itor->dst_node, current_bandwidth_per_node[itor->dst_node]);
+			fprintf(out_paje_file, "13	%.9f	%smm%u	bwi	%f\n",
+				itor->comm_start, prefix, itor->dst_node, current_bandwidth_in_per_node[itor->dst_node]);
 #endif
 #endif
 		}
 		}
 	}
 	}
@@ -2845,6 +2891,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 					handle_memnode_event(&ev, options, "No");
 					handle_memnode_event(&ev, options, "No");
 				}
 				}
 				break;
 				break;
+			case _STARPU_FUT_USED_MEM:
+				handle_used_mem(&ev, options);
 
 
 			case _STARPU_FUT_USER_EVENT:
 			case _STARPU_FUT_USER_EVENT:
 				handle_user_event(&ev, options);
 				handle_user_event(&ev, options);

+ 142 - 46
src/debug/traces/starpu_fxt_mpi.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2012-2013, 2016  Université Bordeaux
+ * Copyright (C) 2012-2013, 2016-2017  Université Bordeaux
  * Copyright (C) 2010, 2011, 2014, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2014, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -26,14 +26,17 @@
 #define STARPU_POTI_STR_LEN 200
 #define STARPU_POTI_STR_LEN 200
 #endif
 #endif
 
 
-struct mpi_transfer
-{
+#define MAX_MPI_NODES 64
+
+LIST_TYPE(mpi_transfer,
 	unsigned matched;
 	unsigned matched;
-	int other_rank; /* src for a recv, dest for a send */
+	int src;
+	int dst;
 	int mpi_tag;
 	int mpi_tag;
 	size_t size;
 	size_t size;
 	float date;
 	float date;
-};
+	double bandwidth;
+);
 
 
 /* Returns 0 if a barrier is found, -1 otherwise. In case of success, offset is
 /* Returns 0 if a barrier is found, -1 otherwise. In case of success, offset is
  * filled with the timestamp of the barrier */
  * filled with the timestamp of the barrier */
@@ -100,25 +103,28 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
  */
  */
 
 
 /* the list of MPI transfers found in the different traces */
 /* the list of MPI transfers found in the different traces */
-static struct mpi_transfer *mpi_sends[64] = {NULL};
-static struct mpi_transfer *mpi_recvs[64] = {NULL};
+static struct mpi_transfer *mpi_sends[MAX_MPI_NODES] = {NULL};
+static struct mpi_transfer *mpi_recvs[MAX_MPI_NODES] = {NULL};
 
 
 /* number of available slots in the lists  */
 /* number of available slots in the lists  */
-unsigned mpi_sends_list_size[64] = {0};
-unsigned mpi_recvs_list_size[64] = {0};
+unsigned mpi_sends_list_size[MAX_MPI_NODES] = {0};
+unsigned mpi_recvs_list_size[MAX_MPI_NODES] = {0};
 
 
 /* number of slots actually used in the list  */
 /* number of slots actually used in the list  */
-unsigned mpi_sends_used[64] = {0};
-unsigned mpi_recvs_used[64] = {0};
+unsigned mpi_sends_used[MAX_MPI_NODES] = {0};
+unsigned mpi_recvs_used[MAX_MPI_NODES] = {0};
 
 
 /* number of slots already matched at the beginning of the list. This permits
 /* number of slots already matched at the beginning of the list. This permits
  * going through the lists from the beginning to match each and every
  * going through the lists from the beginning to match each and every
  * transfer, thus avoiding a quadratic complexity. */
  * transfer, thus avoiding a quadratic complexity. */
-unsigned mpi_recvs_matched[64] = {0};
+unsigned mpi_recvs_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
+unsigned mpi_sends_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
 
 
 void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, int mpi_tag, size_t size, float date)
 void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, int mpi_tag, size_t size, float date)
 {
 {
 	STARPU_ASSERT(src >= 0);
 	STARPU_ASSERT(src >= 0);
+	if (src >= MAX_MPI_NODES)
+		return;
 	unsigned slot = mpi_sends_used[src]++;
 	unsigned slot = mpi_sends_used[src]++;
 
 
 	if (mpi_sends_used[src] > mpi_sends_list_size[src])
 	if (mpi_sends_used[src] > mpi_sends_list_size[src])
@@ -136,7 +142,8 @@ void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 	}
 	}
 
 
 	mpi_sends[src][slot].matched = 0;
 	mpi_sends[src][slot].matched = 0;
-	mpi_sends[src][slot].other_rank = dst;
+	mpi_sends[src][slot].src = src;
+	mpi_sends[src][slot].dst = dst;
 	mpi_sends[src][slot].mpi_tag = mpi_tag;
 	mpi_sends[src][slot].mpi_tag = mpi_tag;
 	mpi_sends[src][slot].size = size;
 	mpi_sends[src][slot].size = size;
 	mpi_sends[src][slot].date = date;
 	mpi_sends[src][slot].date = date;
@@ -144,6 +151,8 @@ void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 
 
 void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag, float date)
 void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag, float date)
 {
 {
+	if (dst >= MAX_MPI_NODES)
+		return;
 	unsigned slot = mpi_recvs_used[dst]++;
 	unsigned slot = mpi_recvs_used[dst]++;
 
 
 	if (mpi_recvs_used[dst] > mpi_recvs_list_size[dst])
 	if (mpi_recvs_used[dst] > mpi_recvs_list_size[dst])
@@ -161,7 +170,8 @@ void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst,
 	}
 	}
 
 
 	mpi_recvs[dst][slot].matched = 0;
 	mpi_recvs[dst][slot].matched = 0;
-	mpi_recvs[dst][slot].other_rank = dst;
+	mpi_recvs[dst][slot].src = src;
+	mpi_recvs[dst][slot].dst = dst;
 	mpi_recvs[dst][slot].mpi_tag = mpi_tag;
 	mpi_recvs[dst][slot].mpi_tag = mpi_tag;
 	mpi_recvs[dst][slot].date = date;
 	mpi_recvs[dst][slot].date = date;
 }
 }
@@ -170,7 +180,7 @@ static
 struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag)
 struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag)
 {
 {
 	unsigned slot;
 	unsigned slot;
-	unsigned firstslot = mpi_recvs_matched[dst];
+	unsigned firstslot = mpi_recvs_matched[src][dst];
 
 
 	unsigned all_previous_were_matched = 1;
 	unsigned all_previous_were_matched = 1;
 
 
@@ -193,7 +203,7 @@ struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED,
 			{
 			{
 				/* All previous transfers are already matched,
 				/* All previous transfers are already matched,
 				 * we need not consider them anymore */
 				 * we need not consider them anymore */
-				mpi_recvs_matched[dst] = slot;
+				mpi_recvs_matched[src][dst] = slot;
 			}
 			}
 		}
 		}
 	}
 	}
@@ -204,60 +214,146 @@ struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED,
 
 
 static unsigned long mpi_com_id = 0;
 static unsigned long mpi_com_id = 0;
 
 
-static void display_all_transfers_from_trace(FILE *out_paje_file, int src)
+static void display_all_transfers_from_trace(FILE *out_paje_file, unsigned n)
 {
 {
-	unsigned slot;
-	for (slot = 0; slot < mpi_sends_used[src]; slot++)
+	unsigned slot[MAX_MPI_NODES] = { 0 }, node, src;
+	struct mpi_transfer_list pending_receives; /* Sorted list of matches which have not happened yet */
+	double current_out_bandwidth[MAX_MPI_NODES] = { 0. };
+	double current_in_bandwidth[MAX_MPI_NODES] = { 0. };
+#ifdef STARPU_HAVE_POTI
+	char mpi_container[STARPU_POTI_STR_LEN];
+#endif
+
+	for (node = 0; node < n ; node++)
 	{
 	{
-		int dst = mpi_sends[src][slot].other_rank;
-		int mpi_tag = mpi_sends[src][slot].mpi_tag;
-		float start_date = mpi_sends[src][slot].date;
-		size_t size = mpi_sends[src][slot].size;
+#ifdef STARPU_HAVE_POTI
+		snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", node);
+		poti_SetVariable(0., mpi_container, "bwi", 0.);
+		poti_SetVariable(0., mpi_container, "bwo", 0.);
+#else
+		fprintf(out_paje_file, "13	%.9f	%d_mpict	bwi	%f\n", 0., node, 0.);
+		fprintf(out_paje_file, "13	%.9f	%d_mpict	bwo	%f\n", 0., node, 0.);
+#endif
+	}
 
 
-		struct mpi_transfer *match;
-		match = try_to_match_send_transfer(src, dst, mpi_tag);
+	mpi_transfer_list_init(&pending_receives);
+
+	while (1)
+	{
+		float start_date;
+		struct mpi_transfer *cur, *match;
+
+		/* Find out which event comes first: a pending receive, or a new send */
+
+		if (mpi_transfer_list_empty(&pending_receives))
+			start_date = INFINITY;
+		else
+			start_date = mpi_transfer_list_front(&pending_receives)->date;
+
+		src = MAX_MPI_NODES;
+		for (node = 0; node < n; node++) {
+			if (slot[node] < mpi_sends_used[node] && mpi_sends[node][slot[node]].date < start_date)
+			{
+				/* next send for node is earlier than others */
+				src = node;
+				start_date = mpi_sends[src][slot[src]].date;
+			}
+		}
+		if (start_date == INFINITY)
+			/* No event any more, we're finished! */
+			break;
+
+		if (src == MAX_MPI_NODES)
+		{
+			/* Pending match is earlier than all new sends, finish its communication */
+			match = mpi_transfer_list_pop_front(&pending_receives);
+			current_out_bandwidth[match->src] -= match->bandwidth;
+			current_in_bandwidth[match->dst] -= match->bandwidth;
+#ifdef STARPU_HAVE_POTI
+			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->src);
+			poti_SetVariable(match->date, mpi_container, "bwo", current_out_bandwidth[match->src]);
+			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->dst);
+			poti_SetVariable(match->date, mpi_container, "bwi", current_in_bandwidth[match->dst]);
+#else
+			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwo	%f\n", match->date, match->src, current_out_bandwidth[match->src]);
+			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwi	%f\n", match->date, match->dst, current_in_bandwidth[match->dst]);
+#endif
+			continue;
+		}
+
+		cur = &mpi_sends[src][slot[src]];
+		int dst = cur->dst;
+		int mpi_tag = cur->mpi_tag;
+		size_t size = cur->size;
+
+		if (dst < MAX_MPI_NODES)
+			match = try_to_match_send_transfer(src, dst, mpi_tag);
+		else
+			match = NULL;
 
 
 		if (match)
 		if (match)
 		{
 		{
 			float end_date = match->date;
 			float end_date = match->date;
+			struct mpi_transfer *prev;
+
+			match->bandwidth = (0.001*size)/(end_date - start_date);
+			current_out_bandwidth[src] += match->bandwidth;
+			current_in_bandwidth[dst] += match->bandwidth;
+
+			/* Insert in sorted list, most probably at the end so let's use a mere insertion sort */
+			for (prev = mpi_transfer_list_last(&pending_receives);
+				prev != mpi_transfer_list_alpha(&pending_receives);
+				prev = mpi_transfer_list_prev(prev))
+				if (prev->date <= end_date)
+				{
+					/* Found its place */
+					mpi_transfer_list_insert_after(&pending_receives, match, prev);
+					break;
+				}
+			if (prev == mpi_transfer_list_alpha(&pending_receives))
+			{
+				/* No element earlier than this one, put it at the head */
+				mpi_transfer_list_push_front(&pending_receives, match);
+			}
 
 
 			unsigned long id = mpi_com_id++;
 			unsigned long id = mpi_com_id++;
-			/* TODO replace 0 by a MPI program ? */
-			if (out_paje_file)
-			{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
-				char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN];
-				snprintf(paje_value, STARPU_POTI_STR_LEN, "%lu", (long unsigned) size);
-				snprintf(paje_key, STARPU_POTI_STR_LEN, "mpicom_%lu", id);
-				char mpi_container[STARPU_POTI_STR_LEN];
-				snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", /* XXX */src);
-				poti_StartLink(start_date, "MPICt", "MPIL", mpi_container, paje_value, paje_key);
-				snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", /* XXX */dst);
-				poti_EndLink(end_date, "MPICt", "MPIL", mpi_container, paje_value, paje_key);
+			char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN];
+			snprintf(paje_value, STARPU_POTI_STR_LEN, "%lu", (long unsigned) size);
+			snprintf(paje_key, STARPU_POTI_STR_LEN, "mpicom_%lu", id);
+			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", src);
+			poti_StartLink(start_date, "MPICt", "MPIL", mpi_container, paje_value, paje_key);
+			poti_SetVariable(start_date, mpi_container, "bwo", current_out_bandwidth[src]);
+			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", dst);
+			poti_EndLink(end_date, "MPICt", "MPIL", mpi_container, paje_value, paje_key);
+			poti_SetVariable(start_date, mpi_container, "bwo", current_in_bandwidth[dst]);
 #else
 #else
-				fprintf(out_paje_file, "18	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", start_date, (unsigned long)size, /* XXX */src, id);
-				fprintf(out_paje_file, "19	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", end_date, (unsigned long)size, /* XXX */dst, id);
+			fprintf(out_paje_file, "18	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", start_date, (unsigned long)size, src, id);
+			fprintf(out_paje_file, "19	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", end_date, (unsigned long)size, dst, id);
+			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwo	%f\n", start_date, src, current_out_bandwidth[src]);
+			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwi	%f\n", start_date, dst, current_in_bandwidth[dst]);
 #endif
 #endif
-			}
 		}
 		}
 		else
 		else
 		{
 		{
 			_STARPU_DISP("Warning, could not match MPI transfer from %d to %d (tag %x) starting at %f\n", src, dst, mpi_tag, start_date);
 			_STARPU_DISP("Warning, could not match MPI transfer from %d to %d (tag %x) starting at %f\n", src, dst, mpi_tag, start_date);
 		}
 		}
 
 
+		slot[src]++;
 	}
 	}
 }
 }
 
 
-void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file)
+void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks STARPU_ATTRIBUTE_UNUSED, FILE *out_paje_file)
 {
 {
-	unsigned inputfile;
-
-	/* display the MPI transfers if possible */
-	for (inputfile = 0; inputfile < options->ninputfiles; inputfile++)
+	if (options->ninputfiles > MAX_MPI_NODES)
 	{
 	{
-		int filerank = ranks[inputfile];
-		display_all_transfers_from_trace(out_paje_file, filerank);
+		_STARPU_DISP("Warning: %u files given, maximum %u supported, truncating to %u\n", options->ninputfiles, MAX_MPI_NODES, MAX_MPI_NODES);
+		options->ninputfiles = MAX_MPI_NODES;
 	}
 	}
+
+	/* display the MPI transfers if possible */
+	if (out_paje_file)
+		display_all_transfers_from_trace(out_paje_file, options->ninputfiles);
 }
 }
 
 
 #endif // STARPU_USE_FXT
 #endif // STARPU_USE_FXT

+ 13 - 3
src/debug/traces/starpu_paje.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -160,7 +160,9 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 
 
 	/* Types for the memory node */
 	/* Types for the memory node */
 	poti_DefineEventType("invalidate", "Mm", "data invalidation");
 	poti_DefineEventType("invalidate", "Mm", "data invalidation");
-	poti_DefineVariableType("bw", "Mm", "Bandwidth", "0 0 0");
+	poti_DefineVariableType("use", "Mm", "Used (MB)", "0 0 0");
+	poti_DefineVariableType("bwi", "Mm", "Bandwidth In (MB/s)", "0 0 0");
+	poti_DefineVariableType("bwo", "Mm", "Bandwidth Out (MB/s)", "0 0 0");
 	poti_DefineStateType("MS", "Mm", "Memory Node State");
 	poti_DefineStateType("MS", "Mm", "Memory Node State");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
@@ -175,6 +177,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	/* Types for the Worker of the Memory Node */
 	/* Types for the Worker of the Memory Node */
 	poti_DefineEventType("user_event", "T", "user event type");
 	poti_DefineEventType("user_event", "T", "user event type");
 	poti_DefineEventType("thread_event", "T", "thread event type");
 	poti_DefineEventType("thread_event", "T", "thread event type");
+	poti_DefineVariableType("gf", "T", "GFlops", "0 0 0");
 	poti_DefineStateType("S", "T", "Thread State");
 	poti_DefineStateType("S", "T", "Thread State");
 	poti_DefineEntityValue("I", "S", "Idle", ".9 .1 0");
 	poti_DefineEntityValue("I", "S", "Idle", ".9 .1 0");
 	poti_DefineEntityValue("In", "S", "Initializing", "0.0 .7 1.0");
 	poti_DefineEntityValue("In", "S", "Initializing", "0.0 .7 1.0");
@@ -213,6 +216,8 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 
 
 	/* Types for the MPI Communication Thread of the Memory Node */
 	/* Types for the MPI Communication Thread of the Memory Node */
 	poti_DefineEventType("MPIev", "MPICt", "MPI event type");
 	poti_DefineEventType("MPIev", "MPICt", "MPI event type");
+	poti_DefineVariableType("bwi", "MPICt", "Bandwidth In (MB/s)", "0 0 0");
+	poti_DefineVariableType("bwo", "MPICt", "Bandwidth Out (MB/s)", "0 0 0");
 	poti_DefineStateType("CtS", "MPICt", "Communication Thread State");
 	poti_DefineStateType("CtS", "MPICt", "Communication Thread State");
 	poti_DefineEntityValue("P", "CtS", "Processing", "0 0 0");
 	poti_DefineEntityValue("P", "CtS", "Processing", "0 0 0");
 	poti_DefineEntityValue("Sl", "CtS", "Sleeping", ".9 .1 .0");
 	poti_DefineEntityValue("Sl", "CtS", "Sleeping", ".9 .1 .0");
@@ -299,7 +304,12 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 3       MS       Mm       \"Memory Node State\"                        \n\
 3       MS       Mm       \"Memory Node State\"                        \n\
 4       nsubmitted    Sc       \"Number of Submitted Uncompleted Tasks\"                        \n\
 4       nsubmitted    Sc       \"Number of Submitted Uncompleted Tasks\"                        \n\
 4       nready    Sc       \"Number of Ready Tasks\"                        \n\
 4       nready    Sc       \"Number of Ready Tasks\"                        \n\
-4       bw      Mm       \"Bandwidth\"                        \n\
+4       use     Mm       \"Used (MB)\"                        \n\
+4       bwi     Mm       \"Bandwidth In (MB/s)\"                        \n\
+4       bwo     Mm       \"Bandwidth Out (MB/s)\"                        \n\
+4       bwi     MPICt       \"Bandwidth In (MB/s)\"                        \n\
+4       bwo     MPICt       \"Bandwidth Out (MB/s)\"                        \n\
+4       gf      T       \"GFlops\"                        \n\
 6       I       S       Idle         \".9 .1 .0\"		\n\
 6       I       S       Idle         \".9 .1 .0\"		\n\
 6       In       S      Initializing       \"0.0 .7 1.0\"            \n\
 6       In       S      Initializing       \"0.0 .7 1.0\"            \n\
 6       D       S      Deinitializing       \"0.0 .1 .7\"            \n\
 6       D       S      Deinitializing       \"0.0 .1 .7\"            \n\

+ 44 - 22
src/util/starpu_clusters_create.c

@@ -45,45 +45,67 @@ starpu_binding_function _starpu_cluster_type_get_func(starpu_cluster_types type)
 	return prologue_func;
 	return prologue_func;
 }
 }
 
 
-void starpu_openmp_prologue(void *sched_ctx_id)
+void starpu_openmp_prologue(void* arg)
 {
 {
-	int sched_ctx = *(int*)sched_ctx_id;
-	int *cpuids = NULL;
-	int ncpuids = 0;
 	int workerid = starpu_worker_get_id_check();
 	int workerid = starpu_worker_get_id_check();
 
 
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	{
 	{
-		starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
-		omp_set_num_threads(ncpuids);
-#pragma omp parallel
+		struct starpu_task *task = starpu_task_get_current();
+		int sched_ctx = task->sched_ctx;
+		struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx);
+		/* If the view of the worker doesn't correspond to the view of the task,
+			 adapt the thread team */
+		if (ctx_struct->parallel_view != task->possibly_parallel)
 		{
 		{
-			starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+			int *cpuids = NULL;
+			int ncpuids = 0;
+
+			starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+			if (!task->possibly_parallel)
+				ncpuids=1;
+			omp_set_num_threads(ncpuids);
+#pragma omp parallel
+			{
+				starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+			}
+			free(cpuids);
+			ctx_struct->parallel_view = !ctx_struct->parallel_view;
 		}
 		}
-		free(cpuids);
 	}
 	}
 	return;
 	return;
 }
 }
 
 
 #ifdef STARPU_MKL
 #ifdef STARPU_MKL
-void starpu_gnu_openmp_mkl_prologue(void *sched_ctx_id)
+void starpu_gnu_openmp_mkl_prologue(void* arg)
 {
 {
-	int sched_ctx = *(int*)sched_ctx_id;
-	int *cpuids = NULL;
-	int ncpuids = 0;
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();
 
 
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
 	{
 	{
-		starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
-		omp_set_num_threads(ncpuids);
-		mkl_set_num_threads(ncpuids);
-		mkl_set_dynamic(0);
-#pragma omp parallel
+		struct starpu_task *task = starpu_task_get_current();
+		int sched_ctx = task->sched_ctx;
+		struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx);
+		/* If the view of the worker doesn't correspond to the view of the task,
+			 adapt the thread team */
+		if (ctx_struct->parallel_view != task->possibly_parallel)
 		{
 		{
-			starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+			int *cpuids = NULL;
+			int ncpuids = 0;
+
+			starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
+			if (!task->possibly_parallel)
+				ncpuids=1;
+			omp_set_num_threads(ncpuids);
+			mkl_set_num_threads(ncpuids);
+			mkl_set_dynamic(0);
+#pragma omp parallel
+			{
+				starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
+			}
+			free(cpuids);
+			ctx_struct->parallel_view = !ctx_struct->parallel_view;
 		}
 		}
-		free(cpuids);
 	}
 	}
 	return;
 	return;
 }
 }
@@ -324,8 +346,8 @@ int _starpu_cluster_bind(struct _starpu_cluster *cluster)
 	else
 	else
 	{
 	{
 		func = _starpu_cluster_type_get_func(cluster->params->type);
 		func = _starpu_cluster_type_get_func(cluster->params->type);
-		func_arg = (void*) &cluster->id;
-		}
+		func_arg = NULL;
+	}
 
 
 	return starpu_task_insert(&_starpu_cluster_bind_cl,
 	return starpu_task_insert(&_starpu_cluster_bind_cl,
 				  STARPU_SCHED_CTX, cluster->id,
 				  STARPU_SCHED_CTX, cluster->id,

+ 1 - 1
starpufft/src/Makefile.am

@@ -18,7 +18,7 @@
 include $(top_srcdir)/starpu.mk
 include $(top_srcdir)/starpu.mk
 
 
 AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
 AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS)
 
 
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
 lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
 
 

+ 8 - 1
tests/datawizard/acquire_cb_insert.c

@@ -111,11 +111,18 @@ int main(int argc, char **argv)
 			starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0)
 			starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0)
 			);
 			);
 #else
 #else
-	starpu_data_acquire_cb(x_handle, STARPU_W, callback, NULL);
+	starpu_data_acquire_cb(x_handle, STARPU_R, callback, NULL);
 #endif
 #endif
 
 
+	/* Wait for acquisition (and thus insertion) */
+	starpu_data_acquire(x_handle, STARPU_W);
+	starpu_data_release(x_handle);
+
+	/* Now wait for the inserted task */
 	ret = starpu_task_wait_for_all();
 	ret = starpu_task_wait_for_all();
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	/* Can now clean */
 	starpu_data_unpartition(f_handle, STARPU_MAIN_RAM);
 	starpu_data_unpartition(f_handle, STARPU_MAIN_RAM);
 	starpu_data_unregister(f_handle);
 	starpu_data_unregister(f_handle);
 	starpu_data_unregister(x_handle);
 	starpu_data_unregister(x_handle);

+ 1 - 0
tests/loader.c

@@ -297,6 +297,7 @@ int main(int argc, char *argv[])
 				strcpy(libtool, top_builddir);
 				strcpy(libtool, top_builddir);
 				strcat(libtool, "/libtool");
 				strcat(libtool, "/libtool");
 
 
+				decode(&launcher, "@top_srcdir@", top_srcdir);
 				decode(&launcher_args, "@top_srcdir@", top_srcdir);
 				decode(&launcher_args, "@top_srcdir@", top_srcdir);
 
 
 				launcher_argv[0] = libtool;
 				launcher_argv[0] = libtool;

+ 2 - 2
tests/sched_ctx/sched_ctx_hierarchy.c

@@ -23,7 +23,7 @@ void func_cpu_bis(void *descr[], void *_args)
 {
 {
 	char msg;
 	char msg;
 	char worker_name[256];
 	char worker_name[256];
-	int worker_id = starpu_worker_get_id();
+	int worker_id = starpu_worker_get_id_check();
 	int worker_id_expected;
 	int worker_id_expected;
 	int ntasks;
 	int ntasks;
 
 
@@ -54,7 +54,7 @@ void func_cpu(void *descr[], void *_args)
 {
 {
 	char msg;
 	char msg;
 	char worker_name[256];
 	char worker_name[256];
-	int worker_id = starpu_worker_get_id();
+	int worker_id = starpu_worker_get_id_check();
 	int worker_id_expected;
 	int worker_id_expected;
 	int ntasks;
 	int ntasks;
 	unsigned sched_ctx_id;
 	unsigned sched_ctx_id;

+ 5 - 0
tools/Makefile.am

@@ -138,16 +138,21 @@ EXTRA_DIST =				\
 	dev/rename.sh			\
 	dev/rename.sh			\
 	perfmodels/README		\
 	perfmodels/README		\
 	perfmodels/sampling/codelets/tmp/mlr_init.out	 \
 	perfmodels/sampling/codelets/tmp/mlr_init.out	 \
+	valgrind/fxt.suppr		\
 	valgrind/hwloc.suppr		\
 	valgrind/hwloc.suppr		\
 	valgrind/libc.suppr		\
 	valgrind/libc.suppr		\
 	valgrind/libgomp.suppr		\
 	valgrind/libgomp.suppr		\
 	valgrind/libnuma.suppr		\
 	valgrind/libnuma.suppr		\
+	valgrind/madmpi.suppr		\
 	valgrind/opencl.suppr		\
 	valgrind/opencl.suppr		\
 	valgrind/openmpi.suppr		\
 	valgrind/openmpi.suppr		\
+	valgrind/openmp.suppr		\
+	valgrind/padico.suppr		\
 	valgrind/pthread.suppr		\
 	valgrind/pthread.suppr		\
 	valgrind/starpu.suppr		\
 	valgrind/starpu.suppr		\
 	valgrind/valgrind.suppr		\
 	valgrind/valgrind.suppr		\
 	valgrind/valgrind.sh		\
 	valgrind/valgrind.sh		\
+	valgrind/helgrind.sh		\
 	tsan/starpu.suppr		\
 	tsan/starpu.suppr		\
 	lsan/libc.suppr			\
 	lsan/libc.suppr			\
 	lsan/openmpi.suppr		\
 	lsan/openmpi.suppr		\

+ 3 - 2
tools/cppcheck/suppressions.txt

@@ -42,11 +42,11 @@ unusedStructMember:src/core/perfmodel/perfmodel_bus.c:65
 unusedStructMember:src/core/perfmodel/perfmodel_bus.c:66
 unusedStructMember:src/core/perfmodel/perfmodel_bus.c:66
 unusedStructMember:src/core/simgrid.c:225
 unusedStructMember:src/core/simgrid.c:225
 unusedStructMember:src/core/simgrid.c:226
 unusedStructMember:src/core/simgrid.c:226
-wrongPrintfScanfArgNum:src/core/simgrid.c:719
+wrongPrintfScanfArgNum:src/core/simgrid.c:731
 duplicateExpression:src/util/starpu_task_insert.c:52
 duplicateExpression:src/util/starpu_task_insert.c:52
 
 
 // TODO: this could be an error?
 // TODO: this could be an error?
-redundantCopy:src/core/disk_ops/disk_leveldb.cpp:192
+redundantCopy:src/core/disk_ops/disk_leveldb.cpp:194
 
 
 nullPointerRedundantCheck:src/common/rbtree.c
 nullPointerRedundantCheck:src/common/rbtree.c
 unreadVariable:src/datawizard/interfaces/*
 unreadVariable:src/datawizard/interfaces/*
@@ -62,5 +62,6 @@ allocaCalled:gcc-plugin/src/*
 unusedVariable:gcc-plugin/tests/*
 unusedVariable:gcc-plugin/tests/*
 unreadVariable:gcc-plugin/tests/*
 unreadVariable:gcc-plugin/tests/*
 duplicateExpression:gcc-plugin/src/*
 duplicateExpression:gcc-plugin/src/*
+negativeIndex:gcc-plugin/src/*
 
 
 pointerSize:socl/src/cl_getcontextinfo.c:33
 pointerSize:socl/src/cl_getcontextinfo.c:33

+ 143 - 0
tools/valgrind/hwloc.suppr

@@ -20,3 +20,146 @@
    obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8
    obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8
    fun:hwloc_topology_init
    fun:hwloc_topology_init
 }
 }
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: indirect
+   fun:realloc
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: indirect
+   fun:malloc
+   fun:strdup
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: indirect
+   fun:calloc
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:calloc
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: indirect
+   fun:malloc
+   fun:strdup
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:malloc
+   fun:strdup
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:malloc
+   fun:strdup
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:realloc
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:*
+   obj:*
+   obj:*
+   obj:*
+   fun:hwloc_discover
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:pci_device_get_device_name
+   fun:hwloc_look_pci
+}
+
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:realloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:pci_device_get_device_name
+   fun:hwloc_look_pci
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+}

+ 167 - 0
tools/valgrind/openmp.suppr

@@ -0,0 +1,167 @@
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:realloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:pci_device_get_device_name
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:pci_device_get_device_name
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:pci_device_get_device_name
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1
+   fun:hwloc_look_pci
+   fun:hwloc_discover
+   fun:hwloc_topology_load
+   fun:_starpu_init_topology
+   fun:_starpu_topology_get_nhwcpu
+   fun:check_bus_config_file
+   fun:_starpu_load_bus_performance_files
+   fun:starpu_initialize
+   fun:starpu_init
+   fun:omp_initial_thread_setup
+   fun:omp_initial_region_setup
+   fun:starpu_omp_init
+   fun:omp_constructor
+   fun:__libc_csu_init
+   fun:(below main)
+}