Procházet zdrojové kódy

Merge branch 'master' into fpga

Nathalie Furmento před 5 roky
rodič
revize
16efa503b9
55 změnil soubory, kde provedl 346 přidání a 99 odebrání
  1. 8 3
      configure.ac
  2. 8 4
      examples/interface/complex.c
  3. 10 1
      examples/interface/complex_interface.c
  4. 1 1
      include/starpu_bitmap.h
  5. 12 6
      include/starpu_data_interfaces.h
  6. 1 1
      include/starpu_fxt.h
  7. 3 3
      include/starpu_openmp.h
  8. 3 3
      include/starpu_perf_monitoring.h
  9. 1 1
      include/starpu_profiling.h
  10. 1 1
      include/starpu_scheduler.h
  11. 7 1
      mpi/examples/benchs/sendrecv_bench.c
  12. 6 0
      mpi/src/mpi/starpu_mpi_early_data.c
  13. 2 0
      mpi/src/mpi/starpu_mpi_early_data.h
  14. 1 0
      mpi/src/mpi/starpu_mpi_early_request.h
  15. 2 2
      mpi/src/mpi/starpu_mpi_mpi.c
  16. 9 0
      mpi/tests/driver.c
  17. 8 4
      mpi/tests/early_stuff.c
  18. 7 14
      src/common/fxt.h
  19. 1 0
      src/core/perfmodel/perfmodel_bus.c
  20. 6 0
      src/core/topology.c
  21. 3 0
      src/core/workers.c
  22. 4 2
      src/datawizard/coherency.h
  23. 75 3
      src/datawizard/interfaces/data_interface.c
  24. 12 0
      src/debug/traces/anim.c
  25. 55 32
      src/debug/traces/starpu_fxt.c
  26. 1 0
      src/debug/traces/starpu_fxt.h
  27. 8 4
      src/drivers/mp_common/source_common.c
  28. 22 1
      src/sched_policies/component_eager.c
  29. 2 4
      src/sched_policies/component_fifo.c
  30. 2 4
      src/sched_policies/component_prio.c
  31. 7 0
      src/sched_policies/component_random.c
  32. 1 1
      tests/microbenchs/parallel_independent_heterogeneous_tasks.sh
  33. 1 1
      tests/microbenchs/parallel_independent_homogeneous_tasks.sh
  34. 1 0
      tests/model-checking/platform.xml
  35. 1 0
      tools/dev/lsan/suppressions
  36. 8 0
      tools/dev/valgrind/fxt.suppr
  37. 9 0
      tools/dev/valgrind/papi.suppr
  38. 4 1
      tools/gdbinit
  39. 1 0
      tools/perfmodels/sampling/bus/attila.platform.v4.xml
  40. 1 0
      tools/perfmodels/sampling/bus/attila.platform.xml
  41. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml
  42. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml
  43. 1 0
      tools/perfmodels/sampling/bus/hannibal.platform.v4.xml
  44. 1 0
      tools/perfmodels/sampling/bus/hannibal.platform.xml
  45. 1 0
      tools/perfmodels/sampling/bus/idgraf.platform.v4.xml
  46. 1 0
      tools/perfmodels/sampling/bus/idgraf.platform.xml
  47. 1 0
      tools/perfmodels/sampling/bus/mirage.platform.v4.xml
  48. 1 0
      tools/perfmodels/sampling/bus/mirage.platform.xml
  49. 1 0
      tools/perfmodels/sampling/bus/sirocco.platform.v4.xml
  50. 1 0
      tools/perfmodels/sampling/bus/sirocco.platform.xml
  51. 1 0
      tools/starpu_fxt_data_trace.c
  52. 14 0
      tools/starpu_fxt_stats.c
  53. 1 0
      tools/starpu_perfmodel_plot.c
  54. 2 0
      tools/starpu_perfmodel_recdump.c
  55. 3 1
      tools/starpu_tasks_rec_complete.c

+ 8 - 3
configure.ac

@@ -2055,6 +2055,10 @@ if test x$enable_debug = xyes; then
 	FCFLAGS="$FCFLAGS -O0"
 	FCFLAGS="$FCFLAGS -O0"
 	enable_spinlock_check=yes
 	enable_spinlock_check=yes
 	if test x$GCC = xyes; then
 	if test x$GCC = xyes; then
+		CFLAGS="$CFLAGS -Og"
+		CXXFLAGS="$CXXFLAGS -Og"
+		FFLAGS="$FFLAGS -Og"
+		FCFLAGS="$FCFLAGS -Og"
 		if test x$starpu_windows != xyes ; then
 		if test x$starpu_windows != xyes ; then
 			if test x$enable_fstack_protector_all = xyes ; then
 			if test x$enable_fstack_protector_all = xyes ; then
 			   CFLAGS="$CFLAGS -fstack-protector-all"
 			   CFLAGS="$CFLAGS -fstack-protector-all"
@@ -3228,7 +3232,7 @@ AM_CONDITIONAL(STARPU_BUILD_STARPUFFT_EXAMPLES, [test x$enable_starpufft_example
 ##########################################
 ##########################################
 
 
 have_valid_hwloc=no
 have_valid_hwloc=no
-SAVED_LDFLAGS="${LDFLAGS}"
+SAVED_LIBS="${LIBS}"
 SAVED_CPPFLAGS="${CPPFLAGS}"
 SAVED_CPPFLAGS="${CPPFLAGS}"
 SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
 SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
 AC_ARG_WITH([hwloc],
 AC_ARG_WITH([hwloc],
@@ -3273,7 +3277,7 @@ AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"],
       [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])]
       [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])]
      )
      )
 
 
-LDFLAGS="${HWLOC_LIBS} ${SAVED_LDFLAGS}"
+LIBS="${HWLOC_LIBS} ${SAVED_LIBS}"
 CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}"
 CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}"
 
 
 AS_IF([test "$have_valid_hwloc" = "yes"],
 AS_IF([test "$have_valid_hwloc" = "yes"],
@@ -3285,9 +3289,10 @@ AS_IF([test "$have_valid_hwloc" = "yes"],
 
 
 AC_CHECK_FUNCS([hwloc_topology_dup])
 AC_CHECK_FUNCS([hwloc_topology_dup])
 AC_CHECK_FUNCS([hwloc_topology_set_components])
 AC_CHECK_FUNCS([hwloc_topology_set_components])
+AC_CHECK_FUNCS([hwloc_cpukinds_get_nr])
 AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes)
 AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes)
 
 
-LDFLAGS="${SAVED_LDFLAGS}"
+LIBS="${SAVED_LIBS}"
 CPPFLAGS="${SAVED_CPPFLAGS}"
 CPPFLAGS="${SAVED_CPPFLAGS}"
 export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH
 export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH
 
 

+ 8 - 4
examples/interface/complex.c

@@ -127,6 +127,9 @@ int main(void)
 #endif
 #endif
 	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
 	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
+	/* Create a vector of two complexs.  */
+	starpu_complex_data_register(&handle3, -1, 0, 0, 2);
+	starpu_complex_data_register(&handle4, -1, 0, 0, 1);
 
 
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
@@ -189,9 +192,6 @@ int main(void)
 	copy_imaginary = 77.0;
 	copy_imaginary = 77.0;
 	starpu_data_release(handle2);
 	starpu_data_release(handle2);
 
 
-	/* Create a vector of two complexs.  */
-	starpu_complex_data_register(&handle3, -1, 0, 0, 2);
-
 	/* Split it in two pieces (thus one complex each).  */
 	/* Split it in two pieces (thus one complex each).  */
 	struct starpu_data_filter f =
 	struct starpu_data_filter f =
 	{
 	{
@@ -219,6 +219,8 @@ int main(void)
 
 
 	/* Show it.  */
 	/* Show it.  */
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0);
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0);
+	if (ret == -ENODEV) goto end;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 
 	/* Get the real and imaginary vectors.  */
 	/* Get the real and imaginary vectors.  */
 	struct starpu_data_filter fcanon =
 	struct starpu_data_filter fcanon =
@@ -249,11 +251,11 @@ int main(void)
 	starpu_data_unpartition(handle3, STARPU_MAIN_RAM);
 	starpu_data_unpartition(handle3, STARPU_MAIN_RAM);
 
 
 	/* Use helper starpu_data_cpy */
 	/* Use helper starpu_data_cpy */
-	starpu_complex_data_register(&handle4, -1, 0, 0, 1);
 	starpu_data_cpy(handle4, handle1, 0, NULL, NULL);
 	starpu_data_cpy(handle4, handle1, 0, NULL, NULL);
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0);
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
 	/* Compare two different complexs.  */
 	/* Compare two different complexs.  */
 	ret = starpu_task_insert(&cl_compare,
 	ret = starpu_task_insert(&cl_compare,
 				 STARPU_R, handle1,
 				 STARPU_R, handle1,
@@ -278,6 +280,8 @@ end:
 #endif
 #endif
 	starpu_data_unregister(handle1);
 	starpu_data_unregister(handle1);
 	starpu_data_unregister(handle2);
 	starpu_data_unregister(handle2);
+	starpu_data_unregister(handle3);
+	starpu_data_unregister(handle4);
 	starpu_shutdown();
 	starpu_shutdown();
 	if (ret == -ENODEV) return 77; else return !compare;
 	if (ret == -ENODEV) return 77; else return !compare;
 }
 }

+ 10 - 1
examples/interface/complex_interface.c

@@ -170,6 +170,14 @@ static starpu_ssize_t complex_describe(void *data_interface, char *buf, size_t s
 	return snprintf(buf, size, "Complex%d", complex_interface->nx);
 	return snprintf(buf, size, "Complex%d", complex_interface->nx);
 }
 }
 
 
+static int complex_compare(void *data_interface_a, void *data_interface_b)
+{
+	struct starpu_complex_interface *complex_a = (struct starpu_complex_interface *) data_interface_a;
+	struct starpu_complex_interface *complex_b = (struct starpu_complex_interface *) data_interface_b;
+
+	return (complex_a->nx == complex_b->nx);
+}
+
 static int copy_any_to_any(void *src_interface, unsigned src_node,
 static int copy_any_to_any(void *src_interface, unsigned src_node,
 			   void *dst_interface, unsigned dst_node,
 			   void *dst_interface, unsigned dst_node,
 			   void *async_data)
 			   void *async_data)
@@ -210,7 +218,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
 	.pointer_is_inside = complex_pointer_is_inside,
 	.pointer_is_inside = complex_pointer_is_inside,
 	.pack_data = complex_pack_data,
 	.pack_data = complex_pack_data,
 	.unpack_data = complex_unpack_data,
 	.unpack_data = complex_unpack_data,
-	.describe = complex_describe
+	.describe = complex_describe,
+	.compare = complex_compare
 };
 };
 
 
 void starpu_complex_data_register(starpu_data_handle_t *handleptr, unsigned home_node, double *real, double *imaginary, int nx)
 void starpu_complex_data_register(starpu_data_handle_t *handleptr, unsigned home_node, double *real, double *imaginary, int nx)

+ 1 - 1
include/starpu_bitmap.h

@@ -120,7 +120,7 @@ static int _starpu_count_bit_static(unsigned long e)
 #endif
 #endif
 }
 }
 
 
-static inline struct starpu_bitmap *starpu_bitmap_create()
+static inline struct starpu_bitmap *starpu_bitmap_create(void)
 {
 {
 	return (struct starpu_bitmap *) calloc(1, sizeof(struct starpu_bitmap));
 	return (struct starpu_bitmap *) calloc(1, sizeof(struct starpu_bitmap));
 }
 }

+ 12 - 6
include/starpu_data_interfaces.h

@@ -243,9 +243,9 @@ struct starpu_data_copy_methods
 	*/
 	*/
 	int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
 	int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
 #else
 #else
-	int (*ram_to_cuda_async)();
-	int (*cuda_to_ram_async)();
-	int (*cuda_to_cuda_async)();
+	int (*ram_to_cuda_async)(void);
+	int (*cuda_to_ram_async)(void);
+	int (*cuda_to_cuda_async)(void);
 #endif
 #endif
 
 
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
@@ -280,9 +280,9 @@ struct starpu_data_copy_methods
 	*/
 	*/
 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
 #else
 #else
-	int (*ram_to_opencl_async)();
-	int (*opencl_to_ram_async)();
-	int (*opencl_to_opencl_async)();
+	int (*ram_to_opencl_async)(void);
+	int (*opencl_to_ram_async)(void);
+	int (*opencl_to_opencl_async)(void);
 #endif
 #endif
 
 
 	/**
 	/**
@@ -515,6 +515,7 @@ struct starpu_data_interface_ops
 	   Compare the data size and layout of two interfaces (nx, ny, ld, elemsize,
 	   Compare the data size and layout of two interfaces (nx, ny, ld, elemsize,
 	   etc.), to be used for indexing performance models. It should return 1 if
 	   etc.), to be used for indexing performance models. It should return 1 if
 	   the two interfaces size and layout match computation-wise, and 0 otherwise.
 	   the two interfaces size and layout match computation-wise, and 0 otherwise.
+	   It does *not* compare the actual content of the interfaces.
 	*/
 	*/
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
 
 
@@ -709,6 +710,11 @@ starpu_data_handle_t starpu_data_lookup(const void *ptr);
 int starpu_data_get_home_node(starpu_data_handle_t handle);
 int starpu_data_get_home_node(starpu_data_handle_t handle);
 
 
 /**
 /**
+   Print basic informations on \p handle on \p node
+ */
+void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream);
+
+/**
    Return the next available id for a newly created data interface
    Return the next available id for a newly created data interface
    (\ref DefiningANewDataInterface).
    (\ref DefiningANewDataInterface).
 */
 */

+ 1 - 1
include/starpu_fxt.h

@@ -138,7 +138,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir);
 /**
 /**
     Wrapper to get value of env variable STARPU_FXT_TRACE
     Wrapper to get value of env variable STARPU_FXT_TRACE
 */
 */
-int starpu_fxt_is_enabled();
+int starpu_fxt_is_enabled(void);
 
 
 /**
 /**
    Add an event in the execution trace if FxT is enabled.
    Add an event in the execution trace if FxT is enabled.

+ 3 - 3
include/starpu_openmp.h

@@ -639,7 +639,7 @@ extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_num_procs
    \sa starpu_omp_get_num_procs
  */
  */
-extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_num_threads(void) __STARPU_OMP_NOTHROW;
 
 
 /**
 /**
    Return the rank of the current thread among the threads
    Return the rank of the current thread among the threads
@@ -652,7 +652,7 @@ extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_num_procs
    \sa starpu_omp_get_num_procs
  */
  */
-extern int starpu_omp_get_thread_num() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_thread_num(void) __STARPU_OMP_NOTHROW;
 
 
 /**
 /**
    Return the maximum number of threads that can be used to
    Return the maximum number of threads that can be used to
@@ -665,7 +665,7 @@ extern int starpu_omp_get_thread_num() __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_thread_num
    \sa starpu_omp_get_thread_num
    \sa starpu_omp_get_num_procs
    \sa starpu_omp_get_num_procs
  */
  */
-extern int starpu_omp_get_max_threads() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_max_threads(void) __STARPU_OMP_NOTHROW;
 
 
 /**
 /**
    Return the number of StarPU CPU workers.
    Return the number of StarPU CPU workers.

+ 3 - 3
include/starpu_perf_monitoring.h

@@ -65,11 +65,11 @@ struct starpu_perf_counter_set;
 /**
 /**
   Start collecting performance counter values.
   Start collecting performance counter values.
   */
   */
-void starpu_perf_counter_collection_start();
+void starpu_perf_counter_collection_start(void);
 /**
 /**
   Stop collecting performance counter values.
   Stop collecting performance counter values.
   */
   */
-void starpu_perf_counter_collection_stop();
+void starpu_perf_counter_collection_stop(void);
 
 
 /**
 /**
   Translate scope name constant string to scope id.
   Translate scope name constant string to scope id.
@@ -170,7 +170,7 @@ void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, str
 /**
 /**
   Unset the global listener.
   Unset the global listener.
   */
   */
-void starpu_perf_counter_unset_global_listener();
+void starpu_perf_counter_unset_global_listener(void);
 /**
 /**
   Unset the per_worker listener.
   Unset the per_worker listener.
   */
   */

+ 1 - 1
include/starpu_profiling.h

@@ -316,7 +316,7 @@ void starpu_profiling_worker_helper_display_summary(void);
    option \ref enable-memory-stats "--enable-memory-stats" (see \ref
    option \ref enable-memory-stats "--enable-memory-stats" (see \ref
    MemoryFeedback).
    MemoryFeedback).
 */
 */
-void starpu_data_display_memory_stats();
+void starpu_data_display_memory_stats(void);
 
 
 /** @} */
 /** @} */
 
 

+ 1 - 1
include/starpu_scheduler.h

@@ -208,7 +208,7 @@ struct starpu_sched_policy
    Return an <c>NULL</c>-terminated array of all the predefined
    Return an <c>NULL</c>-terminated array of all the predefined
    scheduling policies.
    scheduling policies.
 */
 */
-struct starpu_sched_policy **starpu_sched_get_predefined_policies();
+struct starpu_sched_policy **starpu_sched_get_predefined_policies(void);
 
 
 /**
 /**
    When there is no available task for a worker, StarPU blocks this
    When there is no available task for a worker, StarPU blocks this

+ 7 - 1
mpi/examples/benchs/sendrecv_bench.c

@@ -54,7 +54,7 @@ int main(int argc, char **argv)
 		{
 		{
 			man();
 			man();
 		}
 		}
-		if (strcmp(argv[i], "--bidir") == 0)
+		else if (strcmp(argv[i], "--bidir") == 0)
 		{
 		{
 			bidir = 1;
 			bidir = 1;
 			printf("Communications will be full-duplex.\n");
 			printf("Communications will be full-duplex.\n");
@@ -83,6 +83,12 @@ int main(int argc, char **argv)
 		return STARPU_TEST_SKIPPED;
 		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
+#if !defined(STARPU_LONG_CHECK)
+	if (rank == 0)
+	{
+		printf("To have a more precise benchmark, configure StarPU with --enable-long-check\n");
+	}
+#endif
 
 
 	if (pause_workers)
 	if (pause_workers)
 	{
 	{

+ 6 - 0
mpi/src/mpi/starpu_mpi_early_data.c

@@ -92,6 +92,12 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _star
 	return early_data_handle;
 	return early_data_handle;
 }
 }
 
 
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	free(early_data_handle);
+	early_data_handle = NULL;
+}
+
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag)
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag)
 {
 {
 	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
 	struct _starpu_mpi_early_data_handle_hashlist *hashlist;

+ 2 - 0
mpi/src/mpi/starpu_mpi_early_data.h

@@ -61,7 +61,9 @@ void _starpu_mpi_early_data_shutdown(void);
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC;
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC;
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag);
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag);
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle);
 
 
+// Not used now but needed for fault tolerance
 struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag);
 struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 1 - 0
mpi/src/mpi/starpu_mpi_early_request.h

@@ -47,6 +47,7 @@ void _starpu_mpi_early_request_check_termination(void);
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
 struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
 
+// Not used now but needed for fault tolerance
 struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 2 - 2
mpi/src/mpi/starpu_mpi_mpi.c

@@ -862,8 +862,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 
 	if (req->backend->internal_req)
 	if (req->backend->internal_req)
 	{
 	{
-		free(req->backend->early_data_handle);
-		req->backend->early_data_handle = NULL;
+		_starpu_mpi_early_data_delete(req->backend->early_data_handle);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -1546,6 +1545,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_starpu_mpi_early_request_check_termination();
 	_starpu_mpi_early_request_check_termination();
 	_starpu_mpi_early_data_check_termination();
 	_starpu_mpi_early_data_check_termination();
 	_starpu_mpi_sync_data_check_termination();
 	_starpu_mpi_sync_data_check_termination();
+	_starpu_mpi_req_prio_list_deinit(&ready_send_requests);
 
 
 	if (argc_argv->initialize_mpi)
 	if (argc_argv->initialize_mpi)
 	{
 	{

+ 9 - 0
mpi/tests/driver.c

@@ -18,6 +18,14 @@
 #include <math.h>
 #include <math.h>
 #include "helper.h"
 #include "helper.h"
 
 
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ret, rank, size, i;
 	int ret, rank, size, i;
@@ -132,3 +140,4 @@ int main(int argc, char **argv)
 
 
 	return 0;
 	return 0;
 }
 }
+#endif

+ 8 - 4
mpi/tests/early_stuff.c

@@ -63,14 +63,17 @@ void early_data()
 	_starpu_mpi_early_data_add(edh[0]);
 	_starpu_mpi_early_data_add(edh[0]);
 	_starpu_mpi_early_data_add(edh[1]);
 	_starpu_mpi_early_data_add(edh[1]);
 
 
-	hash = _starpu_mpi_early_data_extract(&node_tag[1]);
+	hash = _starpu_mpi_early_data_extract(&node_tag[0]);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1);
 	early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list);
 	early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list);
-	STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0);
+	_starpu_mpi_early_data_delete(early);
+	free(hash);
 
 
-	early = _starpu_mpi_early_data_find(&node_tag[0]);
-	STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag);
+	early = _starpu_mpi_early_data_find(&node_tag[1]);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag);
+	_starpu_mpi_early_data_delete(early);
 }
 }
 
 
 void early_request()
 void early_request()
@@ -100,6 +103,7 @@ void early_request()
 	early = _starpu_mpi_req_list_pop_front(&hash->list);
 	early = _starpu_mpi_req_list_pop_front(&hash->list);
 	STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0);
 	STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0);
 	STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm);
 	STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm);
+	free(hash);
 }
 }
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)

+ 7 - 14
src/common/fxt.h

@@ -749,17 +749,7 @@ do {									\
 
 
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 do {									\
 do {									\
-        const char *model_name = _starpu_job_get_model_name((job)), *name = _starpu_job_get_task_name((job));         \
-	if (name)                                                 \
-	{								\
-		/* we include the task name */			\
-		_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 1, name); \
-		if (model_name)					\
-			_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
-	}								\
-	else {                                                          \
-		FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 0); \
-	}								\
+	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \
 	{								\
 	{								\
 		if ((job)->task->cl)					\
 		if ((job)->task->cl)					\
 		{							\
 		{							\
@@ -852,14 +842,17 @@ do {									\
 
 
 #define _STARPU_TRACE_TASK_NAME(job)					\
 #define _STARPU_TRACE_TASK_NAME(job)					\
 	do {								\
 	do {								\
-        const char *model_name = _starpu_job_get_task_name((job));                       \
-	if (model_name)					                        \
+        const char *model_name = _starpu_job_get_model_name((job));		\
+	const char *name = _starpu_job_get_task_name((job));			\
+	if (name)					                        \
 	{									\
 	{									\
-		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, model_name);\
+		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, name);\
 	}									\
 	}									\
 	else {									\
 	else {									\
 		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, "unknown");\
 		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, "unknown");\
 	}									\
 	}									\
+	if (model_name)					\
+		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 #define _STARPU_TRACE_TASK_COLOR(job)						\

+ 1 - 0
src/core/perfmodel/perfmodel_bus.c

@@ -2684,6 +2684,7 @@ static void write_bus_platform_file_content(int version)
 			"   <prop id=\"network/TCP%cgamma\" value=\"-1\"></prop>\n"
 			"   <prop id=\"network/TCP%cgamma\" value=\"-1\"></prop>\n"
 			"   <prop id=\"network/latency%cfactor\" value=\"1\"></prop>\n"
 			"   <prop id=\"network/latency%cfactor\" value=\"1\"></prop>\n"
 			"   <prop id=\"network/bandwidth%cfactor\" value=\"1\"></prop>\n"
 			"   <prop id=\"network/bandwidth%cfactor\" value=\"1\"></prop>\n"
+			"   <prop id=\"network/crosstraffic\" value=\"0\"></prop>\n"
 			"   <prop id=\"network/weight%cS\" value=\"0.0\"></prop>\n"
 			"   <prop id=\"network/weight%cS\" value=\"0.0\"></prop>\n"
 			" </config>\n"
 			" </config>\n"
 			" <AS  id=\"AS0\"  routing=\"Full\">\n"
 			" <AS  id=\"AS0\"  routing=\"Full\">\n"

+ 6 - 0
src/core/topology.c

@@ -878,6 +878,12 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 	_starpu_topology_filter(topology->hwtopology);
 	_starpu_topology_filter(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
 
 
+#ifdef HAVE_HWLOC_CPUKINDS_GET_NR
+	int nr_kinds = hwloc_cpukinds_get_nr(topology->hwtopology, 0);
+	if (nr_kinds > 1)
+		_STARPU_DISP("Warning: there are several kinds of CPU on this system. For now StarPU assumes all CPU are equal\n", strerror(errno));
+#endif
+
 	if (starpu_get_env_number_default("STARPU_WORKERS_GETBIND", 0))
 	if (starpu_get_env_number_default("STARPU_WORKERS_GETBIND", 0))
 	{
 	{
 		/* Respect the existing binding */
 		/* Respect the existing binding */

+ 3 - 0
src/core/workers.c

@@ -1599,6 +1599,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
 
 	_starpu_load_bus_performance_files();
 	_starpu_load_bus_performance_files();
 
 
+	/* Note: nothing before here should be allocating anything, in case we
+	 * actually return ENODEV here */
+
 	/* Depending on whether we are a MP sink or not, we must build the
 	/* Depending on whether we are a MP sink or not, we must build the
 	 * topology with MP nodes or not. */
 	 * topology with MP nodes or not. */
 	ret = _starpu_build_topology(&_starpu_config, is_a_sink);
 	ret = _starpu_build_topology(&_starpu_config, is_a_sink);

+ 4 - 2
src/datawizard/coherency.h

@@ -178,7 +178,8 @@ struct _starpu_data_state
 	unsigned active:1;
 	unsigned active:1;
 	unsigned active_ro:1;
 	unsigned active_ro:1;
 
 
-	/** describe the state of the data in term of coherency */
+	/** describe the state of the data in term of coherency
+	 * This is execution-time state. */
 	struct _starpu_data_replicate per_node[STARPU_MAXNODES];
 	struct _starpu_data_replicate per_node[STARPU_MAXNODES];
 	struct _starpu_data_replicate *per_worker;
 	struct _starpu_data_replicate *per_worker;
 
 
@@ -209,7 +210,8 @@ struct _starpu_data_state
 
 
 	/** Does StarPU have to enforce some implicit data-dependencies ? */
 	/** Does StarPU have to enforce some implicit data-dependencies ? */
 	unsigned sequential_consistency:1;
 	unsigned sequential_consistency:1;
-	/** Is the data initialized, or a task is already submitted to initialize it */
+	/** Is the data initialized, or a task is already submitted to initialize it
+	 * This is submission-time initialization state. */
 	unsigned initialized:1;
 	unsigned initialized:1;
 	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
 	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
 	unsigned readonly:1;
 	unsigned readonly:1;

+ 75 - 3
src/datawizard/interfaces/data_interface.c

@@ -53,8 +53,6 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 void _starpu_data_interface_init(void)
 void _starpu_data_interface_init(void)
 {
 {
 	_starpu_spin_init(&registered_handles_lock);
 	_starpu_spin_init(&registered_handles_lock);
-	_id_to_ops_array_size = 20;
-	_STARPU_MALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *));
 
 
 	/* Just for testing purpose */
 	/* Just for testing purpose */
 	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
 	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
@@ -72,6 +70,8 @@ void _starpu_data_interface_shutdown()
 
 
 	_starpu_spin_destroy(&registered_handles_lock);
 	_starpu_spin_destroy(&registered_handles_lock);
 	free(_id_to_ops_array);
 	free(_id_to_ops_array);
+	_id_to_ops_array = NULL;
+	_id_to_ops_array_size = 0;
 
 
 	HASH_ITER(hh, registered_handles, entry, tmp)
 	HASH_ITER(hh, registered_handles, entry, tmp)
 	{
 	{
@@ -573,7 +573,14 @@ void starpu_data_register(starpu_data_handle_t *handleptr, int home_node,
 	{
 	{
 		if ((unsigned)ops->interfaceid > _id_to_ops_array_size)
 		if ((unsigned)ops->interfaceid > _id_to_ops_array_size)
 		{
 		{
-			_id_to_ops_array_size *= 2;
+			if (!_id_to_ops_array_size)
+			{
+				_id_to_ops_array_size = 16;
+			}
+			else
+			{
+				_id_to_ops_array_size *= 2;
+			}
 			_STARPU_REALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *));
 			_STARPU_REALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *));
 		}
 		}
 		_id_to_ops_array[ops->interfaceid-STARPU_MAX_INTERFACE_ID] = ops;
 		_id_to_ops_array[ops->interfaceid-STARPU_MAX_INTERFACE_ID] = ops;
@@ -1063,6 +1070,9 @@ retry_busy:
 	STARPU_HG_ENABLE_CHECKING(handle->post_sync_tasks_cnt);
 	STARPU_HG_ENABLE_CHECKING(handle->post_sync_tasks_cnt);
 	STARPU_HG_ENABLE_CHECKING(handle->busy_count);
 	STARPU_HG_ENABLE_CHECKING(handle->busy_count);
 
 
+	_starpu_data_requester_prio_list_deinit(&handle->req_list);
+	_starpu_data_requester_prio_list_deinit(&handle->reduction_req_list);
+
 	if (handle->switch_cl)
 	if (handle->switch_cl)
 	{
 	{
 		free(handle->switch_cl->dyn_nodes);
 		free(handle->switch_cl->dyn_nodes);
@@ -1285,3 +1295,65 @@ unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned
 
 
 	return dimensions;
 	return dimensions;
 }
 }
+
+void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream)
+{
+	if (handle->ops == NULL)
+		fprintf(stream, "Undefined");
+	else
+	{
+		switch (handle->ops->interfaceid)
+		{
+		case(STARPU_MATRIX_INTERFACE_ID):
+			fprintf(stream, "Matrix");
+			break;
+		case(STARPU_BLOCK_INTERFACE_ID):
+			fprintf(stream, "Block");
+			break;
+		case(STARPU_VECTOR_INTERFACE_ID):
+			fprintf(stream, "Vector");
+			break;
+		case(STARPU_CSR_INTERFACE_ID):
+			fprintf(stream, "CSR");
+			break;
+		case(STARPU_BCSR_INTERFACE_ID):
+			fprintf(stream, "BCSR");
+			break;
+		case(STARPU_VARIABLE_INTERFACE_ID):
+			fprintf(stream, "Variable");
+			break;
+		case(STARPU_VOID_INTERFACE_ID):
+			fprintf(stream, "Void");
+			break;
+		case(STARPU_MULTIFORMAT_INTERFACE_ID):
+			fprintf(stream, "Multfiformat");
+			break;
+		case(STARPU_COO_INTERFACE_ID):
+			fprintf(stream, "COO");
+			break;
+		case(STARPU_TENSOR_INTERFACE_ID):
+			fprintf(stream, "Tensor");
+			break;
+		case(STARPU_UNKNOWN_INTERFACE_ID ):
+			fprintf(stream, "UNKNOWN");
+			break;
+		default:
+			fprintf(stream, "User interface with id %d", handle->ops->interfaceid);
+			break;
+		}
+	}
+	void *data_interface = NULL;
+	if (starpu_data_test_if_allocated_on_node(handle, node))
+		data_interface = starpu_data_get_interface_on_node(handle, node);
+	if (starpu_data_test_if_allocated_on_node(handle, handle->home_node))
+		data_interface = starpu_data_get_interface_on_node(handle, handle->home_node);
+	if (handle->ops && handle->ops->describe && data_interface)
+	{
+		char buffer[1024];
+		handle->ops->describe(data_interface, buffer, sizeof(buffer));
+		fprintf(stream, " %s\n", buffer);
+	}
+	else
+		fprintf(stream, "\n");
+
+}

+ 12 - 0
src/debug/traces/anim.c

@@ -80,6 +80,18 @@ void _starpu_fxt_component_new(uint64_t component, char *name)
 	COMPONENT_ADD(components, ptr, comp);
 	COMPONENT_ADD(components, ptr, comp);
 }
 }
 
 
+void _starpu_fxt_component_deinit(void)
+{
+	struct component *comp, *tmp;
+	HASH_ITER(hh, components, comp, tmp)
+	{
+		HASH_DEL(components, comp);
+		free(comp->children);
+		free(comp->name);
+		free(comp);
+	}
+}
+
 static void fxt_component_dump(FILE *file, struct component *comp, unsigned depth)
 static void fxt_component_dump(FILE *file, struct component *comp, unsigned depth)
 {
 {
 	unsigned i;
 	unsigned i;

+ 55 - 32
src/debug/traces/starpu_fxt.c

@@ -198,15 +198,9 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		goto out;
 		goto out;
 
 
 	if (task->name)
 	if (task->name)
-	{
 		fprintf(tasks_file, "Name: %s\n", task->name);
 		fprintf(tasks_file, "Name: %s\n", task->name);
-		free(task->name);
-	}
 	if (task->model_name)
 	if (task->model_name)
-	{
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-		free(task->model_name);
-	}
 	fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id);
 	fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id);
 	if (task->submit_order)
 	if (task->submit_order)
 		fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order);
 		fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order);
@@ -217,18 +211,13 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		for (i = 0; i < task->ndeps; i++)
 		for (i = 0; i < task->ndeps; i++)
 			fprintf(tasks_file, " %s%lu", prefix, task->dependencies[i]);
 			fprintf(tasks_file, " %s%lu", prefix, task->dependencies[i]);
 		fprintf(tasks_file, "\n");
 		fprintf(tasks_file, "\n");
-		free(task->dependencies);
 	}
 	}
 	if (task->dep_labels)
 	if (task->dep_labels)
 	{
 	{
 		fprintf(tasks_file, "DepLabels:");
 		fprintf(tasks_file, "DepLabels:");
 		for (i = 0; i < task->ndeps; i++)
 		for (i = 0; i < task->ndeps; i++)
-		{
 			fprintf(tasks_file, " %s", task->dep_labels[i]);
 			fprintf(tasks_file, " %s", task->dep_labels[i]);
-			free(task->dep_labels[i]);
-		}
 		fprintf(tasks_file, "\n");
 		fprintf(tasks_file, "\n");
-		free(task->dep_labels);
 	}
 	}
 	fprintf(tasks_file, "Tag: %"PRIx64"\n", task->tag);
 	fprintf(tasks_file, "Tag: %"PRIx64"\n", task->tag);
 	if (task->workerid >= 0)
 	if (task->workerid >= 0)
@@ -256,10 +245,7 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "\n");
 		fprintf(tasks_file, "\n");
 	}
 	}
 	if (task->parameters)
 	if (task->parameters)
-	{
 		fprintf(tasks_file, "Parameters: %s\n", task->parameters);
 		fprintf(tasks_file, "Parameters: %s\n", task->parameters);
-		free(task->parameters);
-	}
 	if (task->data)
 	if (task->data)
 	{
 	{
 		fprintf(tasks_file, "Handles:");
 		fprintf(tasks_file, "Handles:");
@@ -279,12 +265,22 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		for (i = 0; i < task->ndata; i++)
 		for (i = 0; i < task->ndata; i++)
 			fprintf(tasks_file, " %lu", task->data[i].size);
 			fprintf(tasks_file, " %lu", task->data[i].size);
 		fprintf(tasks_file, "\n");
 		fprintf(tasks_file, "\n");
-		free(task->data);
 	}
 	}
 	fprintf(tasks_file, "MPIRank: %d\n", task->mpi_rank);
 	fprintf(tasks_file, "MPIRank: %d\n", task->mpi_rank);
 	fprintf(tasks_file, "\n");
 	fprintf(tasks_file, "\n");
 
 
 out:
 out:
+	free(task->name);
+	free(task->model_name);
+	free(task->dependencies);
+	if (task->dep_labels)
+	{
+		for (i = 0; i < task->ndeps; i++)
+			free(task->dep_labels[i]);
+		free(task->dep_labels);
+	}
+	free(task->parameters);
+	free(task->data);
 	HASH_DEL(tasks_info, task);
 	HASH_DEL(tasks_info, task);
 	free(task);
 	free(task);
 }
 }
@@ -363,18 +359,12 @@ static void data_dump(struct data_info *data)
 	if (data->mpi_rank >= 0)
 	if (data->mpi_rank >= 0)
 		fprintf(data_file, "MPIRank: %d\n", data->mpi_rank);
 		fprintf(data_file, "MPIRank: %d\n", data->mpi_rank);
 	if (data->name)
 	if (data->name)
-	{
 		fprintf(data_file, "Name: %s\n", data->name);
 		fprintf(data_file, "Name: %s\n", data->name);
-		free(data->name);
-	}
 	fprintf(data_file, "Size: %lu\n", (unsigned long) data->size);
 	fprintf(data_file, "Size: %lu\n", (unsigned long) data->size);
 	if (data->max_size != -1)
 	if (data->max_size != -1)
 		fprintf(data_file, "MaxSize: %lu\n", (unsigned long) data->max_size);
 		fprintf(data_file, "MaxSize: %lu\n", (unsigned long) data->max_size);
 	if (data->description)
 	if (data->description)
-	{
 		fprintf(data_file, "Description: %s\n", data->description);
 		fprintf(data_file, "Description: %s\n", data->description);
-		free(data->description);
-	}
 	if (data->dimensions)
 	if (data->dimensions)
 	{
 	{
 		unsigned i;
 		unsigned i;
@@ -389,6 +379,9 @@ static void data_dump(struct data_info *data)
 		fprintf(data_file, "MPITag: %ld\n", data->mpi_tag);
 		fprintf(data_file, "MPITag: %ld\n", data->mpi_tag);
 	fprintf(data_file, "\n");
 	fprintf(data_file, "\n");
 out:
 out:
+	free(data->dims);
+	free(data->description);
+	free(data->name);
 	HASH_DEL(data_info, data);
 	HASH_DEL(data_info, data);
 	free(data);
 	free(data);
 }
 }
@@ -669,6 +662,16 @@ static int register_thread(unsigned long nodeid, unsigned long tid, int workerid
 	return 1;
 	return 1;
 }
 }
 
 
+static void free_worker_ids(void)
+{
+	struct worker_entry *entry, *tmp;
+	HASH_ITER(hh, worker_ids, entry, tmp)
+	{
+		HASH_DEL(worker_ids, entry);
+		free(entry);
+	}
+}
+
 static int register_worker_id(unsigned long nodeid, unsigned long tid, int workerid, int sync)
 static int register_worker_id(unsigned long nodeid, unsigned long tid, int workerid, int sync)
 {
 {
 	nworkers++;
 	nworkers++;
@@ -1540,8 +1543,9 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 
 	if (worker < 0) return;
 	if (worker < 0) return;
 
 
-	unsigned long has_name = ev->param[4];
-	char *name = has_name?get_fxt_string(ev, 5):"unknown";
+	struct task_info *task = get_task(ev->param[0], options->file_rank);
+	char *name = task->name;
+	create_paje_state_if_not_found(name, task->color, options);
 
 
 	snprintf(_starpu_last_codelet_symbol[worker], sizeof(_starpu_last_codelet_symbol[worker]), "%.*s", (int) sizeof(_starpu_last_codelet_symbol[worker])-1, name);
 	snprintf(_starpu_last_codelet_symbol[worker], sizeof(_starpu_last_codelet_symbol[worker]), "%.*s", (int) sizeof(_starpu_last_codelet_symbol[worker])-1, name);
 	_starpu_last_codelet_symbol[worker][sizeof(_starpu_last_codelet_symbol[worker])-1] = 0;
 	_starpu_last_codelet_symbol[worker][sizeof(_starpu_last_codelet_symbol[worker])-1] = 0;
@@ -1551,12 +1555,8 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 	double last_start_codelet_time = last_codelet_start[worker];
 	double last_start_codelet_time = last_codelet_start[worker];
 	last_codelet_start[worker] = start_codelet_time;
 	last_codelet_start[worker] = start_codelet_time;
 
 
-	struct task_info *task = get_task(ev->param[0], options->file_rank);
-	create_paje_state_if_not_found(name, task->color, options);
-
 	task->start_time = start_codelet_time;
 	task->start_time = start_codelet_time;
 	task->workerid = worker;
 	task->workerid = worker;
-	task->name = strdup(name);
 	task->node = node;
 	task->node = node;
 
 
 	if (out_paje_file)
 	if (out_paje_file)
@@ -4251,6 +4251,22 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
 		free(options->worker_archtypes[i].devices);
 		free(options->worker_archtypes[i].devices);
 
 
+	struct _starpu_symbol_name *itor, *next;
+	for (itor = _starpu_symbol_name_list_begin(&symbol_list);
+		itor != _starpu_symbol_name_list_end(&symbol_list);
+		itor = next)
+	{
+		next = _starpu_symbol_name_list_next(itor);
+
+		_starpu_symbol_name_list_erase(&symbol_list, itor);
+		free(itor->name);
+		_starpu_symbol_name_delete(itor);
+	}
+
+	_starpu_fxt_component_deinit();
+
+	free_worker_ids();
+
 #ifdef HAVE_FXT_BLOCKEV_LEAVE
 #ifdef HAVE_FXT_BLOCKEV_LEAVE
 	fxt_blockev_leave(block);
 	fxt_blockev_leave(block);
 #endif
 #endif
@@ -4877,10 +4893,10 @@ struct starpu_data_trace_kernel
 
 
 static FILE *codelet_list;
 static FILE *codelet_list;
 
 
-static void write_task(char *dir, struct parse_task pt)
+static void write_task(char *dir, struct parse_task *pt)
 {
 {
 	struct starpu_data_trace_kernel *kernel;
 	struct starpu_data_trace_kernel *kernel;
-	char *codelet_name = pt.codelet_name;
+	char *codelet_name = pt->codelet_name;
 	HASH_FIND_STR(kernels, codelet_name, kernel);
 	HASH_FIND_STR(kernels, codelet_name, kernel);
 	//fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name);
 	//fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name);
 	if(kernel == NULL)
 	if(kernel == NULL)
@@ -4898,8 +4914,8 @@ static void write_task(char *dir, struct parse_task pt)
 		HASH_ADD_STR(kernels, name, kernel);
 		HASH_ADD_STR(kernels, name, kernel);
 		fprintf(codelet_list, "%s\n", codelet_name);
 		fprintf(codelet_list, "%s\n", codelet_name);
 	}
 	}
-	double time = pt.exec_time * NANO_SEC_TO_MILI_SEC;
-	fprintf(kernel->file, "%lf %u %u\n", time, pt.data_total, pt.workerid);
+	double time = pt->exec_time * NANO_SEC_TO_MILI_SEC;
+	fprintf(kernel->file, "%lf %u %u\n", time, pt->data_total, pt->workerid);
 }
 }
 
 
 void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
@@ -4957,6 +4973,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 			tasks[workerid].workerid = (unsigned)workerid;
 			tasks[workerid].workerid = (unsigned)workerid;
 			tasks[workerid].exec_time = ev.time;
 			tasks[workerid].exec_time = ev.time;
 			has_name = ev.param[4];
 			has_name = ev.param[4];
+			free(tasks[workerid].codelet_name);
 			tasks[workerid].codelet_name = strdup(has_name ? get_fxt_string(&ev, 5): "unknown");
 			tasks[workerid].codelet_name = strdup(has_name ? get_fxt_string(&ev, 5): "unknown");
 			//fprintf(stderr, "start codelet :[%d][%s]\n", workerid, tasks[workerid].codelet_name);
 			//fprintf(stderr, "start codelet :[%d][%s]\n", workerid, tasks[workerid].codelet_name);
 			break;
 			break;
@@ -4965,7 +4982,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 			workerid = ev.param[3];
 			workerid = ev.param[3];
 			assert(workerid != -1);
 			assert(workerid != -1);
 			tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time;
 			tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time;
-			write_task(dir, tasks[workerid]);
+			write_task(dir, &tasks[workerid]);
 			break;
 			break;
 
 
 		case _STARPU_FUT_DATA_LOAD:
 		case _STARPU_FUT_DATA_LOAD:
@@ -5002,6 +5019,12 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 		exit(-1);
 		exit(-1);
 	}
 	}
 
 
+	unsigned i;
+	for (i = 0; i < STARPU_NMAXWORKERS; i++)
+		free(tasks[i].codelet_name);
+
+	free_worker_ids();
+
 	struct starpu_data_trace_kernel *kernel=NULL, *tmp=NULL;
 	struct starpu_data_trace_kernel *kernel=NULL, *tmp=NULL;
 	HASH_ITER(hh, kernels, kernel, tmp)
 	HASH_ITER(hh, kernels, kernel, tmp)
 	{
 	{

+ 1 - 0
src/debug/traces/starpu_fxt.h

@@ -82,6 +82,7 @@ void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options
 void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio);
 void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio);
 void _starpu_fxt_component_dump(FILE *output);
 void _starpu_fxt_component_dump(FILE *output);
 void _starpu_fxt_component_finish(FILE *output);
 void _starpu_fxt_component_finish(FILE *output);
+void _starpu_fxt_component_deinit(void);
 
 
 #endif // STARPU_USE_FXT
 #endif // STARPU_USE_FXT
 
 

+ 8 - 4
src/drivers/mp_common/source_common.c

@@ -788,7 +788,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 	{
 	{
 		if (access(env_file_name, R_OK) == 0)
 		if (access(env_file_name, R_OK) == 0)
 		{
 		{
-			strncpy(located_file_name, env_file_name, len);
+			strncpy(located_file_name, env_file_name, len-1);
+			located_file_name[len-1] = '\0';
 			return 0;
 			return 0;
 		}
 		}
 		else if(env_mic_path != NULL)
 		else if(env_mic_path != NULL)
@@ -802,7 +803,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 	{
 	{
 		if (access(config_file_name, R_OK) == 0)
 		if (access(config_file_name, R_OK) == 0)
 		{
 		{
-			strncpy(located_file_name, config_file_name, len);
+			strncpy(located_file_name, config_file_name, len-1);
+			located_file_name[len-1] = '\0';
 			return 0;
 			return 0;
 		}
 		}
 		else if (env_mic_path != NULL)
 		else if (env_mic_path != NULL)
@@ -820,7 +822,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 		if (env_mic_path != NULL)
 		if (env_mic_path != NULL)
 		{
 		{
 			char actual_cpy[1024];
 			char actual_cpy[1024];
-			strncpy(actual_cpy, actual_file_name, sizeof(actual_cpy));
+			strncpy(actual_cpy, actual_file_name, sizeof(actual_cpy)-1);
+			actual_cpy[sizeof(actual_cpy)-1] = '\0';
 
 
 			char *last =  strrchr(actual_cpy, '/');
 			char *last =  strrchr(actual_cpy, '/');
 			while (last != NULL)
 			while (last != NULL)
@@ -831,7 +834,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 
 
 				if (access(tmp, R_OK) == 0)
 				if (access(tmp, R_OK) == 0)
 				{
 				{
-					strncpy(located_file_name, tmp, len);
+					strncpy(located_file_name, tmp, len-1);
+					located_file_name[len-1] = '\0';
 					return 0;
 					return 0;
 				}
 				}
 
 

+ 22 - 1
src/sched_policies/component_eager.c

@@ -24,6 +24,7 @@ struct _starpu_eager_data
 {
 {
 	struct starpu_sched_component *target;
 	struct starpu_sched_component *target;
 	starpu_pthread_mutex_t scheduling_mutex;
 	starpu_pthread_mutex_t scheduling_mutex;
+	int ntasks;
 };
 };
 
 
 static int eager_push_task(struct starpu_sched_component * component, struct starpu_task * task)
 static int eager_push_task(struct starpu_sched_component * component, struct starpu_task * task)
@@ -34,6 +35,12 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 	struct _starpu_eager_data *d = component->data;
 	struct _starpu_eager_data *d = component->data;
 	struct starpu_sched_component *target;
 	struct starpu_sched_component *target;
 
 
+	if (d->ntasks == 0)
+		/* We have already pushed a task down */
+		return 1;
+	if (d->ntasks > 0)
+		d->ntasks--;
+
 	if ((target = d->target))
 	if ((target = d->target))
 	{
 	{
 		/* target told us we could push to it, try to */
 		/* target told us we could push to it, try to */
@@ -99,7 +106,7 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 	return 1;
 	return 1;
 }
 }
 
 
-/* Note: we can't use starpu_sched_component_pump_to because if a fifo below
+/* Note: we can't use starpu_sched_component_pump_to ourself because if a fifo below
  * refuses a task, we have no way to push it back to a fifo above. */
  * refuses a task, we have no way to push it back to a fifo above. */
 static int eager_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
 static int eager_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
 {
 {
@@ -108,12 +115,24 @@ static int eager_can_push(struct starpu_sched_component * component, struct star
 	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
 	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
 	/* Target flow of tasks to this child */
 	/* Target flow of tasks to this child */
 	d->target = to;
 	d->target = to;
+	/* But make pump above push only one task */
+	d->ntasks = 1;
 	success = starpu_sched_component_can_push(component, to);
 	success = starpu_sched_component_can_push(component, to);
 	d->target = NULL;
 	d->target = NULL;
+	d->ntasks = -1;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
 	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
 	return success;
 	return success;
 }
 }
 
 
+static struct starpu_task *eager_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to)
+{
+	/* We can't directly pull (in case the obtained task does not match
+	 * the constraints of `to'), but we can try to push, and components
+	 * below will cope with it */
+	eager_can_push(component, to);
+	return NULL;
+}
+
 static void eager_deinit_data(struct starpu_sched_component *component)
 static void eager_deinit_data(struct starpu_sched_component *component)
 {
 {
 	STARPU_ASSERT(starpu_sched_component_is_eager(component));
 	STARPU_ASSERT(starpu_sched_component_is_eager(component));
@@ -134,10 +153,12 @@ struct starpu_sched_component * starpu_sched_component_eager_create(struct starp
 	struct _starpu_eager_data *data;
 	struct _starpu_eager_data *data;
 	_STARPU_MALLOC(data, sizeof(*data));
 	_STARPU_MALLOC(data, sizeof(*data));
 	data->target = NULL;
 	data->target = NULL;
+	data->ntasks = -1;
 	STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL);
 
 
 	component->data = data;
 	component->data = data;
 	component->push_task = eager_push_task;
 	component->push_task = eager_push_task;
+	component->pull_task = eager_pull_task;
 	component->can_push = eager_can_push;
 	component->can_push = eager_can_push;
 	component->can_pull = starpu_sched_component_can_pull_all;
 	component->can_pull = starpu_sched_component_can_pull_all;
 	component->deinit_data = eager_deinit_data;
 	component->deinit_data = eager_deinit_data;

+ 2 - 4
src/sched_policies/component_fifo.c

@@ -92,9 +92,8 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 	const double now = starpu_timing_now();
 	const double now = starpu_timing_now();
 	STARPU_COMPONENT_MUTEX_LOCK(mutex);
 	STARPU_COMPONENT_MUTEX_LOCK(mutex);
 
 
-	if (data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
+	if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
 	{
 	{
-		STARPU_ASSERT(!is_pushback);
 		ret = 1;
 		ret = 1;
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 	}
 	}
@@ -106,7 +105,7 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 		else
 		else
 			exp_len = queue->exp_len;
 			exp_len = queue->exp_len;
 
 
-		if (data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
+		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 		{
 			static int warned;
 			static int warned;
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
@@ -114,7 +113,6 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				warned = 1;
 				warned = 1;
 			}
 			}
-			STARPU_ASSERT(!is_pushback);
 			ret = 1;
 			ret = 1;
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		}
 		}

+ 2 - 4
src/sched_policies/component_prio.c

@@ -114,9 +114,8 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 
 
 	double exp_len = NAN;
 	double exp_len = NAN;
 
 
-	if (data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
+	if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
 	{
 	{
-		STARPU_ASSERT(!is_pushback);
 		ret = 1;
 		ret = 1;
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 	}
 	}
@@ -127,7 +126,7 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 		else
 		else
 			exp_len = queue->exp_len;
 			exp_len = queue->exp_len;
 
 
-		if (data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
+		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 		{
 			static int warned;
 			static int warned;
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
@@ -135,7 +134,6 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				warned = 1;
 				warned = 1;
 			}
 			}
-			STARPU_ASSERT(!is_pushback);
 			ret = 1;
 			ret = 1;
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		}
 		}

+ 7 - 0
src/sched_policies/component_random.c

@@ -97,6 +97,12 @@ static int random_push_task(struct starpu_sched_component * component, struct st
 	return ret_val;
 	return ret_val;
 }
 }
 
 
+static struct starpu_task *random_pull_task(struct starpu_sched_component * from, struct starpu_sched_component *to)
+{
+	starpu_sched_component_can_push(from, to);
+	return NULL;
+}
+
 int starpu_sched_component_is_random(struct starpu_sched_component *component)
 int starpu_sched_component_is_random(struct starpu_sched_component *component)
 {
 {
 	return component->push_task == random_push_task;
 	return component->push_task == random_push_task;
@@ -107,5 +113,6 @@ struct starpu_sched_component * starpu_sched_component_random_create(struct star
 	(void)arg;
 	(void)arg;
 	struct starpu_sched_component * component = starpu_sched_component_create(tree, "random");
 	struct starpu_sched_component * component = starpu_sched_component_create(tree, "random");
 	component->push_task = random_push_task;
 	component->push_task = random_push_task;
+	component->pull_task = random_pull_task;
 	return component;
 	return component;
 }
 }

+ 1 - 1
tests/microbenchs/parallel_independent_heterogeneous_tasks.sh

@@ -16,6 +16,6 @@
 #
 #
 source $(dirname $0)/microbench.sh
 source $(dirname $0)/microbench.sh
 
 
-XFAIL="modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 
 test_scheds parallel_independent_heterogeneous_tasks
 test_scheds parallel_independent_heterogeneous_tasks

+ 1 - 1
tests/microbenchs/parallel_independent_homogeneous_tasks.sh

@@ -16,6 +16,6 @@
 #
 #
 source $(dirname $0)/microbench.sh
 source $(dirname $0)/microbench.sh
 
 
-XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 
 test_scheds parallel_independent_homogeneous_tasks
 test_scheds parallel_independent_homogeneous_tasks

+ 1 - 0
tests/model-checking/platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/dev/lsan/suppressions

@@ -33,3 +33,4 @@ leak:hwloc_plugins_exit
 
 
 # papi
 # papi
 leak:_pe_libpfm4_init
 leak:_pe_libpfm4_init
+leak:allocate_thread

+ 8 - 0
tools/dev/valgrind/fxt.suppr

@@ -48,3 +48,11 @@
    fun:fxt_setinfos
    fun:fxt_setinfos
    fun:fut_setup
    fun:fut_setup
 }
 }
+
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   fun:fxt_get_cpu_info
+   fun:fxt_setinfos
+   fun:fut_setup
+}

+ 9 - 0
tools/dev/valgrind/papi.suppr

@@ -22,3 +22,12 @@
    fun:_pe_libpfm4_init
    fun:_pe_libpfm4_init
    ...
    ...
 }
 }
+
+# This happens in multithreaded_init: papi does not support getting initialized in one thread and shut down in another thread.
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   ...
+   fun:allocate_thread
+   ...
+}

+ 4 - 1
tools/gdbinit

@@ -381,7 +381,10 @@ define starpu-print-data
   if $data->ops->interfaceid == 8
   if $data->ops->interfaceid == 8
     printf "COO\n"
     printf "COO\n"
   end
   end
-  if $data->ops->interfaceid > 8
+  if $data->ops->interfaceid == 9
+    printf "Tensor\n"
+  end
+  if $data->ops->interfaceid > 9
     printf "Interface id %d\n", $data->ops->interfaceid
     printf "Interface id %d\n", $data->ops->interfaceid
   end
   end
   printf "Home node %d\n", $data->home_node
   printf "Home node %d\n", $data->home_node

+ 1 - 0
tools/perfmodels/sampling/bus/attila.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/attila.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal.platform.v4.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/idgraf.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/idgraf.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/mirage.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/mirage.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/sirocco.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/sirocco.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  </config>
  <AS  id="AS0"  routing="Full">
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/starpu_fxt_data_trace.c

@@ -186,5 +186,6 @@ int main(int argc, char **argv)
 	starpu_fxt_write_data_trace_in_dir(argv[1+pos], directory);
 	starpu_fxt_write_data_trace_in_dir(argv[1+pos], directory);
 	write_gp(directory, argc - (2 + pos), argv + 2 + pos);
 	write_gp(directory, argc - (2 + pos), argv + 2 + pos);
 	starpu_perfmodel_free_sampling();
 	starpu_perfmodel_free_sampling();
+	free(directory);
 	return 0;
 	return 0;
 }
 }

+ 14 - 0
tools/starpu_fxt_stats.c

@@ -184,6 +184,20 @@ int main(int argc, char **argv)
 		}
 		}
 	}
 	}
 
 
+#ifdef HAVE_FXT_BLOCKEV_LEAVE
+	fxt_blockev_leave(block);
+#endif
+
+#ifdef HAVE_FXT_CLOSE
+	fxt_close(fut);
+#else
+	if (close(fd_in))
+	{
+	        perror("close failed :");
+	        exit(-1);
+	}
+#endif
+
 	fprintf(fd_out, "Start : start time %e end time %e length %e\n", start_time, end_time, end_time - start_time);
 	fprintf(fd_out, "Start : start time %e end time %e length %e\n", start_time, end_time, end_time - start_time);
 
 
 	unsigned src, dst;
 	unsigned src, dst;

+ 1 - 0
tools/starpu_perfmodel_plot.c

@@ -492,6 +492,7 @@ static void dump_data_file(FILE *data_file, struct _perfmodel_plot_options *opti
 		}
 		}
 		free(tmp);
 		free(tmp);
 	}
 	}
+	free(options->dumped_codelets);
 }
 }
 #endif
 #endif
 
 

+ 2 - 0
tools/starpu_perfmodel_recdump.c

@@ -386,8 +386,10 @@ int main(int argc, char **argv)
 					l = ltmp;
 					l = ltmp;
 				}
 				}
 
 
+				starpu_perfmodel_unload_model(&model->model);
 				free(model->name);
 				free(model->name);
 				HASH_DEL(models, model);
 				HASH_DEL(models, model);
+				free(model);
 			}
 			}
 		}
 		}
 		fclose(input);
 		fclose(input);

+ 3 - 1
tools/starpu_tasks_rec_complete.c

@@ -192,8 +192,10 @@ int main(int argc, char *argv[])
 	starpu_shutdown();
 	starpu_shutdown();
 	HASH_ITER(hh, models, model, tmp)
 	HASH_ITER(hh, models, model, tmp)
 	{
 	{
-		free(model->name);
 		HASH_DEL(models, model);
 		HASH_DEL(models, model);
+		starpu_perfmodel_unload_model(&model->model);
+		free(model->name);
+		free(model);
 	}
 	}
 	return 0;
 	return 0;
 }
 }