Просмотр исходного кода

Merge branch 'master' into fpga

Nathalie Furmento лет назад: 5
Родитель
Сommit
16efa503b9
55 измененных файлов с 346 добавлено и 99 удалено
  1. 8 3
      configure.ac
  2. 8 4
      examples/interface/complex.c
  3. 10 1
      examples/interface/complex_interface.c
  4. 1 1
      include/starpu_bitmap.h
  5. 12 6
      include/starpu_data_interfaces.h
  6. 1 1
      include/starpu_fxt.h
  7. 3 3
      include/starpu_openmp.h
  8. 3 3
      include/starpu_perf_monitoring.h
  9. 1 1
      include/starpu_profiling.h
  10. 1 1
      include/starpu_scheduler.h
  11. 7 1
      mpi/examples/benchs/sendrecv_bench.c
  12. 6 0
      mpi/src/mpi/starpu_mpi_early_data.c
  13. 2 0
      mpi/src/mpi/starpu_mpi_early_data.h
  14. 1 0
      mpi/src/mpi/starpu_mpi_early_request.h
  15. 2 2
      mpi/src/mpi/starpu_mpi_mpi.c
  16. 9 0
      mpi/tests/driver.c
  17. 8 4
      mpi/tests/early_stuff.c
  18. 7 14
      src/common/fxt.h
  19. 1 0
      src/core/perfmodel/perfmodel_bus.c
  20. 6 0
      src/core/topology.c
  21. 3 0
      src/core/workers.c
  22. 4 2
      src/datawizard/coherency.h
  23. 75 3
      src/datawizard/interfaces/data_interface.c
  24. 12 0
      src/debug/traces/anim.c
  25. 55 32
      src/debug/traces/starpu_fxt.c
  26. 1 0
      src/debug/traces/starpu_fxt.h
  27. 8 4
      src/drivers/mp_common/source_common.c
  28. 22 1
      src/sched_policies/component_eager.c
  29. 2 4
      src/sched_policies/component_fifo.c
  30. 2 4
      src/sched_policies/component_prio.c
  31. 7 0
      src/sched_policies/component_random.c
  32. 1 1
      tests/microbenchs/parallel_independent_heterogeneous_tasks.sh
  33. 1 1
      tests/microbenchs/parallel_independent_homogeneous_tasks.sh
  34. 1 0
      tests/model-checking/platform.xml
  35. 1 0
      tools/dev/lsan/suppressions
  36. 8 0
      tools/dev/valgrind/fxt.suppr
  37. 9 0
      tools/dev/valgrind/papi.suppr
  38. 4 1
      tools/gdbinit
  39. 1 0
      tools/perfmodels/sampling/bus/attila.platform.v4.xml
  40. 1 0
      tools/perfmodels/sampling/bus/attila.platform.xml
  41. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml
  42. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml
  43. 1 0
      tools/perfmodels/sampling/bus/hannibal.platform.v4.xml
  44. 1 0
      tools/perfmodels/sampling/bus/hannibal.platform.xml
  45. 1 0
      tools/perfmodels/sampling/bus/idgraf.platform.v4.xml
  46. 1 0
      tools/perfmodels/sampling/bus/idgraf.platform.xml
  47. 1 0
      tools/perfmodels/sampling/bus/mirage.platform.v4.xml
  48. 1 0
      tools/perfmodels/sampling/bus/mirage.platform.xml
  49. 1 0
      tools/perfmodels/sampling/bus/sirocco.platform.v4.xml
  50. 1 0
      tools/perfmodels/sampling/bus/sirocco.platform.xml
  51. 1 0
      tools/starpu_fxt_data_trace.c
  52. 14 0
      tools/starpu_fxt_stats.c
  53. 1 0
      tools/starpu_perfmodel_plot.c
  54. 2 0
      tools/starpu_perfmodel_recdump.c
  55. 3 1
      tools/starpu_tasks_rec_complete.c

+ 8 - 3
configure.ac

@@ -2055,6 +2055,10 @@ if test x$enable_debug = xyes; then
 	FCFLAGS="$FCFLAGS -O0"
 	enable_spinlock_check=yes
 	if test x$GCC = xyes; then
+		CFLAGS="$CFLAGS -Og"
+		CXXFLAGS="$CXXFLAGS -Og"
+		FFLAGS="$FFLAGS -Og"
+		FCFLAGS="$FCFLAGS -Og"
 		if test x$starpu_windows != xyes ; then
 			if test x$enable_fstack_protector_all = xyes ; then
 			   CFLAGS="$CFLAGS -fstack-protector-all"
@@ -3228,7 +3232,7 @@ AM_CONDITIONAL(STARPU_BUILD_STARPUFFT_EXAMPLES, [test x$enable_starpufft_example
 ##########################################
 
 have_valid_hwloc=no
-SAVED_LDFLAGS="${LDFLAGS}"
+SAVED_LIBS="${LIBS}"
 SAVED_CPPFLAGS="${CPPFLAGS}"
 SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
 AC_ARG_WITH([hwloc],
@@ -3273,7 +3277,7 @@ AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"],
       [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])]
      )
 
-LDFLAGS="${HWLOC_LIBS} ${SAVED_LDFLAGS}"
+LIBS="${HWLOC_LIBS} ${SAVED_LIBS}"
 CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}"
 
 AS_IF([test "$have_valid_hwloc" = "yes"],
@@ -3285,9 +3289,10 @@ AS_IF([test "$have_valid_hwloc" = "yes"],
 
 AC_CHECK_FUNCS([hwloc_topology_dup])
 AC_CHECK_FUNCS([hwloc_topology_set_components])
+AC_CHECK_FUNCS([hwloc_cpukinds_get_nr])
 AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes)
 
-LDFLAGS="${SAVED_LDFLAGS}"
+LIBS="${SAVED_LIBS}"
 CPPFLAGS="${SAVED_CPPFLAGS}"
 export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH
 

+ 8 - 4
examples/interface/complex.c

@@ -127,6 +127,9 @@ int main(void)
 #endif
 	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
+	/* Create a vector of two complexs.  */
+	starpu_complex_data_register(&handle3, -1, 0, 0, 2);
+	starpu_complex_data_register(&handle4, -1, 0, 0, 1);
 
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
 	if (ret == -ENODEV) goto end;
@@ -189,9 +192,6 @@ int main(void)
 	copy_imaginary = 77.0;
 	starpu_data_release(handle2);
 
-	/* Create a vector of two complexs.  */
-	starpu_complex_data_register(&handle3, -1, 0, 0, 2);
-
 	/* Split it in two pieces (thus one complex each).  */
 	struct starpu_data_filter f =
 	{
@@ -219,6 +219,8 @@ int main(void)
 
 	/* Show it.  */
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0);
+	if (ret == -ENODEV) goto end;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 	/* Get the real and imaginary vectors.  */
 	struct starpu_data_filter fcanon =
@@ -249,11 +251,11 @@ int main(void)
 	starpu_data_unpartition(handle3, STARPU_MAIN_RAM);
 
 	/* Use helper starpu_data_cpy */
-	starpu_complex_data_register(&handle4, -1, 0, 0, 1);
 	starpu_data_cpy(handle4, handle1, 0, NULL, NULL);
 	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0);
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
 	/* Compare two different complexs.  */
 	ret = starpu_task_insert(&cl_compare,
 				 STARPU_R, handle1,
@@ -278,6 +280,8 @@ end:
 #endif
 	starpu_data_unregister(handle1);
 	starpu_data_unregister(handle2);
+	starpu_data_unregister(handle3);
+	starpu_data_unregister(handle4);
 	starpu_shutdown();
 	if (ret == -ENODEV) return 77; else return !compare;
 }

+ 10 - 1
examples/interface/complex_interface.c

@@ -170,6 +170,14 @@ static starpu_ssize_t complex_describe(void *data_interface, char *buf, size_t s
 	return snprintf(buf, size, "Complex%d", complex_interface->nx);
 }
 
+/* Layout comparison callback of the complex interface: returns 1 when both
+ * interfaces have the same nx, and 0 otherwise. */
+static int complex_compare(void *data_interface_a, void *data_interface_b)
+{
+	const struct starpu_complex_interface *lhs = data_interface_a;
+	const struct starpu_complex_interface *rhs = data_interface_b;
+
+	if (lhs->nx != rhs->nx)
+		return 0;
+	return 1;
+}
+
 static int copy_any_to_any(void *src_interface, unsigned src_node,
 			   void *dst_interface, unsigned dst_node,
 			   void *async_data)
@@ -210,7 +218,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
 	.pointer_is_inside = complex_pointer_is_inside,
 	.pack_data = complex_pack_data,
 	.unpack_data = complex_unpack_data,
-	.describe = complex_describe
+	.describe = complex_describe,
+	.compare = complex_compare
 };
 
 void starpu_complex_data_register(starpu_data_handle_t *handleptr, unsigned home_node, double *real, double *imaginary, int nx)

+ 1 - 1
include/starpu_bitmap.h

@@ -120,7 +120,7 @@ static int _starpu_count_bit_static(unsigned long e)
 #endif
 }
 
-static inline struct starpu_bitmap *starpu_bitmap_create()
+static inline struct starpu_bitmap *starpu_bitmap_create(void)
 {
 	return (struct starpu_bitmap *) calloc(1, sizeof(struct starpu_bitmap));
 }

+ 12 - 6
include/starpu_data_interfaces.h

@@ -243,9 +243,9 @@ struct starpu_data_copy_methods
 	*/
 	int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
 #else
-	int (*ram_to_cuda_async)();
-	int (*cuda_to_ram_async)();
-	int (*cuda_to_cuda_async)();
+	int (*ram_to_cuda_async)(void);
+	int (*cuda_to_ram_async)(void);
+	int (*cuda_to_cuda_async)(void);
 #endif
 
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
@@ -280,9 +280,9 @@ struct starpu_data_copy_methods
 	*/
 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
 #else
-	int (*ram_to_opencl_async)();
-	int (*opencl_to_ram_async)();
-	int (*opencl_to_opencl_async)();
+	int (*ram_to_opencl_async)(void);
+	int (*opencl_to_ram_async)(void);
+	int (*opencl_to_opencl_async)(void);
 #endif
 
 	/**
@@ -515,6 +515,7 @@ struct starpu_data_interface_ops
 	   Compare the data size and layout of two interfaces (nx, ny, ld, elemsize,
 	   etc.), to be used for indexing performance models. It should return 1 if
 	   the two interfaces size and layout match computation-wise, and 0 otherwise.
+	   It does *not* compare the actual content of the interfaces.
 	*/
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
 
@@ -709,6 +710,11 @@ starpu_data_handle_t starpu_data_lookup(const void *ptr);
 int starpu_data_get_home_node(starpu_data_handle_t handle);
 
 /**
+   Print basic information about \p handle on \p node to \p stream
+ */
+void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream);
+
+/**
    Return the next available id for a newly created data interface
    (\ref DefiningANewDataInterface).
 */

+ 1 - 1
include/starpu_fxt.h

@@ -138,7 +138,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir);
 /**
     Wrapper to get value of env variable STARPU_FXT_TRACE
 */
-int starpu_fxt_is_enabled();
+int starpu_fxt_is_enabled(void);
 
 /**
    Add an event in the execution trace if FxT is enabled.

+ 3 - 3
include/starpu_openmp.h

@@ -639,7 +639,7 @@ extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_num_procs
  */
-extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_num_threads(void) __STARPU_OMP_NOTHROW;
 
 /**
    Return the rank of the current thread among the threads
@@ -652,7 +652,7 @@ extern int starpu_omp_get_num_threads() __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_max_threads
    \sa starpu_omp_get_num_procs
  */
-extern int starpu_omp_get_thread_num() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_thread_num(void) __STARPU_OMP_NOTHROW;
 
 /**
    Return the maximum number of threads that can be used to
@@ -665,7 +665,7 @@ extern int starpu_omp_get_thread_num() __STARPU_OMP_NOTHROW;
    \sa starpu_omp_get_thread_num
    \sa starpu_omp_get_num_procs
  */
-extern int starpu_omp_get_max_threads() __STARPU_OMP_NOTHROW;
+extern int starpu_omp_get_max_threads(void) __STARPU_OMP_NOTHROW;
 
 /**
    Return the number of StarPU CPU workers.

+ 3 - 3
include/starpu_perf_monitoring.h

@@ -65,11 +65,11 @@ struct starpu_perf_counter_set;
 /**
   Start collecting performance counter values.
   */
-void starpu_perf_counter_collection_start();
+void starpu_perf_counter_collection_start(void);
 /**
   Stop collecting performance counter values.
   */
-void starpu_perf_counter_collection_stop();
+void starpu_perf_counter_collection_stop(void);
 
 /**
   Translate scope name constant string to scope id.
@@ -170,7 +170,7 @@ void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, str
 /**
   Unset the global listener.
   */
-void starpu_perf_counter_unset_global_listener();
+void starpu_perf_counter_unset_global_listener(void);
 /**
   Unset the per_worker listener.
   */

+ 1 - 1
include/starpu_profiling.h

@@ -316,7 +316,7 @@ void starpu_profiling_worker_helper_display_summary(void);
    option \ref enable-memory-stats "--enable-memory-stats" (see \ref
    MemoryFeedback).
 */
-void starpu_data_display_memory_stats();
+void starpu_data_display_memory_stats(void);
 
 /** @} */
 

+ 1 - 1
include/starpu_scheduler.h

@@ -208,7 +208,7 @@ struct starpu_sched_policy
    Return an <c>NULL</c>-terminated array of all the predefined
    scheduling policies.
 */
-struct starpu_sched_policy **starpu_sched_get_predefined_policies();
+struct starpu_sched_policy **starpu_sched_get_predefined_policies(void);
 
 /**
    When there is no available task for a worker, StarPU blocks this

+ 7 - 1
mpi/examples/benchs/sendrecv_bench.c

@@ -54,7 +54,7 @@ int main(int argc, char **argv)
 		{
 			man();
 		}
-		if (strcmp(argv[i], "--bidir") == 0)
+		else if (strcmp(argv[i], "--bidir") == 0)
 		{
 			bidir = 1;
 			printf("Communications will be full-duplex.\n");
@@ -83,6 +83,12 @@ int main(int argc, char **argv)
 		return STARPU_TEST_SKIPPED;
 	}
 
+#if !defined(STARPU_LONG_CHECK)
+	if (rank == 0)
+	{
+		printf("To have a more precise benchmark, configure StarPU with --enable-long-check\n");
+	}
+#endif
 
 	if (pause_workers)
 	{

+ 6 - 0
mpi/src/mpi/starpu_mpi_early_data.c

@@ -92,6 +92,12 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _star
 	return early_data_handle;
 }
 
+/* Free an early data handle.
+ * The pointer is received by value, so this function cannot reset the
+ * caller's copy: callers that keep the pointer around must set it to NULL
+ * themselves after this call. */
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	free(early_data_handle);
+}
+
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag)
 {
 	struct _starpu_mpi_early_data_handle_hashlist *hashlist;

+ 2 - 0
mpi/src/mpi/starpu_mpi_early_data.h

@@ -61,7 +61,9 @@ void _starpu_mpi_early_data_shutdown(void);
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC;
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag);
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle);
 
+// Not used now but needed for fault tolerance
 struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag);
 
 #ifdef __cplusplus

+ 1 - 0
mpi/src/mpi/starpu_mpi_early_request.h

@@ -47,6 +47,7 @@ void _starpu_mpi_early_request_check_termination(void);
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
 struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
+// Not used now but needed for fault tolerance
 struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
 #ifdef __cplusplus

+ 2 - 2
mpi/src/mpi/starpu_mpi_mpi.c

@@ -862,8 +862,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 	if (req->backend->internal_req)
 	{
-		free(req->backend->early_data_handle);
-		req->backend->early_data_handle = NULL;
+		_starpu_mpi_early_data_delete(req->backend->early_data_handle);
 	}
 	else
 	{
@@ -1546,6 +1545,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_starpu_mpi_early_request_check_termination();
 	_starpu_mpi_early_data_check_termination();
 	_starpu_mpi_sync_data_check_termination();
+	_starpu_mpi_req_prio_list_deinit(&ready_send_requests);
 
 	if (argc_argv->initialize_mpi)
 	{

+ 9 - 0
mpi/tests/driver.c

@@ -18,6 +18,14 @@
 #include <math.h>
 #include "helper.h"
 
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
 int main(int argc, char **argv)
 {
 	int ret, rank, size, i;
@@ -132,3 +140,4 @@ int main(int argc, char **argv)
 
 	return 0;
 }
+#endif

+ 8 - 4
mpi/tests/early_stuff.c

@@ -63,14 +63,17 @@ void early_data()
 	_starpu_mpi_early_data_add(edh[0]);
 	_starpu_mpi_early_data_add(edh[1]);
 
-	hash = _starpu_mpi_early_data_extract(&node_tag[1]);
+	hash = _starpu_mpi_early_data_extract(&node_tag[0]);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1);
 	early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list);
-	STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag);
 	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0);
+	_starpu_mpi_early_data_delete(early);
+	free(hash);
 
-	early = _starpu_mpi_early_data_find(&node_tag[0]);
-	STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag);
+	early = _starpu_mpi_early_data_find(&node_tag[1]);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag);
+	_starpu_mpi_early_data_delete(early);
 }
 
 void early_request()
@@ -100,6 +103,7 @@ void early_request()
 	early = _starpu_mpi_req_list_pop_front(&hash->list);
 	STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0);
 	STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm);
+	free(hash);
 }
 
 int main(int argc, char **argv)

+ 7 - 14
src/common/fxt.h

@@ -749,17 +749,7 @@ do {									\
 
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 do {									\
-        const char *model_name = _starpu_job_get_model_name((job)), *name = _starpu_job_get_task_name((job));         \
-	if (name)                                                 \
-	{								\
-		/* we include the task name */			\
-		_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 1, name); \
-		if (model_name)					\
-			_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
-	}								\
-	else {                                                          \
-		FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 0); \
-	}								\
+	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \
 	{								\
 		if ((job)->task->cl)					\
 		{							\
@@ -852,14 +842,17 @@ do {									\
 
 #define _STARPU_TRACE_TASK_NAME(job)					\
 	do {								\
-        const char *model_name = _starpu_job_get_task_name((job));                       \
-	if (model_name)					                        \
+        const char *model_name = _starpu_job_get_model_name((job));		\
+	const char *name = _starpu_job_get_task_name((job));			\
+	if (name)					                        \
 	{									\
-		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, model_name);\
+		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, name);\
 	}									\
 	else {									\
 		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, "unknown");\
 	}									\
+	if (model_name)					\
+		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
 } while(0)
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\

+ 1 - 0
src/core/perfmodel/perfmodel_bus.c

@@ -2684,6 +2684,7 @@ static void write_bus_platform_file_content(int version)
 			"   <prop id=\"network/TCP%cgamma\" value=\"-1\"></prop>\n"
 			"   <prop id=\"network/latency%cfactor\" value=\"1\"></prop>\n"
 			"   <prop id=\"network/bandwidth%cfactor\" value=\"1\"></prop>\n"
+			"   <prop id=\"network/crosstraffic\" value=\"0\"></prop>\n"
 			"   <prop id=\"network/weight%cS\" value=\"0.0\"></prop>\n"
 			" </config>\n"
 			" <AS  id=\"AS0\"  routing=\"Full\">\n"

+ 6 - 0
src/core/topology.c

@@ -878,6 +878,12 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 	_starpu_topology_filter(topology->hwtopology);
 	hwloc_topology_load(topology->hwtopology);
 
+#ifdef HAVE_HWLOC_CPUKINDS_GET_NR
+	/* Warn when hwloc reports more than one kind of CPU. */
+	int nr_kinds = hwloc_cpukinds_get_nr(topology->hwtopology, 0);
+	if (nr_kinds > 1)
+		_STARPU_DISP("Warning: there are several kinds of CPU on this system. For now StarPU assumes all CPU are equal\n");
+#endif
+
 	if (starpu_get_env_number_default("STARPU_WORKERS_GETBIND", 0))
 	{
 		/* Respect the existing binding */

+ 3 - 0
src/core/workers.c

@@ -1599,6 +1599,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 
 	_starpu_load_bus_performance_files();
 
+	/* Note: nothing before here should be allocating anything, in case we
+	 * actually return ENODEV here */
+
 	/* Depending on whether we are a MP sink or not, we must build the
 	 * topology with MP nodes or not. */
 	ret = _starpu_build_topology(&_starpu_config, is_a_sink);

+ 4 - 2
src/datawizard/coherency.h

@@ -178,7 +178,8 @@ struct _starpu_data_state
 	unsigned active:1;
 	unsigned active_ro:1;
 
-	/** describe the state of the data in term of coherency */
+	/** describe the state of the data in term of coherency
+	 * This is execution-time state. */
 	struct _starpu_data_replicate per_node[STARPU_MAXNODES];
 	struct _starpu_data_replicate *per_worker;
 
@@ -209,7 +210,8 @@ struct _starpu_data_state
 
 	/** Does StarPU have to enforce some implicit data-dependencies ? */
 	unsigned sequential_consistency:1;
-	/** Is the data initialized, or a task is already submitted to initialize it */
+	/** Is the data initialized, or a task is already submitted to initialize it
+	 * This is submission-time initialization state. */
 	unsigned initialized:1;
 	/** Whether we shall not ever write to this handle, thus allowing various optimizations */
 	unsigned readonly:1;

+ 75 - 3
src/datawizard/interfaces/data_interface.c

@@ -53,8 +53,6 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 void _starpu_data_interface_init(void)
 {
 	_starpu_spin_init(&registered_handles_lock);
-	_id_to_ops_array_size = 20;
-	_STARPU_MALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *));
 
 	/* Just for testing purpose */
 	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
@@ -72,6 +70,8 @@ void _starpu_data_interface_shutdown()
 
 	_starpu_spin_destroy(&registered_handles_lock);
 	free(_id_to_ops_array);
+	_id_to_ops_array = NULL;
+	_id_to_ops_array_size = 0;
 
 	HASH_ITER(hh, registered_handles, entry, tmp)
 	{
@@ -573,7 +573,14 @@ void starpu_data_register(starpu_data_handle_t *handleptr, int home_node,
 	{
 		if ((unsigned)ops->interfaceid > _id_to_ops_array_size)
 		{
-			_id_to_ops_array_size *= 2;
+			if (!_id_to_ops_array_size)
+			{
+				_id_to_ops_array_size = 16;
+			}
+			else
+			{
+				_id_to_ops_array_size *= 2;
+			}
 			_STARPU_REALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *));
 		}
 		_id_to_ops_array[ops->interfaceid-STARPU_MAX_INTERFACE_ID] = ops;
@@ -1063,6 +1070,9 @@ retry_busy:
 	STARPU_HG_ENABLE_CHECKING(handle->post_sync_tasks_cnt);
 	STARPU_HG_ENABLE_CHECKING(handle->busy_count);
 
+	_starpu_data_requester_prio_list_deinit(&handle->req_list);
+	_starpu_data_requester_prio_list_deinit(&handle->reduction_req_list);
+
 	if (handle->switch_cl)
 	{
 		free(handle->switch_cl->dyn_nodes);
@@ -1285,3 +1295,65 @@ unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned
 
 	return dimensions;
 }
+
+/**
+ * Print a one-line summary of \p handle to \p stream: the name of its
+ * interface type, followed by the output of the interface's describe()
+ * method when the data is allocated on \p node or on the handle's home node.
+ * The line is always terminated by a newline.
+ */
+void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream)
+{
+	if (handle->ops == NULL)
+		fprintf(stream, "Undefined");
+	else
+	{
+		switch (handle->ops->interfaceid)
+		{
+		case(STARPU_MATRIX_INTERFACE_ID):
+			fprintf(stream, "Matrix");
+			break;
+		case(STARPU_BLOCK_INTERFACE_ID):
+			fprintf(stream, "Block");
+			break;
+		case(STARPU_VECTOR_INTERFACE_ID):
+			fprintf(stream, "Vector");
+			break;
+		case(STARPU_CSR_INTERFACE_ID):
+			fprintf(stream, "CSR");
+			break;
+		case(STARPU_BCSR_INTERFACE_ID):
+			fprintf(stream, "BCSR");
+			break;
+		case(STARPU_VARIABLE_INTERFACE_ID):
+			fprintf(stream, "Variable");
+			break;
+		case(STARPU_VOID_INTERFACE_ID):
+			fprintf(stream, "Void");
+			break;
+		case(STARPU_MULTIFORMAT_INTERFACE_ID):
+			fprintf(stream, "Multiformat");
+			break;
+		case(STARPU_COO_INTERFACE_ID):
+			fprintf(stream, "COO");
+			break;
+		case(STARPU_TENSOR_INTERFACE_ID):
+			fprintf(stream, "Tensor");
+			break;
+		case(STARPU_UNKNOWN_INTERFACE_ID ):
+			fprintf(stream, "UNKNOWN");
+			break;
+		default:
+			fprintf(stream, "User interface with id %d", handle->ops->interfaceid);
+			break;
+		}
+	}
+
+	/* NOTE(review): when the home node holds an allocated copy, its
+	 * interface replaces the one found on `node` -- confirm that this
+	 * precedence is intended. */
+	void *data_interface = NULL;
+	if (starpu_data_test_if_allocated_on_node(handle, node))
+		data_interface = starpu_data_get_interface_on_node(handle, node);
+	/* Data can be registered with a home node of -1; a negative value must
+	 * not be converted to an unsigned node index. */
+	if (handle->home_node >= 0 && starpu_data_test_if_allocated_on_node(handle, handle->home_node))
+		data_interface = starpu_data_get_interface_on_node(handle, handle->home_node);
+
+	if (handle->ops && handle->ops->describe && data_interface)
+	{
+		char buffer[1024];
+		handle->ops->describe(data_interface, buffer, sizeof(buffer));
+		fprintf(stream, " %s\n", buffer);
+	}
+	else
+		fprintf(stream, "\n");
+}

+ 12 - 0
src/debug/traces/anim.c

@@ -80,6 +80,18 @@ void _starpu_fxt_component_new(uint64_t component, char *name)
 	COMPONENT_ADD(components, ptr, comp);
 }
 
+/* Empty the `components` hash table, freeing each entry along with its
+ * children array and its name. */
+void _starpu_fxt_component_deinit(void)
+{
+	struct component *cur;
+	struct component *next;
+
+	HASH_ITER(hh, components, cur, next)
+	{
+		HASH_DEL(components, cur);
+		free(cur->name);
+		free(cur->children);
+		free(cur);
+	}
+}
+
 static void fxt_component_dump(FILE *file, struct component *comp, unsigned depth)
 {
 	unsigned i;

+ 55 - 32
src/debug/traces/starpu_fxt.c

@@ -198,15 +198,9 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		goto out;
 
 	if (task->name)
-	{
 		fprintf(tasks_file, "Name: %s\n", task->name);
-		free(task->name);
-	}
 	if (task->model_name)
-	{
 		fprintf(tasks_file, "Model: %s\n", task->model_name);
-		free(task->model_name);
-	}
 	fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id);
 	if (task->submit_order)
 		fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order);
@@ -217,18 +211,13 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		for (i = 0; i < task->ndeps; i++)
 			fprintf(tasks_file, " %s%lu", prefix, task->dependencies[i]);
 		fprintf(tasks_file, "\n");
-		free(task->dependencies);
 	}
 	if (task->dep_labels)
 	{
 		fprintf(tasks_file, "DepLabels:");
 		for (i = 0; i < task->ndeps; i++)
-		{
 			fprintf(tasks_file, " %s", task->dep_labels[i]);
-			free(task->dep_labels[i]);
-		}
 		fprintf(tasks_file, "\n");
-		free(task->dep_labels);
 	}
 	fprintf(tasks_file, "Tag: %"PRIx64"\n", task->tag);
 	if (task->workerid >= 0)
@@ -256,10 +245,7 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		fprintf(tasks_file, "\n");
 	}
 	if (task->parameters)
-	{
 		fprintf(tasks_file, "Parameters: %s\n", task->parameters);
-		free(task->parameters);
-	}
 	if (task->data)
 	{
 		fprintf(tasks_file, "Handles:");
@@ -279,12 +265,22 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 		for (i = 0; i < task->ndata; i++)
 			fprintf(tasks_file, " %lu", task->data[i].size);
 		fprintf(tasks_file, "\n");
-		free(task->data);
 	}
 	fprintf(tasks_file, "MPIRank: %d\n", task->mpi_rank);
 	fprintf(tasks_file, "\n");
 
 out:
+	free(task->name);
+	free(task->model_name);
+	free(task->dependencies);
+	if (task->dep_labels)
+	{
+		for (i = 0; i < task->ndeps; i++)
+			free(task->dep_labels[i]);
+		free(task->dep_labels);
+	}
+	free(task->parameters);
+	free(task->data);
 	HASH_DEL(tasks_info, task);
 	free(task);
 }
@@ -363,18 +359,12 @@ static void data_dump(struct data_info *data)
 	if (data->mpi_rank >= 0)
 		fprintf(data_file, "MPIRank: %d\n", data->mpi_rank);
 	if (data->name)
-	{
 		fprintf(data_file, "Name: %s\n", data->name);
-		free(data->name);
-	}
 	fprintf(data_file, "Size: %lu\n", (unsigned long) data->size);
 	if (data->max_size != -1)
 		fprintf(data_file, "MaxSize: %lu\n", (unsigned long) data->max_size);
 	if (data->description)
-	{
 		fprintf(data_file, "Description: %s\n", data->description);
-		free(data->description);
-	}
 	if (data->dimensions)
 	{
 		unsigned i;
@@ -389,6 +379,9 @@ static void data_dump(struct data_info *data)
 		fprintf(data_file, "MPITag: %ld\n", data->mpi_tag);
 	fprintf(data_file, "\n");
 out:
+	free(data->dims);
+	free(data->description);
+	free(data->name);
 	HASH_DEL(data_info, data);
 	free(data);
 }
@@ -669,6 +662,16 @@ static int register_thread(unsigned long nodeid, unsigned long tid, int workerid
 	return 1;
 }
 
+/* Remove and free every entry of the `worker_ids` hash table. */
+static void free_worker_ids(void)
+{
+	struct worker_entry *cur;
+	struct worker_entry *next;
+
+	HASH_ITER(hh, worker_ids, cur, next)
+	{
+		HASH_DEL(worker_ids, cur);
+		free(cur);
+	}
+}
+
 static int register_worker_id(unsigned long nodeid, unsigned long tid, int workerid, int sync)
 {
 	nworkers++;
@@ -1540,8 +1543,9 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 	if (worker < 0) return;
 
-	unsigned long has_name = ev->param[4];
-	char *name = has_name?get_fxt_string(ev, 5):"unknown";
+	struct task_info *task = get_task(ev->param[0], options->file_rank);
+	char *name = task->name;
+	create_paje_state_if_not_found(name, task->color, options);
 
 	snprintf(_starpu_last_codelet_symbol[worker], sizeof(_starpu_last_codelet_symbol[worker]), "%.*s", (int) sizeof(_starpu_last_codelet_symbol[worker])-1, name);
 	_starpu_last_codelet_symbol[worker][sizeof(_starpu_last_codelet_symbol[worker])-1] = 0;
@@ -1551,12 +1555,8 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 	double last_start_codelet_time = last_codelet_start[worker];
 	last_codelet_start[worker] = start_codelet_time;
 
-	struct task_info *task = get_task(ev->param[0], options->file_rank);
-	create_paje_state_if_not_found(name, task->color, options);
-
 	task->start_time = start_codelet_time;
 	task->workerid = worker;
-	task->name = strdup(name);
 	task->node = node;
 
 	if (out_paje_file)
@@ -4251,6 +4251,22 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
 		free(options->worker_archtypes[i].devices);
 
+	struct _starpu_symbol_name *itor, *next;
+	for (itor = _starpu_symbol_name_list_begin(&symbol_list);
+		itor != _starpu_symbol_name_list_end(&symbol_list);
+		itor = next)
+	{
+		next = _starpu_symbol_name_list_next(itor);
+
+		_starpu_symbol_name_list_erase(&symbol_list, itor);
+		free(itor->name);
+		_starpu_symbol_name_delete(itor);
+	}
+
+	_starpu_fxt_component_deinit();
+
+	free_worker_ids();
+
 #ifdef HAVE_FXT_BLOCKEV_LEAVE
 	fxt_blockev_leave(block);
 #endif
@@ -4877,10 +4893,10 @@ struct starpu_data_trace_kernel
 
 static FILE *codelet_list;
 
-static void write_task(char *dir, struct parse_task pt)
+static void write_task(char *dir, struct parse_task *pt)
 {
 	struct starpu_data_trace_kernel *kernel;
-	char *codelet_name = pt.codelet_name;
+	char *codelet_name = pt->codelet_name;
 	HASH_FIND_STR(kernels, codelet_name, kernel);
 	//fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name);
 	if(kernel == NULL)
@@ -4898,8 +4914,8 @@ static void write_task(char *dir, struct parse_task pt)
 		HASH_ADD_STR(kernels, name, kernel);
 		fprintf(codelet_list, "%s\n", codelet_name);
 	}
-	double time = pt.exec_time * NANO_SEC_TO_MILI_SEC;
-	fprintf(kernel->file, "%lf %u %u\n", time, pt.data_total, pt.workerid);
+	double time = pt->exec_time * NANO_SEC_TO_MILI_SEC;
+	fprintf(kernel->file, "%lf %u %u\n", time, pt->data_total, pt->workerid);
 }
 
 void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
@@ -4957,6 +4973,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 			tasks[workerid].workerid = (unsigned)workerid;
 			tasks[workerid].exec_time = ev.time;
 			has_name = ev.param[4];
+			free(tasks[workerid].codelet_name);
 			tasks[workerid].codelet_name = strdup(has_name ? get_fxt_string(&ev, 5): "unknown");
 			//fprintf(stderr, "start codelet :[%d][%s]\n", workerid, tasks[workerid].codelet_name);
 			break;
@@ -4965,7 +4982,7 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 			workerid = ev.param[3];
 			assert(workerid != -1);
 			tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time;
-			write_task(dir, tasks[workerid]);
+			write_task(dir, &tasks[workerid]);
 			break;
 
 		case _STARPU_FUT_DATA_LOAD:
@@ -5002,6 +5019,12 @@ void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir)
 		exit(-1);
 	}
 
+	unsigned i;
+	for (i = 0; i < STARPU_NMAXWORKERS; i++)
+		free(tasks[i].codelet_name);
+
+	free_worker_ids();
+
 	struct starpu_data_trace_kernel *kernel=NULL, *tmp=NULL;
 	HASH_ITER(hh, kernels, kernel, tmp)
 	{

+ 1 - 0
src/debug/traces/starpu_fxt.h

@@ -82,6 +82,7 @@ void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options
 void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio);
 void _starpu_fxt_component_dump(FILE *output);
 void _starpu_fxt_component_finish(FILE *output);
+void _starpu_fxt_component_deinit(void);
 
 #endif // STARPU_USE_FXT
 

+ 8 - 4
src/drivers/mp_common/source_common.c

@@ -788,7 +788,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 	{
 		if (access(env_file_name, R_OK) == 0)
 		{
-			strncpy(located_file_name, env_file_name, len);
+			strncpy(located_file_name, env_file_name, len-1);
+			located_file_name[len-1] = '\0';
 			return 0;
 		}
 		else if(env_mic_path != NULL)
@@ -802,7 +803,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 	{
 		if (access(config_file_name, R_OK) == 0)
 		{
-			strncpy(located_file_name, config_file_name, len);
+			strncpy(located_file_name, config_file_name, len-1);
+			located_file_name[len-1] = '\0';
 			return 0;
 		}
 		else if (env_mic_path != NULL)
@@ -820,7 +822,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 		if (env_mic_path != NULL)
 		{
 			char actual_cpy[1024];
-			strncpy(actual_cpy, actual_file_name, sizeof(actual_cpy));
+			strncpy(actual_cpy, actual_file_name, sizeof(actual_cpy)-1);
+			actual_cpy[sizeof(actual_cpy)-1] = '\0';
 
 			char *last =  strrchr(actual_cpy, '/');
 			while (last != NULL)
@@ -831,7 +834,8 @@ int _starpu_src_common_locate_file(char *located_file_name, size_t len,
 
 				if (access(tmp, R_OK) == 0)
 				{
-					strncpy(located_file_name, tmp, len);
+					strncpy(located_file_name, tmp, len-1);
+					located_file_name[len-1] = '\0';
 					return 0;
 				}
 

+ 22 - 1
src/sched_policies/component_eager.c

@@ -24,6 +24,7 @@ struct _starpu_eager_data
 {
 	struct starpu_sched_component *target;
 	starpu_pthread_mutex_t scheduling_mutex;
+	int ntasks;
 };
 
 static int eager_push_task(struct starpu_sched_component * component, struct starpu_task * task)
@@ -34,6 +35,12 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 	struct _starpu_eager_data *d = component->data;
 	struct starpu_sched_component *target;
 
+	if (d->ntasks == 0)
+		/* We have already pushed a task down */
+		return 1;
+	if (d->ntasks > 0)
+		d->ntasks--;
+
 	if ((target = d->target))
 	{
 		/* target told us we could push to it, try to */
@@ -99,7 +106,7 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 	return 1;
 }
 
-/* Note: we can't use starpu_sched_component_pump_to because if a fifo below
+/* Note: we can't use starpu_sched_component_pump_to ourself because if a fifo below
  * refuses a task, we have no way to push it back to a fifo above. */
 static int eager_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
 {
@@ -108,12 +115,24 @@ static int eager_can_push(struct starpu_sched_component * component, struct star
 	STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex);
 	/* Target flow of tasks to this child */
 	d->target = to;
+	/* But make pump above push only one task */
+	d->ntasks = 1;
 	success = starpu_sched_component_can_push(component, to);
 	d->target = NULL;
+	d->ntasks = -1;
 	STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex);
 	return success;
 }
 
+static struct starpu_task *eager_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to)
+{
+	/* We can't directly pull (in case the obtained task does not match
+	 * the constraints of `to'), but we can try to push, and components
+	 * below will cope with it */
+	eager_can_push(component, to);
+	return NULL;
+}
+
 static void eager_deinit_data(struct starpu_sched_component *component)
 {
 	STARPU_ASSERT(starpu_sched_component_is_eager(component));
@@ -134,10 +153,12 @@ struct starpu_sched_component * starpu_sched_component_eager_create(struct starp
 	struct _starpu_eager_data *data;
 	_STARPU_MALLOC(data, sizeof(*data));
 	data->target = NULL;
+	data->ntasks = -1;
 	STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL);
 
 	component->data = data;
 	component->push_task = eager_push_task;
+	component->pull_task = eager_pull_task;
 	component->can_push = eager_can_push;
 	component->can_pull = starpu_sched_component_can_pull_all;
 	component->deinit_data = eager_deinit_data;

+ 2 - 4
src/sched_policies/component_fifo.c

@@ -92,9 +92,8 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 	const double now = starpu_timing_now();
 	STARPU_COMPONENT_MUTEX_LOCK(mutex);
 
-	if (data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
+	if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
 	{
-		STARPU_ASSERT(!is_pushback);
 		ret = 1;
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 	}
@@ -106,7 +105,7 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 		else
 			exp_len = queue->exp_len;
 
-		if (data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
+		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 			static int warned;
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
@@ -114,7 +113,6 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				warned = 1;
 			}
-			STARPU_ASSERT(!is_pushback);
 			ret = 1;
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		}

+ 2 - 4
src/sched_policies/component_prio.c

@@ -114,9 +114,8 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 
 	double exp_len = NAN;
 
-	if (data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
+	if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold)
 	{
-		STARPU_ASSERT(!is_pushback);
 		ret = 1;
 		STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 	}
@@ -127,7 +126,7 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 		else
 			exp_len = queue->exp_len;
 
-		if (data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
+		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 			static int warned;
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
@@ -135,7 +134,6 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
 				warned = 1;
 			}
-			STARPU_ASSERT(!is_pushback);
 			ret = 1;
 			STARPU_COMPONENT_MUTEX_UNLOCK(mutex);
 		}

+ 7 - 0
src/sched_policies/component_random.c

@@ -97,6 +97,12 @@ static int random_push_task(struct starpu_sched_component * component, struct st
 	return ret_val;
 }
 
+static struct starpu_task *random_pull_task(struct starpu_sched_component * from, struct starpu_sched_component *to)
+{
+	starpu_sched_component_can_push(from, to);
+	return NULL;
+}
+
 int starpu_sched_component_is_random(struct starpu_sched_component *component)
 {
 	return component->push_task == random_push_task;
@@ -107,5 +113,6 @@ struct starpu_sched_component * starpu_sched_component_random_create(struct star
 	(void)arg;
 	struct starpu_sched_component * component = starpu_sched_component_create(tree, "random");
 	component->push_task = random_push_task;
+	component->pull_task = random_pull_task;
 	return component;
 }

+ 1 - 1
tests/microbenchs/parallel_independent_heterogeneous_tasks.sh

@@ -16,6 +16,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XFAIL="modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 test_scheds parallel_independent_heterogeneous_tasks

+ 1 - 1
tests/microbenchs/parallel_independent_homogeneous_tasks.sh

@@ -16,6 +16,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
+XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test"
 
 test_scheds parallel_independent_homogeneous_tasks

+ 1 - 0
tests/model-checking/platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/dev/lsan/suppressions

@@ -33,3 +33,4 @@ leak:hwloc_plugins_exit
 
 # papi
 leak:_pe_libpfm4_init
+leak:allocate_thread

+ 8 - 0
tools/dev/valgrind/fxt.suppr

@@ -48,3 +48,11 @@
    fun:fxt_setinfos
    fun:fut_setup
 }
+
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   fun:fxt_get_cpu_info
+   fun:fxt_setinfos
+   fun:fut_setup
+}

+ 9 - 0
tools/dev/valgrind/papi.suppr

@@ -22,3 +22,12 @@
    fun:_pe_libpfm4_init
    ...
 }
+
+# This happens in multithreaded_init: papi does not support getting initialized in one thread and shut down in another thread.
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Leak
+   ...
+   fun:allocate_thread
+   ...
+}

+ 4 - 1
tools/gdbinit

@@ -381,7 +381,10 @@ define starpu-print-data
   if $data->ops->interfaceid == 8
     printf "COO\n"
   end
-  if $data->ops->interfaceid > 8
+  if $data->ops->interfaceid == 9
+    printf "Tensor\n"
+  end
+  if $data->ops->interfaceid > 9
     printf "Interface id %d\n", $data->ops->interfaceid
   end
   printf "Home node %d\n", $data->home_node

+ 1 - 0
tools/perfmodels/sampling/bus/attila.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/attila.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal.platform.v4.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/idgraf.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/idgraf.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/mirage.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/mirage.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/perfmodels/sampling/bus/sirocco.platform.v4.xml

@@ -8,6 +8,7 @@
    <prop id="network/latency-factor" value="1"></prop>
    <prop id="network/bandwidth-factor" value="1"></prop>
    <prop id="network/weight-S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" speed="1f"/>

+ 1 - 0
tools/perfmodels/sampling/bus/sirocco.platform.xml

@@ -6,6 +6,7 @@
    <prop id="network/latency_factor" value="1"></prop>
    <prop id="network/bandwidth_factor" value="1"></prop>
    <prop id="network/weight_S" value="0.0"></prop>
+   <prop id="network/crosstraffic" value="0"></prop>
  </config>
  <AS  id="AS0"  routing="Full">
    <host id="MAIN" power="1"/>

+ 1 - 0
tools/starpu_fxt_data_trace.c

@@ -186,5 +186,6 @@ int main(int argc, char **argv)
 	starpu_fxt_write_data_trace_in_dir(argv[1+pos], directory);
 	write_gp(directory, argc - (2 + pos), argv + 2 + pos);
 	starpu_perfmodel_free_sampling();
+	free(directory);
 	return 0;
 }

+ 14 - 0
tools/starpu_fxt_stats.c

@@ -184,6 +184,20 @@ int main(int argc, char **argv)
 		}
 	}
 
+#ifdef HAVE_FXT_BLOCKEV_LEAVE
+	fxt_blockev_leave(block);
+#endif
+
+#ifdef HAVE_FXT_CLOSE
+	fxt_close(fut);
+#else
+	if (close(fd_in))
+	{
+	        perror("close failed :");
+	        exit(-1);
+	}
+#endif
+
 	fprintf(fd_out, "Start : start time %e end time %e length %e\n", start_time, end_time, end_time - start_time);
 
 	unsigned src, dst;

+ 1 - 0
tools/starpu_perfmodel_plot.c

@@ -492,6 +492,7 @@ static void dump_data_file(FILE *data_file, struct _perfmodel_plot_options *opti
 		}
 		free(tmp);
 	}
+	free(options->dumped_codelets);
 }
 #endif
 

+ 2 - 0
tools/starpu_perfmodel_recdump.c

@@ -386,8 +386,10 @@ int main(int argc, char **argv)
 					l = ltmp;
 				}
 
+				starpu_perfmodel_unload_model(&model->model);
 				free(model->name);
 				HASH_DEL(models, model);
+				free(model);
 			}
 		}
 		fclose(input);

+ 3 - 1
tools/starpu_tasks_rec_complete.c

@@ -192,8 +192,10 @@ int main(int argc, char *argv[])
 	starpu_shutdown();
 	HASH_ITER(hh, models, model, tmp)
 	{
-		free(model->name);
 		HASH_DEL(models, model);
+		starpu_perfmodel_unload_model(&model->model);
+		free(model->name);
+		free(model);
 	}
 	return 0;
 }