Browse Source

Merge from trunk @11526:11613

Marc Sergent 11 years ago
parent
commit
5bbe09ce59
86 changed files with 12075 additions and 425 deletions
  1. 5 3
      ChangeLog
  2. 2 7
      configure.ac
  3. 12 0
      doc/doxygen/Makefile.am
  4. 1 1
      doc/doxygen/chapters/advanced_examples.doxy
  5. 3 0
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  6. BIN
      doc/doxygen/chapters/data_trace.pdf
  7. 4283 0
      doc/doxygen/chapters/distrib_data.eps
  8. BIN
      doc/doxygen/chapters/distrib_data.pdf
  9. BIN
      doc/doxygen/chapters/distrib_data.png
  10. 1613 0
      doc/doxygen/chapters/distrib_data_histo.eps
  11. BIN
      doc/doxygen/chapters/distrib_data_histo.pdf
  12. BIN
      doc/doxygen/chapters/distrib_data_histo.png
  13. 13 1
      doc/doxygen/chapters/performance_feedback.doxy
  14. 1036 0
      doc/doxygen/chapters/starpu_chol_model_11_type.eps
  15. BIN
      doc/doxygen/chapters/starpu_chol_model_11_type.pdf
  16. BIN
      doc/doxygen/chapters/starpu_chol_model_11_type.png
  17. 4460 0
      doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.eps
  18. BIN
      doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.pdf
  19. BIN
      doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.png
  20. 4 4
      examples/interface/complex.c
  21. 1 1
      examples/interface/complex_interface.c
  22. 34 1
      examples/stencil/Makefile.am
  23. 3 13
      examples/stencil/life_opencl.c
  24. 4 14
      examples/stencil/shadow_opencl.c
  25. 52 3
      examples/stencil/stencil-blocks.c
  26. 16 13
      examples/stencil/stencil-kernels.c
  27. 28 23
      examples/stencil/stencil-tasks.c
  28. 36 19
      examples/stencil/stencil.c
  29. 4 0
      examples/stencil/stencil.h
  30. 4 1
      include/starpu_task.h
  31. 3 3
      mpi/examples/complex/mpi_complex.c
  32. 3 3
      mpi/src/starpu_mpi.c
  33. 4 4
      mpi/tests/datatypes.c
  34. 2 2
      mpi/tests/gather.c
  35. 2 2
      mpi/tests/gather2.c
  36. 5 2
      mpi/tests/insert_task_owner2.c
  37. 9 5
      mpi/tests/insert_task_owner_data.c
  38. 1 1
      mpi/tests/user_defined_datatype_value.h
  39. 2 3
      socl/src/Makefile.am
  40. 39 1
      src/common/fxt.h
  41. 3 6
      src/core/dependencies/implicit_data_deps.c
  42. 1 5
      src/core/jobs.h
  43. 7 6
      src/core/perfmodel/perfmodel_history.c
  44. 19 0
      src/core/perfmodel/perfmodel_nan.c
  45. 32 10
      src/core/sched_ctx.c
  46. 2 1
      src/core/sched_policy.c
  47. 10 7
      src/core/task.c
  48. 4 4
      src/core/workers.c
  49. 2 0
      src/core/workers.h
  50. 3 0
      src/datawizard/copy_driver.c
  51. 6 1
      src/datawizard/datastats.c
  52. 2 0
      src/datawizard/filters.c
  53. 8 8
      src/datawizard/interfaces/bcsr_interface.c
  54. 6 6
      src/datawizard/interfaces/block_interface.c
  55. 3 3
      src/datawizard/interfaces/coo_interface.c
  56. 4 4
      src/datawizard/interfaces/csr_interface.c
  57. 2 2
      src/datawizard/interfaces/data_interface.c
  58. 5 5
      src/datawizard/interfaces/matrix_interface.c
  59. 3 3
      src/datawizard/interfaces/multiformat_interface.c
  60. 3 3
      src/datawizard/interfaces/variable_interface.c
  61. 4 4
      src/datawizard/interfaces/vector_interface.c
  62. 6 2
      src/datawizard/malloc.c
  63. 9 1
      src/datawizard/memalloc.c
  64. 3 0
      src/datawizard/reduction.c
  65. 6 16
      src/datawizard/user_interactions.c
  66. 65 14
      src/debug/traces/starpu_fxt.c
  67. 6 2
      src/debug/traces/starpu_paje.c
  68. 1 0
      src/sched_policies/fifo_queues.c
  69. 2 8
      src/util/execute_on_all.c
  70. 4 4
      src/util/misc.c
  71. 2 1
      src/util/starpu_create_sync_task.c
  72. 1 0
      src/util/starpu_data_cpy.c
  73. 2 0
      src/util/starpu_task_insert.c
  74. 1 0
      tests/Makefile.am
  75. 3 11
      tests/main/driver_api/run_driver.c
  76. 1 1
      tests/microbenchs/prefetch_data_on_node.c
  77. 110 0
      tests/parallel_tasks/cuda_only.c
  78. 9 8
      tests/perfmodels/value_nan.c
  79. 14 6
      tools/gdbinit
  80. 0 75
      tools/model.sh
  81. 1 1
      tools/starpu_codelet_histo_profile.in
  82. 1 1
      tools/starpu_fxt_stats.c
  83. 2 2
      tools/starpu_perfmodel_display.c
  84. 5 4
      tools/starpu_perfmodel_plot.c
  85. 0 60
      tools/valgrind/fscanf.suppr
  86. 8 0
      tools/valgrind/pthread.suppr

+ 5 - 3
ChangeLog

@@ -76,12 +76,14 @@ Small features:
     renamed in starpu_task_insert and starpu_mpi_task_insert. Old
     renamed in starpu_task_insert and starpu_mpi_task_insert. Old
     names are kept to avoid breaking old codes.
     names are kept to avoid breaking old codes.
   * New configure option --enable-calibration-heuristic which allows
   * New configure option --enable-calibration-heuristic which allows
-    the user to set the maximum authorized deviation of the 
-    history-based calibrator. 
+    the user to set the maximum authorized deviation of the
+    history-based calibrator.
+  * Tasks can now have a name (via the field const char *name of
+    struct starpu_task)
 
 
 Changes:
 Changes:
   * Fix of the livelock issue discovered while executing applications
   * Fix of the livelock issue discovered while executing applications
-    on a CPU+GPU cluster of machines by adding a maximum trylock 
+    on a CPU+GPU cluster of machines by adding a maximum trylock
     threshold before a blocking lock.
     threshold before a blocking lock.
   * Data interfaces (variable, vector, matrix and block) now define
   * Data interfaces (variable, vector, matrix and block) now define
     pack und unpack functions
     pack und unpack functions

+ 2 - 7
configure.ac

@@ -1606,13 +1606,8 @@ AC_MSG_CHECKING(calibration heuristic of history-based StarPU calibrator)
 AC_ARG_ENABLE(calibration-heuristic, [AS_HELP_STRING([--enable-calibration-heuristic=<number>],
 AC_ARG_ENABLE(calibration-heuristic, [AS_HELP_STRING([--enable-calibration-heuristic=<number>],
 			[Define the maximum authorized deviation of StarPU history-based calibrator.])],
 			[Define the maximum authorized deviation of StarPU history-based calibrator.])],
 			calibration_heuristic=$enableval, calibration_heuristic=10)
 			calibration_heuristic=$enableval, calibration_heuristic=10)
-if test $calibration_heuristic -gt 100; then
-	AC_MSG_RESULT(uncorrect parameter $calibration_heuristic  set default parameter 10)
-	AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value])
-else
-	AC_MSG_RESULT($calibration_heuristic)
-	AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value])
-fi
+AC_MSG_RESULT($calibration_heuristic)
+AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value])
 
 
 
 
 ###############################################################################
 ###############################################################################

+ 12 - 0
doc/doxygen/Makefile.am

@@ -145,9 +145,21 @@ EXTRA_DIST	= 					\
 	chapters/data_trace.eps				\
 	chapters/data_trace.eps				\
 	chapters/data_trace.pdf				\
 	chapters/data_trace.pdf				\
 	chapters/data_trace.png				\
 	chapters/data_trace.png				\
+	chapters/distrib_data.png	\
+	chapters/distrib_data.eps	\
+	chapters/distrib_data.pdf	\
+	chapters/distrib_data_histo.png	\
+	chapters/distrib_data_histo.eps	\
+	chapters/distrib_data_histo.pdf	\
+	chapters/starpu_chol_model_11_type.png	\
+	chapters/starpu_chol_model_11_type.eps	\
+	chapters/starpu_chol_model_11_type.pdf	\
 	chapters/starpu_non_linear_memset_regression_based.png	\
 	chapters/starpu_non_linear_memset_regression_based.png	\
 	chapters/starpu_non_linear_memset_regression_based.eps	\
 	chapters/starpu_non_linear_memset_regression_based.eps	\
 	chapters/starpu_non_linear_memset_regression_based.pdf	\
 	chapters/starpu_non_linear_memset_regression_based.pdf	\
+	chapters/starpu_non_linear_memset_regression_based_2.png	\
+	chapters/starpu_non_linear_memset_regression_based_2.eps	\
+	chapters/starpu_non_linear_memset_regression_based_2.pdf	\
 	chapters/starpu_starpu_slu_lu_model_11.png	\
 	chapters/starpu_starpu_slu_lu_model_11.png	\
 	chapters/starpu_starpu_slu_lu_model_11.eps	\
 	chapters/starpu_starpu_slu_lu_model_11.eps	\
 	chapters/starpu_starpu_slu_lu_model_11.pdf	\
 	chapters/starpu_starpu_slu_lu_model_11.pdf	\

+ 1 - 1
doc/doxygen/chapters/advanced_examples.doxy

@@ -1103,7 +1103,7 @@ complex interface from a StarPU data handle.
 double *starpu_complex_get_real(starpu_data_handle_t handle)
 double *starpu_complex_get_real(starpu_data_handle_t handle)
 {
 {
         struct starpu_complex_interface *complex_interface =
         struct starpu_complex_interface *complex_interface =
-          (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
+          (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
         return complex_interface->real;
         return complex_interface->real;
 }
 }
 
 

+ 3 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -331,6 +331,9 @@ starpu_task_create(), or declared statically. In the latter case, the
 programmer has to zero the structure starpu_task and to fill the
 programmer has to zero the structure starpu_task and to fill the
 different fields properly. The indicated default values correspond to
 different fields properly. The indicated default values correspond to
 the configuration of a task allocated with starpu_task_create().
 the configuration of a task allocated with starpu_task_create().
+\var starpu_task::name
+Optional name of the task. This can be useful for debugging
+purposes.
 \var starpu_task::cl
 \var starpu_task::cl
 Is a pointer to the corresponding structure starpu_codelet. This
 Is a pointer to the corresponding structure starpu_codelet. This
 describes where the kernel should be executed, and supplies the
 describes where the kernel should be executed, and supplies the

BIN
doc/doxygen/chapters/data_trace.pdf


File diff suppressed because it is too large
+ 4283 - 0
doc/doxygen/chapters/distrib_data.eps


BIN
doc/doxygen/chapters/distrib_data.pdf


BIN
doc/doxygen/chapters/distrib_data.png


File diff suppressed because it is too large
+ 1613 - 0
doc/doxygen/chapters/distrib_data_histo.eps


BIN
doc/doxygen/chapters/distrib_data_histo.pdf


BIN
doc/doxygen/chapters/distrib_data_histo.png


+ 13 - 1
doc/doxygen/chapters/performance_feedback.doxy

@@ -430,12 +430,15 @@ When the field starpu_task::flops is set, <c>starpu_perfmodel_plot</c> can
 directly draw a GFlops curve, by simply adding the <c>-f</c> option:
 directly draw a GFlops curve, by simply adding the <c>-f</c> option:
 
 
 \verbatim
 \verbatim
-$ starpu_perfmodel_display -f -s chol_model_11
+$ starpu_perfmodel_plot -f -s chol_model_11
 \endverbatim
 \endverbatim
 
 
 This will however disable displaying the regression model, for which we can not
 This will however disable displaying the regression model, for which we can not
 compute GFlops.
 compute GFlops.
 
 
+\image html starpu_chol_model_11_type.png
+\image latex starpu_chol_model_11_type.eps "" width=\textwidth
+
 When the FxT trace file <c>filename</c> has been generated, it is possible to
 When the FxT trace file <c>filename</c> has been generated, it is possible to
 get a profiling of each codelet by calling:
 get a profiling of each codelet by calling:
 
 
@@ -448,6 +451,9 @@ This will create profiling data files, and a <c>.gp</c> file in the current
 directory, which draws the distribution of codelet time over the application
 directory, which draws the distribution of codelet time over the application
 execution, according to data input size.
 execution, according to data input size.
 
 
+\image html distrib_data.png
+\image latex distrib_data.eps "" width=\textwidth
+
 This is also available in the tool <c>starpu_perfmodel_plot</c>, by passing it
 This is also available in the tool <c>starpu_perfmodel_plot</c>, by passing it
 the fxt trace:
 the fxt trace:
 
 
@@ -458,6 +464,9 @@ $ starpu_perfmodel_plot -s non_linear_memset_regression_based -i /tmp/prof_file_
 It will produce a <c>.gp</c> file which contains both the performance model
 It will produce a <c>.gp</c> file which contains both the performance model
 curves, and the profiling measurements.
 curves, and the profiling measurements.
 
 
+\image html starpu_non_linear_memset_regression_based_2.png
+\image latex starpu_non_linear_memset_regression_based_2.eps "" width=\textwidth
+
 If you have the statistical tool <c>R</c> installed, you can additionally use
 If you have the statistical tool <c>R</c> installed, you can additionally use
 
 
 \verbatim
 \verbatim
@@ -467,6 +476,9 @@ $ starpu_codelet_histo_profile distrib.data
 Which will create one <c>.pdf</c> file per codelet and per input size, showing a
 Which will create one <c>.pdf</c> file per codelet and per input size, showing a
 histogram of the codelet execution time distribution.
 histogram of the codelet execution time distribution.
 
 
+\image html distrib_data_histo.png
+\image latex distrib_data_histo.eps "" width=\textwidth
+
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 
 
 StarPU can record a trace of what tasks are needed to complete the
 StarPU can record a trace of what tasks are needed to complete the

File diff suppressed because it is too large
+ 1036 - 0
doc/doxygen/chapters/starpu_chol_model_11_type.eps


BIN
doc/doxygen/chapters/starpu_chol_model_11_type.pdf


BIN
doc/doxygen/chapters/starpu_chol_model_11_type.png


File diff suppressed because it is too large
+ 4460 - 0
doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.eps


BIN
doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.pdf


BIN
doc/doxygen/chapters/starpu_non_linear_memset_regression_based_2.png


+ 4 - 4
examples/interface/complex.c

@@ -93,11 +93,11 @@ int main(int argc, char **argv)
 	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
 	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);
 
 
-	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1"), STARPU_R, handle1, 0);
+	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 
-	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2"), STARPU_R, handle2, 0);
+	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 
@@ -122,11 +122,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 
-	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1"), STARPU_R, handle1, 0);
+	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 
-	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2"), STARPU_R, handle2, 0);
+	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
 	if (ret == -ENODEV) goto end;
 	if (ret == -ENODEV) goto end;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 
 

+ 1 - 1
examples/interface/complex_interface.c

@@ -97,7 +97,7 @@ static void complex_free_data_on_node(void *data_interface, unsigned node)
 static size_t complex_get_size(starpu_data_handle_t handle)
 static size_t complex_get_size(starpu_data_handle_t handle)
 {
 {
 	size_t size;
 	size_t size;
-	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	size = complex_interface->nx * 2 * sizeof(double);
 	size = complex_interface->nx * 2 * sizeof(double);
 	return size;
 	return size;

+ 34 - 1
examples/stencil/Makefile.am

@@ -40,11 +40,44 @@ NVCCFLAGS += $(HWLOC_CFLAGS)
 
 
 endif
 endif
 
 
+#####################################
+# What to install and what to check #
+#####################################
+
+if STARPU_HAVE_WINDOWS
+check_PROGRAMS	=	$(STARPU_EXAMPLES)
+else
+check_PROGRAMS	=	$(LOADER) $(STARPU_EXAMPLES)
+endif
+
+TESTS		=	$(STARPU_EXAMPLES)
+
+if !STARPU_HAVE_WINDOWS
+## test loader program
+if !STARPU_CROSS_COMPILING
+LOADER			=	loader
+loader_CPPFLAGS 	= 	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+LOADER_BIN		=	./$(LOADER)
+loader_SOURCES		=	../../tests/loader.c
+else
+LOADER			=
+LOADER_BIN		=	$(top_builddir)/tests/loader-cross.sh
+endif
+
+if STARPU_HAVE_AM111
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
+LOG_COMPILER		=	$(LOADER_BIN)
+else
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN)
+endif
+
+endif
+
 ###################
 ###################
 # stencil example #
 # stencil example #
 ###################
 ###################
 
 
-check_PROGRAMS =				\
+STARPU_EXAMPLES =				\
 	stencil
 	stencil
 
 
 examplebindir = $(libdir)/starpu/examples/stencil
 examplebindir = $(libdir)/starpu/examples/stencil

+ 3 - 13
examples/stencil/life_opencl.c

@@ -84,20 +84,10 @@ void opencl_life_free(void)
 void
 void
 opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
 opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
 {
 {
-	unsigned max_parallelism = 512;
-	unsigned threads_per_dim_x = max_parallelism;
-	while (threads_per_dim_x / 2 >= nx)
-		threads_per_dim_x /= 2;
-	unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
-	while (threads_per_dim_y / 2 >= ny)
-		threads_per_dim_y /= 2;
 #if 0
 #if 0
-	unsigned threads_per_dim_z = 4;
-	size_t dimBlock[] = {threads_per_dim_x, threads_per_dim_y, threads_per_dim_z};
-	size_t dimGrid[] = {nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z};
+	size_t dim[] = {nx, ny, nz};
 #else
 #else
-	size_t dimBlock[] = {threads_per_dim_x, threads_per_dim_y, 1};
-	size_t dimGrid[] = {((nx + threads_per_dim_x-1) / threads_per_dim_x)*threads_per_dim_x, ((ny + threads_per_dim_y-1) / threads_per_dim_y)*threads_per_dim_y, 1};
+	size_t dim[] = {nx, ny, 1};
 #endif
 #endif
 
 
   int devid,id;
   int devid,id;
@@ -119,7 +109,7 @@ opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int
   clSetKernelArg(kernel, 8, sizeof(iter), &iter);
   clSetKernelArg(kernel, 8, sizeof(iter), &iter);
 
 
   cl_event ev;
   cl_event ev;
-  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dimGrid, dimBlock, 0, NULL, &ev);
+  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
   clWaitForEvents(1, &ev);
   clWaitForEvents(1, &ev);
   starpu_opencl_collect_stats(ev);
   starpu_opencl_collect_stats(ev);
   clReleaseEvent(ev);
   clReleaseEvent(ev);

+ 4 - 14
examples/stencil/shadow_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -81,20 +81,10 @@ void opencl_shadow_free(void)
 void
 void
 opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
 opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
 {
 {
-	unsigned max_parallelism = 512;
-	unsigned threads_per_dim_x = max_parallelism;
-	while (threads_per_dim_x / 2 >= nx)
-		threads_per_dim_x /= 2;
-	unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
-	while (threads_per_dim_y / 2 >= ny)
-		threads_per_dim_y /= 2;
 #if 0
 #if 0
-	unsigned threads_per_dim_z = 4;
-	size_t dimBlock[] = {threads_per_dim_x, threads_per_dim_y, threads_per_dim_z};
-	size_t dimGrid[] = {nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z};
+	size_t dim[] = {nx, ny, nz};
 #else
 #else
-	size_t dimBlock[] = {threads_per_dim_x, threads_per_dim_y, 1};
-	size_t dimGrid[] = {((nx + threads_per_dim_x-1) / threads_per_dim_x)*threads_per_dim_x, ((ny + threads_per_dim_y-1) / threads_per_dim_y)*threads_per_dim_y, 1};
+	size_t dim[] = {nx, ny, 1};
 #endif
 #endif
 
 
         int devid,id;
         int devid,id;
@@ -115,7 +105,7 @@ opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz,
         clSetKernelArg(kernel, 7, sizeof(i), &i);
         clSetKernelArg(kernel, 7, sizeof(i), &i);
 
 
         cl_event ev;
         cl_event ev;
-        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dimGrid, dimBlock, 0, NULL, &ev);
+        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
         if (err != CL_SUCCESS)
         if (err != CL_SUCCESS)
                 STARPU_OPENCL_REPORT_ERROR(err);
                 STARPU_OPENCL_REPORT_ERROR(err);
         clWaitForEvents(1, &ev);
         clWaitForEvents(1, &ev);

+ 52 - 3
examples/stencil/stencil-blocks.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -157,6 +157,12 @@ void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsi
 	}
 	}
 }
 }
 
 
+void free_blocks_array()
+{
+	free(blocks);
+	free(block_sizes_z);
+}
+
 /*
 /*
  *	Initialization of the blocks
  *	Initialization of the blocks
  */
  */
@@ -265,9 +271,16 @@ static void allocate_block_on_node(starpu_data_handle_t *handleptr, TYPE **ptr,
 	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
 	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
 }
 }
 
 
+static void free_block_on_node(starpu_data_handle_t handleptr)
+{
+	void *ptr = (void *) starpu_block_get_local_ptr(handleptr);
+	starpu_data_unregister(handleptr);
+	starpu_free(ptr);
+}
+
 void display_memory_consumption(int rank)
 void display_memory_consumption(int rank)
 {
 {
-	fprintf(stderr, "%lu MB of memory were allocated on node %d\n", allocated/(1024*1024), rank);
+	FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", allocated, rank);
 }
 }
 
 
 void allocate_memory_on_node(int rank)
 void allocate_memory_on_node(int rank)
@@ -280,7 +293,7 @@ void allocate_memory_on_node(int rank)
 		int node = block->mpi_node;
 		int node = block->mpi_node;
 
 
 		unsigned size_bz = block_sizes_z[bz];
 		unsigned size_bz = block_sizes_z[bz];
-	
+
 		/* Main blocks */
 		/* Main blocks */
 		if (node == rank)
 		if (node == rank)
 		{
 		{
@@ -322,6 +335,42 @@ void allocate_memory_on_node(int rank)
 	}
 	}
 }
 }
 
 
+void free_memory_on_node(int rank)
+{
+	unsigned bz;
+	for (bz = 0; bz < nbz; bz++)
+	{
+		struct block_description *block = get_block_description(bz);
+
+		int node = block->mpi_node;
+
+		unsigned size_bz = block_sizes_z[bz];
+
+		/* Main blocks */
+		if (node == rank)
+		{
+			free_block_on_node(block->layers_handle[0]);
+			free_block_on_node(block->layers_handle[1]);
+		}
+
+		/* Boundary blocks : Top */
+		int top_node = block->boundary_blocks[T]->mpi_node;
+		if ((node == rank) || (top_node == rank))
+		{
+			free_block_on_node(block->boundaries_handle[T][0]);
+			free_block_on_node(block->boundaries_handle[T][1]);
+		}
+
+		/* Boundary blocks : Bottom */
+		int bottom_node = block->boundary_blocks[B]->mpi_node;
+		if ((node == rank) || (bottom_node == rank))
+		{
+			free_block_on_node(block->boundaries_handle[B][0]);
+			free_block_on_node(block->boundaries_handle[B][1]);
+		}
+	}
+}
+
 /* check how many cells are alive */
 /* check how many cells are alive */
 void check(int rank)
 void check(int rank)
 {
 {

+ 16 - 13
examples/stencil/stencil-kernels.c

@@ -18,8 +18,6 @@
 #include "stencil.h"
 #include "stencil.h"
 #include <sys/time.h>
 #include <sys/time.h>
 
 
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-
 #ifndef timersub
 #ifndef timersub
 #define	timersub(x, y, res) \
 #define	timersub(x, y, res) \
 	do \
 	do \
@@ -216,9 +214,9 @@ static void update_func_cuda(void *descr[], void *arg)
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	if (block->bz == 0)
 	if (block->bz == 0)
-fprintf(stderr,"!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
+		FPRINTF(stderr,"!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
 	else
 	else
-	DEBUG( "!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
+		DEBUG( "!!! DO update_func_cuda z %d CUDA%d !!!\n", block->bz, workerid);
 #ifdef STARPU_USE_MPI
 #ifdef STARPU_USE_MPI
 	int rank = 0;
 	int rank = 0;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -289,8 +287,8 @@ static void load_subblock_from_buffer_opencl(struct starpu_block_interface *bloc
 	size_t boundary_size = K*block->ldz*block->elemsize;
 	size_t boundary_size = K*block->ldz*block->elemsize;
 
 
 	unsigned offset = firstz*block->ldz;
 	unsigned offset = firstz*block->ldz;
-	cl_mem block_data = (cl_mem)block->ptr;
-	cl_mem boundary_data = (cl_mem)boundary->ptr;
+	cl_mem block_data = (cl_mem)block->dev_handle;
+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
 	cl_event event;
 	cl_event event;
 
 
         cl_command_queue cq;
         cl_command_queue cq;
@@ -311,9 +309,9 @@ static void update_func_opencl(void *descr[], void *arg)
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 	if (block->bz == 0)
 	if (block->bz == 0)
-fprintf(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
+		FPRINTF(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
 	else
 	else
-	DEBUG( "!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
+		DEBUG( "!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, workerid);
 #ifdef STARPU_USE_MPI
 #ifdef STARPU_USE_MPI
 	int rank = 0;
 	int rank = 0;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -351,7 +349,7 @@ fprintf(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, worke
 	for (i=1; i<=K; i++)
 	for (i=1; i<=K; i++)
 	{
 	{
 		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
 		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
-		TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr;
+		TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle;
 
 
 		/* Shadow data */
 		/* Shadow data */
 		opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
 		opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
@@ -361,15 +359,19 @@ fprintf(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, worke
 		opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
 		opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
 #else
 #else
 		cl_event event;
 		cl_event event;
-                clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event);
+                cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event);
+		if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
+
 		clWaitForEvents(1, &event);
 		clWaitForEvents(1, &event);
 		clReleaseEvent(event);
 		clReleaseEvent(event);
 #endif /* LIFE */
 #endif /* LIFE */
 	}
 	}
 
 
+#ifndef LIFE
 	cl_int err;
 	cl_int err;
 	if ((err = clFinish(cq)))
 	if ((err = clFinish(cq)))
 		STARPU_OPENCL_REPORT_ERROR(err);
 		STARPU_OPENCL_REPORT_ERROR(err);
+#endif
 
 
 	if (block->bz == 0)
 	if (block->bz == 0)
 		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
 		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
@@ -526,14 +528,15 @@ static void load_subblock_into_buffer_opencl(struct starpu_block_interface *bloc
 	size_t boundary_size = K*block->ldz*block->elemsize;
 	size_t boundary_size = K*block->ldz*block->elemsize;
 
 
 	unsigned offset = firstz*block->ldz;
 	unsigned offset = firstz*block->ldz;
-	cl_mem block_data = (cl_mem)block->ptr;
-	cl_mem boundary_data = (cl_mem)boundary->ptr;
+	cl_mem block_data = (cl_mem)block->dev_handle;
+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
 
 
         cl_command_queue cq;
         cl_command_queue cq;
         starpu_opencl_get_current_queue(&cq);
         starpu_opencl_get_current_queue(&cq);
 	cl_event event;
 	cl_event event;
 
 
-        clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, &event);
+        cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, &event);
+	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
 
 
 	clWaitForEvents(1, &event);
 	clWaitForEvents(1, &event);
 	clReleaseEvent(event);
 	clReleaseEvent(event);

+ 28 - 23
examples/stencil/stencil-tasks.c

@@ -32,7 +32,7 @@
 #if 0
 #if 0
 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
 #else
 #else
-# define DEBUG(fmt, ...) 
+# define DEBUG(fmt, ...)
 #endif
 #endif
 
 
 /*
 /*
@@ -65,7 +65,7 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, int local
 	int ret = starpu_task_submit(save_task);
 	int ret = starpu_task_submit(save_task);
 	if (ret)
 	if (ret)
 	{
 	{
-		fprintf(stderr, "Could not submit task save: %d\n", ret);
+		FPRINTF(stderr, "Could not submit task save: %d\n", ret);
 		STARPU_ABORT();
 		STARPU_ABORT();
 	}
 	}
 }
 }
@@ -179,7 +179,6 @@ void create_task_update(unsigned iter, unsigned z, int local_rank)
 	/* We are going to synchronize with the last tasks */
 	/* We are going to synchronize with the last tasks */
 	if (iter == niter)
 	if (iter == niter)
 	{
 	{
-		task->detach = 0;
 		task->use_tag = 1;
 		task->use_tag = 1;
 		task->tag_id = TAG_FINISH(z);
 		task->tag_id = TAG_FINISH(z);
 	}
 	}
@@ -207,7 +206,7 @@ void create_task_update(unsigned iter, unsigned z, int local_rank)
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
 	if (ret)
 	if (ret)
 	{
 	{
-		fprintf(stderr, "Could not submit task update block: %d\n", ret);
+		FPRINTF(stderr, "Could not submit task update block: %d\n", ret);
 		STARPU_ABORT();
 		STARPU_ABORT();
 	}
 	}
 }
 }
@@ -243,7 +242,7 @@ void create_start_task(int z, int dir)
 	int ret = starpu_task_submit(wait_init);
 	int ret = starpu_task_submit(wait_init);
 	if (ret)
 	if (ret)
 	{
 	{
-		fprintf(stderr, "Could not submit task initial wait: %d\n", ret);
+		FPRINTF(stderr, "Could not submit task initial wait: %d\n", ret);
 		STARPU_ABORT();
 		STARPU_ABORT();
 	}
 	}
 }
 }
@@ -269,23 +268,23 @@ void create_tasks(int rank)
 
 
 	for (iter = 0; iter <= niter; iter++)
 	for (iter = 0; iter <= niter; iter++)
 	{
 	{
-	for (bz = 0; bz < nbz; bz++)
-	{
-		if ((iter > 0) && (get_block_mpi_node(bz) == rank))
-			create_task_update(iter, bz, rank);
-
-	}
-	for (bz = 0; bz < nbz; bz++)
-	{
-		if (iter != niter)
-		{
-			if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
-				create_task_save(iter, bz, +1, rank);
-	
-			if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
-				create_task_save(iter, bz, -1, rank);
-		}
-	}
+	     for (bz = 0; bz < nbz; bz++)
+	     {
+		  if ((iter > 0) && (get_block_mpi_node(bz) == rank))
+			  create_task_update(iter, bz, rank);
+
+	     }
+	     for (bz = 0; bz < nbz; bz++)
+	     {
+		     if (iter != niter)
+		     {
+			     if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
+				     create_task_save(iter, bz, +1, rank);
+
+			     if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
+				     create_task_save(iter, bz, -1, rank);
+		     }
+	     }
 	}
 	}
 }
 }
 
 
@@ -308,7 +307,13 @@ void wait_end_tasks(int rank)
 			struct block_description *block = get_block_description(bz);
 			struct block_description *block = get_block_description(bz);
 			starpu_data_acquire(block->layers_handle[0], STARPU_R);
 			starpu_data_acquire(block->layers_handle[0], STARPU_R);
 			starpu_data_acquire(block->layers_handle[1], STARPU_R);
 			starpu_data_acquire(block->layers_handle[1], STARPU_R);
+			/* the data_acquire here is done to make sure
+			 * the data is sent back to the ram memory, we
+			 * can safely do a data_release, to avoid the
+			 * data_unregister to block later on
+			 */
+			starpu_data_release(block->layers_handle[0]);
+			starpu_data_release(block->layers_handle[1]);
 		}
 		}
 	}
 	}
 }
 }
-

+ 36 - 19
examples/stencil/stencil.c

@@ -23,10 +23,15 @@
 /* default parameter values */
 /* default parameter values */
 static unsigned  bind_tasks = 0;
 static unsigned  bind_tasks = 0;
 
 
-static unsigned niter = 32;
 static unsigned ticks = 1000;
 static unsigned ticks = 1000;
 
 
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 4;
+#define SIZE 16
+#else
+static unsigned niter = 32;
 #define SIZE 128
 #define SIZE 128
+#endif
 
 
 /* Problem size */
 /* Problem size */
 static unsigned sizex = SIZE;
 static unsigned sizex = SIZE;
@@ -147,6 +152,15 @@ static void init_problem(int argc, char **argv, int rank, int world_size)
 	last_tick = (struct timeval *) calloc(nbz, sizeof(*last_tick));
 	last_tick = (struct timeval *) calloc(nbz, sizeof(*last_tick));
 }
 }
 
 
+static void free_problem(int rank)
+{
+     	free_memory_on_node(rank);
+	free_blocks_array();
+	free(who_runs_what);
+	free(who_runs_what_index);
+	free(last_tick);
+}
+
 /*
 /*
  *	Main body
  *	Main body
  */
  */
@@ -168,7 +182,7 @@ void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
 		{
 		{
 			char name[32];
 			char name[32];
 			starpu_worker_get_name(worker, name, sizeof(name));
 			starpu_worker_get_name(worker, name, sizeof(name));
-			fprintf(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
+			FPRINTF(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
 		}
 		}
 	}
 	}
 }
 }
@@ -196,12 +210,12 @@ int main(int argc, char **argv)
 	int thread_support;
 	int thread_support;
 	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
 	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
 	{
 	{
-		fprintf(stderr, "MPI_Init_thread failed\n");
+		FPRINTF(stderr, "MPI_Init_thread failed\n");
 	}
 	}
 	if (thread_support == MPI_THREAD_FUNNELED)
 	if (thread_support == MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
+		FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
 	if (thread_support < MPI_THREAD_FUNNELED)
 	if (thread_support < MPI_THREAD_FUNNELED)
-		fprintf(stderr,"Warning: MPI does not have thread support!\n");
+		FPRINTF(stderr,"Warning: MPI does not have thread support!\n");
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
 	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
 #else
 #else
@@ -211,7 +225,7 @@ int main(int argc, char **argv)
 
 
 	if (rank == 0)
 	if (rank == 0)
 	{
 	{
-		fprintf(stderr, "Running on %d nodes\n", world_size);
+		FPRINTF(stderr, "Running on %d nodes\n", world_size);
 		fflush(stderr);
 		fflush(stderr);
 	}
 	}
 
 
@@ -238,7 +252,7 @@ int main(int argc, char **argv)
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 #endif
 #endif
 	if (rank == 0)
 	if (rank == 0)
-		fprintf(stderr, "GO !\n");
+		FPRINTF(stderr, "GO !\n");
 
 
 	gettimeofday(&start, NULL);
 	gettimeofday(&start, NULL);
 
 
@@ -290,6 +304,7 @@ int main(int argc, char **argv)
 	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
 	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
 
 
 	memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what));
 	memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what));
+	free(who_runs_what_tmp);
 
 
 	/* XXX we should do a gather instead, here we assume that non initialized values are still 0 */
 	/* XXX we should do a gather instead, here we assume that non initialized values are still 0 */
 	int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index));
 	int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index));
@@ -297,16 +312,17 @@ int main(int argc, char **argv)
 	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
 	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
 
 
 	memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index));
 	memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index));
+	free(who_runs_what_index_tmp);
 #endif
 #endif
 
 
 	if (rank == 0)
 	if (rank == 0)
 	{
 	{
 #if 1 
 #if 1 
-		fprintf(stderr, "update:\n");
+		FPRINTF(stderr, "update:\n");
 		f(update_per_worker);
 		f(update_per_worker);
-		fprintf(stderr, "top:\n");
+		FPRINTF(stderr, "top:\n");
 		f(top_per_worker);
 		f(top_per_worker);
-		fprintf(stderr, "bottom:\n");
+		FPRINTF(stderr, "bottom:\n");
 		f(bottom_per_worker);
 		f(bottom_per_worker);
 #endif
 #endif
 #if 1
 #if 1
@@ -320,20 +336,20 @@ int main(int argc, char **argv)
 			for (bz = 0; bz < nbz; bz++)
 			for (bz = 0; bz < nbz; bz++)
 			{
 			{
 				if ((bz % nzblocks_per_process) == 0)
 				if ((bz % nzblocks_per_process) == 0)
-					fprintf(stderr, "| ");
+					FPRINTF(stderr, "| ");
 
 
 				if (who_runs_what_index[bz] <= iter)
 				if (who_runs_what_index[bz] <= iter)
-					fprintf(stderr,"_ ");
+					FPRINTF(stderr,"_ ");
 				else
 				else
 				{
 				{
 					last = 0;
 					last = 0;
 					if (who_runs_what[bz + iter * nbz] == -1)
 					if (who_runs_what[bz + iter * nbz] == -1)
-						fprintf(stderr,"* ");
+						FPRINTF(stderr,"* ");
 					else
 					else
-						fprintf(stderr, "%d ", who_runs_what[bz + iter * nbz]);
+						FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]);
 				}
 				}
 			}
 			}
-			fprintf(stderr, "\n");
+			FPRINTF(stderr, "\n");
 
 
 			if (last)
 			if (last)
 				break;
 				break;
@@ -342,12 +358,13 @@ int main(int argc, char **argv)
 
 
 		fflush(stderr);
 		fflush(stderr);
 
 
-		fprintf(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size);
-		fprintf(stdout, "\tMIN : %f ms\n", min_timing/1000);
-		fprintf(stdout, "\tMAX : %f ms\n", max_timing/1000);
-		fprintf(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
+		FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size);
+		FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000);
+		FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000);
+		FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
 	}
 	}
 
 
+	free_problem(rank);
 	starpu_shutdown();
 	starpu_shutdown();
 
 
 #ifdef STARPU_USE_MPI
 #ifdef STARPU_USE_MPI

+ 4 - 0
examples/stencil/stencil.h

@@ -29,6 +29,8 @@
 #endif
 #endif
 #endif
 #endif
 
 
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
 #define LIFE
 #define LIFE
 
 
 #ifdef LIFE
 #ifdef LIFE
@@ -91,6 +93,7 @@ int MPI_TAG1(int z, int iter, int dir);
 #define MIN(a,b)	((a)<(b)?(a):(b))
 #define MIN(a,b)	((a)<(b)?(a):(b))
 
 
 void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz);
 void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz);
+void free_blocks_array();
 struct block_description *get_block_description(int z);
 struct block_description *get_block_description(int z);
 void assign_blocks_to_mpi_nodes(int world_size);
 void assign_blocks_to_mpi_nodes(int world_size);
 void allocate_memory_on_node(int rank);
 void allocate_memory_on_node(int rank);
@@ -98,6 +101,7 @@ void assign_blocks_to_workers(int rank);
 void create_tasks(int rank);
 void create_tasks(int rank);
 void wait_end_tasks(int rank);
 void wait_end_tasks(int rank);
 void check(int rank);
 void check(int rank);
+void free_memory_on_node(int rank);
 
 
 void display_memory_consumption(int rank);
 void display_memory_consumption(int rank);
 
 

+ 4 - 1
include/starpu_task.h

@@ -164,6 +164,8 @@ struct starpu_task
 	void *starpu_private;
 	void *starpu_private;
 	int magic;
 	int magic;
 
 
+	const char *name;
+
 	unsigned sched_ctx;
 	unsigned sched_ctx;
 	int hypervisor_tag;
 	int hypervisor_tag;
 	double flops;
 	double flops;
@@ -197,7 +199,8 @@ struct starpu_task
 	.flops = 0.0,					\
 	.flops = 0.0,					\
 	.scheduled = 0,					\
 	.scheduled = 0,					\
 	.dyn_handles = NULL,				\
 	.dyn_handles = NULL,				\
-	.dyn_interfaces = NULL				\
+	.dyn_interfaces = NULL,				\
+	.name = NULL                        		\
 }
 }
 
 
 #define STARPU_TASK_GET_HANDLE(task, i) ((task->dyn_handles) ? task->dyn_handles[i] : task->handles[i])
 #define STARPU_TASK_GET_HANDLE(task, i) ((task->dyn_handles) ? task->dyn_handles[i] : task->handles[i])

+ 3 - 3
mpi/examples/complex/mpi_complex.c

@@ -77,17 +77,17 @@ int main(int argc, char **argv)
 		{
 		{
 			int *compare_ptr = &compare;
 			int *compare_ptr = &compare;
 
 
-			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value"), STARPU_R, handle, 0);
+			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0);
 			starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL);
 			starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL);
 			starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL);
 			starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL);
 
 
-			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value"), STARPU_R, handle2, 0);
+			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0);
 			starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
 			starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
 		}
 		}
 		else if (rank == 1)
 		else if (rank == 1)
 		{
 		{
 			starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL);
 			starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL);
-			starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value"), STARPU_R, handle, 0);
+			starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0);
 			starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
 			starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
 		}
 		}
 
 

+ 3 - 3
mpi/src/starpu_mpi.c

@@ -941,8 +941,8 @@ static void _starpu_mpi_copy_cb(void* arg)
 	else
 	else
 	{
 	{
 		struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->copy_handle);
 		struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->copy_handle);
-		void* itf_src = starpu_data_get_interface_on_node(args->copy_handle,0);
-		void* itf_dst = starpu_data_get_interface_on_node(args->data_handle,0);
+		void* itf_src = starpu_data_get_interface_on_node(args->copy_handle, STARPU_MAIN_RAM);
+		void* itf_dst = starpu_data_get_interface_on_node(args->data_handle, STARPU_MAIN_RAM);
 
 
 		if (!itf->copy_methods->ram_to_ram)
 		if (!itf->copy_methods->ram_to_ram)
 		{
 		{
@@ -1380,7 +1380,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 						 */
 						 */
 						_STARPU_MPI_DEBUG(20, "Posting a receive for a data of size %d which has not yet been registered\n", (int)chandle->env->size);
 						_STARPU_MPI_DEBUG(20, "Posting a receive for a data of size %d which has not yet been registered\n", (int)chandle->env->size);
 						chandle->buffer = malloc(chandle->env->size);
 						chandle->buffer = malloc(chandle->env->size);
-						starpu_vector_data_register(&chandle->handle, 0, (uintptr_t) chandle->buffer, chandle->env->size, 1);
+						starpu_vector_data_register(&chandle->handle, STARPU_MAIN_RAM, (uintptr_t) chandle->buffer, chandle->env->size, 1);
 						add_chandle(chandle);
 						add_chandle(chandle);
 					}
 					}
 
 

+ 4 - 4
mpi/tests/datatypes.c

@@ -195,7 +195,7 @@ int main(int argc, char **argv)
 		{
 		{
 			float v = 42.12;
 			float v = 42.12;
 			starpu_data_handle_t variable_handle[2];
 			starpu_data_handle_t variable_handle[2];
-			starpu_variable_data_register(&variable_handle[0], 0, (uintptr_t)&v, sizeof(v));
+			starpu_variable_data_register(&variable_handle[0], STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(v));
 			starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
 			starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
 
 
 			send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, &error, check_variable);
 			send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, &error, check_variable);
@@ -208,7 +208,7 @@ int main(int argc, char **argv)
 			int vector[4] = {1, 2, 3, 4};
 			int vector[4] = {1, 2, 3, 4};
 			starpu_data_handle_t vector_handle[2];
 			starpu_data_handle_t vector_handle[2];
 
 
-			starpu_vector_data_register(&vector_handle[0], 0, (uintptr_t)vector, 4, sizeof(vector[0]));
+			starpu_vector_data_register(&vector_handle[0], STARPU_MAIN_RAM, (uintptr_t)vector, 4, sizeof(vector[0]));
 			starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
 			starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
 
 
 			send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, &error, check_vector);
 			send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, &error, check_vector);
@@ -232,7 +232,7 @@ int main(int argc, char **argv)
 				}
 				}
 			}
 			}
 
 
-			starpu_matrix_data_register(&matrix_handle[0], 0, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
+			starpu_matrix_data_register(&matrix_handle[0], STARPU_MAIN_RAM, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
 			starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
 			starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
 
 
 			send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, &error, check_matrix);
 			send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, &error, check_matrix);
@@ -260,7 +260,7 @@ int main(int argc, char **argv)
 				}
 				}
 			}
 			}
 
 
-			starpu_block_data_register(&block_handle[0], 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
+			starpu_block_data_register(&block_handle[0], STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
 			starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
 			starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
 
 
 			send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, &error, check_block);
 			send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, &error, check_block);

+ 2 - 2
mpi/tests/gather.c

@@ -49,7 +49,7 @@ int main(int argc, char **argv)
 			MPI_Status status;
 			MPI_Status status;
 
 
 			FPRINTF_MPI("receiving from node %d\n", n);
 			FPRINTF_MPI("receiving from node %d\n", n);
-			starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(var));
+			starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 			starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status);
 			starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status);
 			starpu_data_acquire(handle, STARPU_R);
 			starpu_data_acquire(handle, STARPU_R);
 			STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n);
 			STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n);
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
 	{
 	{
 		FPRINTF_MPI("sending to node %d\n", 0);
 		FPRINTF_MPI("sending to node %d\n", 0);
 		var = rank;
 		var = rank;
-		starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(var));
+		starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 		starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD);
 		starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD);
 		starpu_data_unregister(handle);
 		starpu_data_unregister(handle);
 	}
 	}

+ 2 - 2
mpi/tests/gather2.c

@@ -50,7 +50,7 @@ int main(int argc, char **argv)
 
 
 			FPRINTF_MPI("receiving from node %d\n", n);
 			FPRINTF_MPI("receiving from node %d\n", n);
 			for(i=0 ; i<2 ; i++)
 			for(i=0 ; i<2 ; i++)
-				starpu_variable_data_register(&handle[i], 0, (uintptr_t)&var[i], sizeof(var[i]));
+				starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));
 
 
 			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]);
 			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]);
 			starpu_data_acquire(handle[0], STARPU_R);
 			starpu_data_acquire(handle[0], STARPU_R);
@@ -81,7 +81,7 @@ int main(int argc, char **argv)
 		var[1] = var[0] * 2;
 		var[1] = var[0] * 2;
 		var[2] = var[0] * 4;
 		var[2] = var[0] * 4;
 		for(i=0 ; i<3 ; i++)
 		for(i=0 ; i<3 ; i++)
-			starpu_variable_data_register(&handle[i], 0, (uintptr_t)&var[i], sizeof(var[i]));
+			starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));
 		starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD);
 		starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD);
 		starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD);
 		starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD);
 		starpu_mpi_send(handle[2], 0, 44, MPI_COMM_WORLD);
 		starpu_mpi_send(handle[2], 0, 44, MPI_COMM_WORLD);

+ 5 - 2
mpi/tests/insert_task_owner2.c

@@ -29,7 +29,7 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	//*x2 = 45;
 	//*x2 = 45;
 	//*y = 144;
 	//*y = 144;
 
 
-	FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
+	FPRINTF(stderr, "-------> CODELET VALUES: %d %d (x2) %d\n", *x0, *x1, *y);
 	*y = (*x0 + *x1) * 100;
 	*y = (*x0 + *x1) * 100;
 	*x1 = 12;
 	*x1 = 12;
 	*x2 = 24;
 	*x2 = 24;
@@ -113,7 +113,10 @@ int main(int argc, char **argv)
 		}
 		}
 		starpu_data_unregister(data_handles[i]);
 		starpu_data_unregister(data_handles[i]);
 	}
 	}
-        FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]);
+	if (rank == 0)
+	{
+		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]);
+	}
         FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
         FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
 
 
 	free(values);
 	free(values);

+ 9 - 5
mpi/tests/insert_task_owner_data.c

@@ -86,13 +86,17 @@ int main(int argc, char **argv)
 		{
 		{
 			starpu_data_acquire(data_handles[i], STARPU_R);
 			starpu_data_acquire(data_handles[i], STARPU_R);
 			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
 			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
-			starpu_data_release(data_handles[i]);
-		}
+			starpu_data_release(data_handles[i]);		}
 	}
 	}
-	FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
 	ret = 0;
 	ret = 0;
-	if (rank == 0 && (values[0] != 12 || values[1] != 144))
-		ret = EXIT_FAILURE;
+	if (rank == 0)
+	{
+		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
+		if (values[0] != 12 || values[1] != 144)
+		{
+			ret = EXIT_FAILURE;
+		}
+	}
 
 
 	starpu_data_unregister(data_handles[0]);
 	starpu_data_unregister(data_handles[0]);
 	starpu_data_unregister(data_handles[1]);
 	starpu_data_unregister(data_handles[1]);

+ 1 - 1
mpi/tests/user_defined_datatype_value.h

@@ -26,7 +26,7 @@ struct starpu_value_interface
 int *starpu_value_get(starpu_data_handle_t handle)
 int *starpu_value_get(starpu_data_handle_t handle)
 {
 {
 	struct starpu_value_interface *value_interface =
 	struct starpu_value_interface *value_interface =
-		(struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, 0);
+		(struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	return value_interface->value;
 	return value_interface->value;
 }
 }
 
 

+ 2 - 3
socl/src/Makefile.am

@@ -132,6 +132,5 @@ libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
   cl_getextensionfunctionaddress.c \
   cl_getextensionfunctionaddress.c \
   cl_icdgetplatformidskhr.c
   cl_icdgetplatformidskhr.c
 
 
-
-
-
+showcheck:
+	-cat /dev/null

+ 39 - 1
src/common/fxt.h

@@ -140,6 +140,12 @@
 #define _STARPU_FUT_START_UNPARTITION 0x5154
 #define _STARPU_FUT_START_UNPARTITION 0x5154
 #define _STARPU_FUT_END_UNPARTITION 0x5155
 #define _STARPU_FUT_END_UNPARTITION 0x5155
 
 
+#define	_STARPU_FUT_START_FREE		0x5156
+#define	_STARPU_FUT_END_FREE		0x5157
+
+#define	_STARPU_FUT_START_WRITEBACK	0x5158
+#define	_STARPU_FUT_END_WRITEBACK	0x5159
+
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 #include <fxt/fxt.h>
 #include <fxt/fxt.h>
 #include <fxt/fut.h>
 #include <fxt/fut.h>
@@ -249,6 +255,22 @@ do {									\
     }									\
     }									\
 } while (0);
 } while (0);
 
 
+#ifndef FUT_RAW_PROBE7
+#define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do {		\
+		if(fut_active) {					\
+			unsigned long *args __attribute__((unused))=	\
+				fut_getstampedbuffer(CODE,		\
+						     FUT_SIZE(7)); \
+			*(args++)=(unsigned long)(P1);*(args++)=(unsigned long)(P2);*(args++)=(unsigned long)(P3);*(args++)=(unsigned long)(P4);*(args++)=(unsigned long)(P5);*(args++)=(unsigned long)(P6);*(args++)=(unsigned long)(P7);				\
+				}					\
+	} while (0)
+#endif
+
+#ifndef FUT_DO_PROBE7
+#define FUT_DO_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \
+        FUT_RAW_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
+} while (0)
+#endif
 
 
 
 
 /* workerkind = _STARPU_FUT_CPU_KEY for instance */
 /* workerkind = _STARPU_FUT_CPU_KEY for instance */
@@ -278,7 +300,7 @@ do {									\
 do {									\
 do {									\
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
-	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), _starpu_gettid());	\
+	FUT_DO_PROBE7(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype)->type, (archtype)->devid, (archtype)->ncore, _starpu_gettid());	\
 } while(0);
 } while(0);
 
 
 #define _STARPU_TRACE_START_CALLBACK(job)	\
 #define _STARPU_TRACE_START_CALLBACK(job)	\
@@ -394,6 +416,18 @@ do {										\
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
 	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
 	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid());
 	
 	
+#define _STARPU_TRACE_START_FREE(memnode, size)		\
+	FUT_DO_PROBE3(_STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size);
+	
+#define _STARPU_TRACE_END_FREE(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_END_FREE, memnode, _starpu_gettid());
+
+#define _STARPU_TRACE_START_WRITEBACK(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid());
+	
+#define _STARPU_TRACE_END_WRITEBACK(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid());
+
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
 	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 	
 	
@@ -612,6 +646,10 @@ do {										\
 #define _STARPU_TRACE_END_ALLOC(memnode)		do {} while(0)
 #define _STARPU_TRACE_END_ALLOC(memnode)		do {} while(0)
 #define _STARPU_TRACE_START_ALLOC_REUSE(a, size)	do {} while(0)
 #define _STARPU_TRACE_START_ALLOC_REUSE(a, size)	do {} while(0)
 #define _STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0)
 #define _STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0)
+#define _STARPU_TRACE_START_FREE(memnode, size)	do {} while(0)
+#define _STARPU_TRACE_END_FREE(memnode)		do {} while(0)
+#define _STARPU_TRACE_START_WRITEBACK(memnode)	do {} while(0)
+#define _STARPU_TRACE_END_WRITEBACK(memnode)		do {} while(0)
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch)	do {} while(0)
 #define _STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0)
 #define _STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0)

+ 3 - 6
src/core/dependencies/implicit_data_deps.c

@@ -255,10 +255,9 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 					 * number of dependencies. */
 					 * number of dependencies. */
 					struct starpu_task *sync_task = starpu_task_create();
 					struct starpu_task *sync_task = starpu_task_create();
 					STARPU_ASSERT(sync_task);
 					STARPU_ASSERT(sync_task);
+					sync_task->name = "sync_task_redux";
 					sync_task->cl = NULL;
 					sync_task->cl = NULL;
-#ifdef STARPU_USE_FXT
-					_starpu_get_job_associated_to_task(sync_task)->model_name = "sync_task_redux";
-#endif
+
 					/* Make this task wait for the previous ones */
 					/* Make this task wait for the previous ones */
 					_starpu_add_sync_task(handle, sync_task, sync_task);
 					_starpu_add_sync_task(handle, sync_task, sync_task);
 					/* And the requested task wait for this one */
 					/* And the requested task wait for this one */
@@ -528,11 +527,9 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_d
 	{
 	{
 		struct starpu_task *sync_task, *new_task;
 		struct starpu_task *sync_task, *new_task;
 		sync_task = starpu_task_create();
 		sync_task = starpu_task_create();
+		sync_task->name = "sync_task_seq_cons";
 		sync_task->detach = 0;
 		sync_task->detach = 0;
 		sync_task->destroy = 1;
 		sync_task->destroy = 1;
-#ifdef STARPU_USE_FXT
-		_starpu_get_job_associated_to_task(sync_task)->model_name = "sync_task";
-#endif
 
 
 		/* It is not really a RW access, but we want to make sure that
 		/* It is not really a RW access, but we want to make sure that
 		 * all previous accesses are done */
 		 * all previous accesses are done */

+ 1 - 5
src/core/jobs.h

@@ -106,7 +106,7 @@ LIST_TYPE(_starpu_job,
 	 * with dot) */
 	 * with dot) */
         unsigned exclude_from_dag;
         unsigned exclude_from_dag;
 
 
-	/* Is that task internal to StarPU ? */
+	/* Is that task internal to StarPU? */
 	unsigned internal;
 	unsigned internal;
 
 
 	/* Each job is attributed a unique id. */
 	/* Each job is attributed a unique id. */
@@ -122,10 +122,6 @@ LIST_TYPE(_starpu_job,
 	 * local variable */
 	 * local variable */
 	struct timespec cl_start;
 	struct timespec cl_start;
 
 
-	/* A symbol name may be associated to the job directly for debug
-	 * purposes (for instance if the codelet is NULL). */
-        const char *model_name;
-
 	struct bound_task *bound_task;
 	struct bound_task *bound_task;
 
 
 	/* Number of workers executing that task (>1 if the task is parallel)
 	/* Number of workers executing that task (>1 if the task is parallel)

+ 7 - 6
src/core/perfmodel/perfmodel_history.c

@@ -35,7 +35,6 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
-#define HISTORYMAXERROR	(STARPU_HISTORYMAXERROR > 100 ? 10 : STARPU_HISTORYMAXERROR)
 #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
 #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
 #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
 #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
 
 
@@ -262,6 +261,7 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch
 			 * good-enough estimation */
 			 * good-enough estimation */
 			STARPU_HG_DISABLE_CHECKING(entry->nsample);
 			STARPU_HG_DISABLE_CHECKING(entry->nsample);
 			STARPU_HG_DISABLE_CHECKING(entry->mean);
 			STARPU_HG_DISABLE_CHECKING(entry->mean);
+			entry->nerror = 0;
 		}
 		}
 
 
 		scan_history_entry(f, entry);
 		scan_history_entry(f, entry);
@@ -625,7 +625,7 @@ static void initialize_model_with_file(FILE*f, struct starpu_perfmodel *model)
 		STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
 		STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
 
 
 		if(ndevice != 0)
 		if(ndevice != 0)
-			maxncore = malloc(sizeof((*maxncore)*ndevice));
+			maxncore = malloc(sizeof(*maxncore)*ndevice);
 		else
 		else
 			maxncore = NULL;
 			maxncore = NULL;
 
 
@@ -1067,7 +1067,6 @@ int starpu_perfmodel_list(FILE *output)
 int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
 int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
 {
 {
 	model->symbol = strdup(symbol);
 	model->symbol = strdup(symbol);
-	starpu_perfmodel_init(model);
 
 
 	/* where is the file if it exists ? */
 	/* where is the file if it exists ? */
 	char path[256];
 	char path[256];
@@ -1142,7 +1141,7 @@ char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
 
 
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned nimpl)
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned nimpl)
 {
 {
-	snprintf(archname, maxlen, "%s_%dncore_%dimpl_%u",
+	snprintf(archname, maxlen, "%s%d_ncore%d_impl%u",
 			starpu_perfmodel_get_archtype_name(arch->type),
 			starpu_perfmodel_get_archtype_name(arch->type),
 			arch->devid,
 			arch->devid,
 			arch->ncore,
 			arch->ncore,
@@ -1320,9 +1319,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			{
 			{
 				/* There is already an entry with the same footprint */
 				/* There is already an entry with the same footprint */
 
 
-				double local_deviation = (measured/entry->mean)*100;
+				double local_deviation = measured/entry->mean;
 				
 				
-				if (entry->nsample && (local_deviation < (100 - HISTORYMAXERROR) || local_deviation > (100 + HISTORYMAXERROR)))
+				if (entry->nsample &&
+					(100 * local_deviation > (100 + STARPU_HISTORYMAXERROR)
+					 || (100 / local_deviation > (100 + STARPU_HISTORYMAXERROR))))
 				{
 				{
 					entry->nerror++;
 					entry->nerror++;
 
 

+ 19 - 0
src/core/perfmodel/perfmodel_nan.c

@@ -23,6 +23,24 @@
 #include <string.h>
 #include <string.h>
 #include <config.h>
 #include <config.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
+#include <ctype.h>
+
+#ifdef STARPU_HAVE_WINDOWS
+static
+void _starpu_read_spaces(FILE *f)
+{
+	int c = getc(f);
+	if (isspace(c))
+	{
+		while (isspace(c)) c = getc(f);
+		ungetc(c, f);
+	}
+	else
+	{
+		ungetc(c, f);
+	}
+}
+#endif /* STARPU_HAVE_WINDOWS */
 
 
 int _starpu_read_double(FILE *f, char *format, double *val)
 int _starpu_read_double(FILE *f, char *format, double *val)
 {
 {
@@ -36,6 +54,7 @@ int _starpu_read_double(FILE *f, char *format, double *val)
 	     int x3 = getc(f);
 	     int x3 = getc(f);
 	     if (x2 == 'a' && x3 == 'n')
 	     if (x2 == 'a' && x3 == 'n')
 	     {
 	     {
+		     _starpu_read_spaces(f);
 		     *val = NAN;
 		     *val = NAN;
 		     return 1;
 		     return 1;
 	     }
 	     }

+ 32 - 10
src/core/sched_ctx.c

@@ -847,18 +847,40 @@ void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id)
 			return;
 			return;
 		}
 		}
 		STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex);
-		/* FIXME: */
-		/* We also need to check for config->submitting = 0 (i.e. the
-		 * user calle starpu_drivers_request_termination()), in which
-		 * case we need to set config->running to 0 and wake workers,
-		 * so they can terminate, just like
-		 * starpu_drivers_request_termination() does.
-		 *
-		 * Set FIXME to 1 in tests/main/driver_api/run_driver.c to
-		 * check it is actually fixed.
-		 */
 	}
 	}
 
 
+	/* We also need to check for config->submitting = 0 (i.e. the
+	 * user calle starpu_drivers_request_termination()), in which
+	 * case we need to set config->running to 0 and wake workers,
+	 * so they can terminate, just like
+	 * starpu_drivers_request_termination() does.
+	 */
+
+	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
+	if(config->submitting == 0)
+	{
+		STARPU_PTHREAD_RWLOCK_RDLOCK(&changing_ctx_mutex[sched_ctx_id]);
+		if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
+		{
+			if(sched_ctx->close_callback)
+				sched_ctx->close_callback(sched_ctx->id, sched_ctx->close_args);
+		}
+		STARPU_PTHREAD_RWLOCK_UNLOCK(&changing_ctx_mutex[sched_ctx_id]);
+
+		ANNOTATE_HAPPENS_AFTER(&config->running);
+		config->running = 0;
+		ANNOTATE_HAPPENS_BEFORE(&config->running);
+		int s;
+		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
+		{
+			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
+			{
+				_starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
+			}
+		}
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
+
 	return;
 	return;
 }
 }
 
 

+ 2 - 1
src/core/sched_policy.c

@@ -473,12 +473,13 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 #endif
 #endif
 
 
 	conversion_task = starpu_task_create();
 	conversion_task = starpu_task_create();
+	conversion_task->name = "conversion_task";
 	conversion_task->synchronous = 0;
 	conversion_task->synchronous = 0;
 	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
 	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
 
 
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
 	/* The node does not really matter here */
 	/* The node does not really matter here */
-	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
+	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 #endif
 #endif
 
 
 	_starpu_spin_lock(&handle->header_lock);
 	_starpu_spin_lock(&handle->header_lock);

+ 10 - 7
src/core/task.c

@@ -41,7 +41,6 @@
  * sure that no task remains !) */
  * sure that no task remains !) */
 /* TODO we could make this hierarchical to avoid contention ? */
 /* TODO we could make this hierarchical to avoid contention ? */
 //static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER;
 //static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER;
-static starpu_pthread_mutex_t submitted_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 
 
 /* This key stores the task currently handled by the thread, note that we
 /* This key stores the task currently handled by the thread, note that we
  * cannot use the worker structure to store that information because it is
  * cannot use the worker structure to store that information because it is
@@ -84,6 +83,7 @@ void starpu_task_init(struct starpu_task *task)
 
 
 	task->dyn_handles = NULL;
 	task->dyn_handles = NULL;
 	task->dyn_interfaces = NULL;
 	task->dyn_interfaces = NULL;
+	task->name = NULL;
 }
 }
 
 
 /* Free all the ressources allocated for a task, without deallocating the task
 /* Free all the ressources allocated for a task, without deallocating the task
@@ -804,7 +804,7 @@ starpu_drivers_request_termination(void)
 {
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 
 
-	STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
 	int nsubmitted = starpu_task_nsubmitted();
 	int nsubmitted = starpu_task_nsubmitted();
 	config->submitting = 0;
 	config->submitting = 0;
 	if (nsubmitted == 0)
 	if (nsubmitted == 0)
@@ -812,6 +812,7 @@ starpu_drivers_request_termination(void)
 		ANNOTATE_HAPPENS_AFTER(&config->running);
 		ANNOTATE_HAPPENS_AFTER(&config->running);
 		config->running = 0;
 		config->running = 0;
 		ANNOTATE_HAPPENS_BEFORE(&config->running);
 		ANNOTATE_HAPPENS_BEFORE(&config->running);
+		STARPU_WMB();
 		int s;
 		int s;
 		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
 		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
 		{
 		{
@@ -822,7 +823,7 @@ starpu_drivers_request_termination(void)
 		}
 		}
 	}
 	}
 
 
-	STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
 }
 }
 
 
 int starpu_task_nsubmitted(void)
 int starpu_task_nsubmitted(void)
@@ -1018,16 +1019,16 @@ static void *watchdog_func(void *foo STARPU_ATTRIBUTE_UNUSED)
 	ts.tv_nsec = (timeout % 1000000) * 1000;
 	ts.tv_nsec = (timeout % 1000000) * 1000;
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
 	
 	
-	STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
 	while (_starpu_machine_is_running())
 	while (_starpu_machine_is_running())
 	{
 	{
 		int last_nsubmitted = starpu_task_nsubmitted();
 		int last_nsubmitted = starpu_task_nsubmitted();
 		config->watchdog_ok = 0;
 		config->watchdog_ok = 0;
-		STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
 
 
 		_starpu_sleep(ts);
 		_starpu_sleep(ts);
 
 
-		STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
+		STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
 		if (!config->watchdog_ok && last_nsubmitted
 		if (!config->watchdog_ok && last_nsubmitted
 				&& last_nsubmitted == starpu_task_nsubmitted())
 				&& last_nsubmitted == starpu_task_nsubmitted())
 		{
 		{
@@ -1041,12 +1042,14 @@ static void *watchdog_func(void *foo STARPU_ATTRIBUTE_UNUSED)
 				fprintf(stderr,"Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
 				fprintf(stderr,"Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
 		}
 		}
 	}
 	}
-	STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
 	return NULL;
 	return NULL;
 }
 }
 
 
 void _starpu_watchdog_init(void)
 void _starpu_watchdog_init(void)
 {
 {
+	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
+	STARPU_PTHREAD_MUTEX_INIT(&config->submitted_mutex, NULL);
 	STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, NULL);
 	STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, NULL);
 }
 }
 
 

+ 4 - 4
src/core/workers.c

@@ -280,8 +280,6 @@ int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task,
 int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
 int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
 {
 {
 	/* TODO: check that the task operand sizes will fit on that device */
 	/* TODO: check that the task operand sizes will fit on that device */
-	/* TODO: call application-provided function for various cases like
-	 * double support, shared memory size limit, etc. */
 
 
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_codelet *cl = task->cl;
 	unsigned nworkers = config.topology.nworkers;
 	unsigned nworkers = config.topology.nworkers;
@@ -290,7 +288,8 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 	if (workerid < nworkers)
 	if (workerid < nworkers)
 	{
 	{
 		return !!((task->cl->where & config.workers[workerid].worker_mask) &&
 		return !!((task->cl->where & config.workers[workerid].worker_mask) &&
-				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
+				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
+				(!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
 	}
 	}
 	else
 	else
 	{
 	{
@@ -311,7 +310,8 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 			int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
 			int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
 			int worker0 = config.combined_workers[workerid - nworkers].combined_workerid[0];
 			int worker0 = config.combined_workers[workerid - nworkers].combined_workerid[0];
 			return !!((worker_size <= task->cl->max_parallelism) &&
 			return !!((worker_size <= task->cl->max_parallelism) &&
-				_starpu_can_use_nth_implementation(config.workers[worker0].arch, task->cl, nimpl));
+				_starpu_can_use_nth_implementation(config.workers[worker0].arch, task->cl, nimpl) &&
+				(!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
 		}
 		}
 		else
 		else
 		{
 		{

+ 2 - 0
src/core/workers.h

@@ -314,6 +314,8 @@ struct _starpu_machine_config
 	unsigned submitting;
 	unsigned submitting;
 
 
 	int watchdog_ok;
 	int watchdog_ok;
+
+	starpu_pthread_mutex_t submitted_mutex;
 };
 };
 
 
 /* Three functions to manage argv, argc */
 /* Three functions to manage argv, argc */

+ 3 - 0
src/datawizard/copy_driver.c

@@ -495,6 +495,9 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 
 
 		_STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id);
 		_STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id);
 		ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req);
 		ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req);
+		if (!req)
+			/* Synchronous, this is already finished */
+			_STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id);
 
 
 		return ret_copy;
 		return ret_copy;
 	}
 	}

+ 6 - 1
src/datawizard/datastats.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -29,6 +29,7 @@ static unsigned miss_cnt[STARPU_MAXNODES];
 void _starpu_msi_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_msi_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_ENABLE_STATS
 #ifdef STARPU_ENABLE_STATS
+	STARPU_HG_DISABLE_CHECKING(hit_cnt[node]);
 	hit_cnt[node]++;
 	hit_cnt[node]++;
 #endif
 #endif
 }
 }
@@ -36,6 +37,7 @@ void _starpu_msi_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_msi_cache_miss(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_msi_cache_miss(unsigned node STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_ENABLE_STATS
 #ifdef STARPU_ENABLE_STATS
+	STARPU_HG_DISABLE_CHECKING(miss_cnt[node]);
 	miss_cnt[node]++;
 	miss_cnt[node]++;
 #endif
 #endif
 }
 }
@@ -81,6 +83,7 @@ static unsigned alloc_cache_hit_cnt[STARPU_MAXNODES];
 void _starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_ENABLE_STATS
 #ifdef STARPU_ENABLE_STATS
+	STARPU_HG_DISABLE_CHECKING(alloc_cache_hit_cnt[node]);
 	alloc_cache_hit_cnt[node]++;
 	alloc_cache_hit_cnt[node]++;
 #endif
 #endif
 }
 }
@@ -88,6 +91,7 @@ void _starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED)
 void _starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_ENABLE_STATS
 #ifdef STARPU_ENABLE_STATS
+	STARPU_HG_DISABLE_CHECKING(alloc_cnt[node]);
 	alloc_cnt[node]++;
 	alloc_cnt[node]++;
 #endif
 #endif
 }
 }
@@ -122,6 +126,7 @@ static size_t comm_amount[STARPU_MAXNODES][STARPU_MAXNODES];
 void _starpu_comm_amounts_inc(unsigned src  STARPU_ATTRIBUTE_UNUSED, unsigned dst  STARPU_ATTRIBUTE_UNUSED, size_t size  STARPU_ATTRIBUTE_UNUSED)
 void _starpu_comm_amounts_inc(unsigned src  STARPU_ATTRIBUTE_UNUSED, unsigned dst  STARPU_ATTRIBUTE_UNUSED, size_t size  STARPU_ATTRIBUTE_UNUSED)
 {
 {
 #ifdef STARPU_ENABLE_STATS
 #ifdef STARPU_ENABLE_STATS
+	STARPU_HG_DISABLE_CHECKING(comm_amount[src][dst]);
 	comm_amount[src][dst] += size;
 	comm_amount[src][dst] += size;
 #endif /* STARPU_ENABLE_STATS */
 #endif /* STARPU_ENABLE_STATS */
 }
 }

+ 2 - 0
src/datawizard/filters.c

@@ -312,6 +312,8 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 				.nbuffers = 1
 				.nbuffers = 1
 			};
 			};
 			struct starpu_task *task = starpu_task_create();
 			struct starpu_task *task = starpu_task_create();
+			task->name = "convert_data";
+
 			STARPU_TASK_SET_HANDLE(task, child_handle, 0);
 			STARPU_TASK_SET_HANDLE(task, child_handle, 0);
 			task->cl = &cl;
 			task->cl = &cl;
 			task->synchronous = 1;
 			task->synchronous = 1;

+ 8 - 8
src/datawizard/interfaces/bcsr_interface.c

@@ -142,7 +142,7 @@ static int bcsr_compare(void *data_interface_a, void *data_interface_b)
 uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->nnz;
 	return data_interface->nnz;
 }
 }
@@ -150,7 +150,7 @@ uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->nrow;
 	return data_interface->nrow;
 }
 }
@@ -158,7 +158,7 @@ uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->firstentry;
 	return data_interface->firstentry;
 }
 }
@@ -166,7 +166,7 @@ uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->r;
 	return data_interface->r;
 }
 }
@@ -174,7 +174,7 @@ uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle)
 uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->c;
 	return data_interface->c;
 }
 }
@@ -182,7 +182,7 @@ uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle)
 size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle)
 {
 {
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->elemsize;
 	return data_interface->elemsize;
 }
 }
@@ -204,7 +204,7 @@ uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle)
 {
 {
 	/* XXX 0 */
 	/* XXX 0 */
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->colind;
 	return data_interface->colind;
 }
 }
@@ -213,7 +213,7 @@ uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle)
 {
 {
 	/* XXX 0 */
 	/* XXX 0 */
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return data_interface->rowptr;
 	return data_interface->rowptr;
 }
 }

+ 6 - 6
src/datawizard/interfaces/block_interface.c

@@ -194,7 +194,7 @@ static void display_block_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_block_interface *block_interface;
 	struct starpu_block_interface *block_interface;
 
 
-	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, 0);
+	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%u\t%u\t%u\t", block_interface->nx, block_interface->ny, block_interface->nz);
 	fprintf(f, "%u\t%u\t%u\t", block_interface->nx, block_interface->ny, block_interface->nz);
 }
 }
@@ -265,7 +265,7 @@ static size_t block_interface_get_size(starpu_data_handle_t handle)
 	size_t size;
 	size_t size;
 	struct starpu_block_interface *block_interface;
 	struct starpu_block_interface *block_interface;
 
 
-	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, 0);
+	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize;
 	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize;
 
 
@@ -276,7 +276,7 @@ static size_t block_interface_get_size(starpu_data_handle_t handle)
 uint32_t starpu_block_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_block_get_nx(starpu_data_handle_t handle)
 {
 {
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return block_interface->nx;
 	return block_interface->nx;
 }
 }
@@ -284,7 +284,7 @@ uint32_t starpu_block_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_block_get_ny(starpu_data_handle_t handle)
 uint32_t starpu_block_get_ny(starpu_data_handle_t handle)
 {
 {
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return block_interface->ny;
 	return block_interface->ny;
 }
 }
@@ -292,7 +292,7 @@ uint32_t starpu_block_get_ny(starpu_data_handle_t handle)
 uint32_t starpu_block_get_nz(starpu_data_handle_t handle)
 uint32_t starpu_block_get_nz(starpu_data_handle_t handle)
 {
 {
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return block_interface->nz;
 	return block_interface->nz;
 }
 }
@@ -339,7 +339,7 @@ uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle)
 size_t starpu_block_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_block_get_elemsize(starpu_data_handle_t handle)
 {
 {
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return block_interface->elemsize;
 	return block_interface->elemsize;
 }
 }

+ 3 - 3
src/datawizard/interfaces/coo_interface.c

@@ -151,7 +151,7 @@ coo_interface_get_size(starpu_data_handle_t handle)
 {
 {
 	struct starpu_coo_interface *coo_interface;
 	struct starpu_coo_interface *coo_interface;
 	coo_interface = (struct starpu_coo_interface *)
 	coo_interface = (struct starpu_coo_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return coo_interface->nx * coo_interface->ny * coo_interface->elemsize;
 	return coo_interface->nx * coo_interface->ny * coo_interface->elemsize;
 }
 }
@@ -161,7 +161,7 @@ coo_interface_footprint(starpu_data_handle_t handle)
 {
 {
 	struct starpu_coo_interface *coo_interface;
 	struct starpu_coo_interface *coo_interface;
 	coo_interface = (struct starpu_coo_interface *)
 	coo_interface = (struct starpu_coo_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return starpu_hash_crc32c_be(coo_interface->nx * coo_interface->ny, 0);
 	return starpu_hash_crc32c_be(coo_interface->nx * coo_interface->ny, 0);
 }
 }
@@ -185,7 +185,7 @@ display_coo_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_coo_interface *coo_interface =
 	struct starpu_coo_interface *coo_interface =
 	coo_interface = (struct starpu_coo_interface *)
 	coo_interface = (struct starpu_coo_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
 }
 }

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -127,7 +127,7 @@ static int csr_compare(void *data_interface_a, void *data_interface_b)
 uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle)
 uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle)
 {
 {
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return csr_interface->nnz;
 	return csr_interface->nnz;
 }
 }
@@ -135,7 +135,7 @@ uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle)
 uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle)
 uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle)
 {
 {
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return csr_interface->nrow;
 	return csr_interface->nrow;
 }
 }
@@ -143,7 +143,7 @@ uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle)
 uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle)
 uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle)
 {
 {
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return csr_interface->firstentry;
 	return csr_interface->firstentry;
 }
 }
@@ -151,7 +151,7 @@ uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle)
 size_t starpu_csr_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_csr_get_elemsize(starpu_data_handle_t handle)
 {
 {
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return csr_interface->elemsize;
 	return csr_interface->elemsize;
 }
 }

+ 2 - 2
src/datawizard/interfaces/data_interface.c

@@ -365,7 +365,7 @@ void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
 
 
 void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc)
 void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc)
 {
 {
-	void *local_interface = starpu_data_get_interface_on_node(handlesrc, 0);
+	void *local_interface = starpu_data_get_interface_on_node(handlesrc, STARPU_MAIN_RAM);
 	starpu_data_register(handledst, -1, local_interface, handlesrc->ops);
 	starpu_data_register(handledst, -1, local_interface, handlesrc->ops);
 }
 }
 
 
@@ -620,7 +620,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 			_STARPU_DEBUG("Conversion needed\n");
 			_STARPU_DEBUG("Conversion needed\n");
 			void *buffers[1];
 			void *buffers[1];
 			struct starpu_multiformat_interface *format_interface;
 			struct starpu_multiformat_interface *format_interface;
-			format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
+			format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 			struct starpu_codelet *cl = NULL;
 			struct starpu_codelet *cl = NULL;
 			enum starpu_node_kind node_kind = starpu_node_get_kind(handle->mf_node);
 			enum starpu_node_kind node_kind = starpu_node_get_kind(handle->mf_node);
 
 

+ 5 - 5
src/datawizard/interfaces/matrix_interface.c

@@ -195,7 +195,7 @@ static int matrix_compare(void *data_interface_a, void *data_interface_b)
 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f)
 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%u\t%u\t", matrix_interface->nx, matrix_interface->ny);
 	fprintf(f, "%u\t%u\t", matrix_interface->nx, matrix_interface->ny);
 }
 }
@@ -253,7 +253,7 @@ static int unpack_matrix_handle(starpu_data_handle_t handle, unsigned node, void
 static size_t matrix_interface_get_size(starpu_data_handle_t handle)
 static size_t matrix_interface_get_size(starpu_data_handle_t handle)
 {
 {
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	size_t size;
 	size_t size;
 	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize;
 	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize;
@@ -265,7 +265,7 @@ static size_t matrix_interface_get_size(starpu_data_handle_t handle)
 uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle)
 {
 {
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return matrix_interface->nx;
 	return matrix_interface->nx;
 }
 }
@@ -273,7 +273,7 @@ uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle)
 uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle)
 {
 {
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return matrix_interface->ny;
 	return matrix_interface->ny;
 }
 }
@@ -307,7 +307,7 @@ uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle)
 size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle)
 {
 {
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
 	struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return matrix_interface->elemsize;
 	return matrix_interface->elemsize;
 }
 }

+ 3 - 3
src/datawizard/interfaces/multiformat_interface.c

@@ -242,7 +242,7 @@ static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_multiformat_interface *multiformat_interface;
 	struct starpu_multiformat_interface *multiformat_interface;
 	multiformat_interface = (struct starpu_multiformat_interface *)
 	multiformat_interface = (struct starpu_multiformat_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%u\t", multiformat_interface->nx);
 	fprintf(f, "%u\t", multiformat_interface->nx);
 }
 }
@@ -252,7 +252,7 @@ static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
 {
 {
 	size_t size;
 	size_t size;
 	struct starpu_multiformat_interface *multiformat_interface;
 	struct starpu_multiformat_interface *multiformat_interface;
-	multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
+	multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
 	size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
 	return size;
 	return size;
 }
 }
@@ -260,7 +260,7 @@ static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
 uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
 {
 {
 	struct starpu_multiformat_interface *multiformat_interface;
 	struct starpu_multiformat_interface *multiformat_interface;
-	multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
+	multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	return multiformat_interface->nx;
 	return multiformat_interface->nx;
 }
 }
 
 

+ 3 - 3
src/datawizard/interfaces/variable_interface.c

@@ -135,7 +135,7 @@ static int variable_compare(void *data_interface_a, void *data_interface_b)
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f)
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
 	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%ld\t", (long)variable_interface->elemsize);
 	fprintf(f, "%ld\t", (long)variable_interface->elemsize);
 }
 }
@@ -174,7 +174,7 @@ static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, vo
 static size_t variable_interface_get_size(starpu_data_handle_t handle)
 static size_t variable_interface_get_size(starpu_data_handle_t handle)
 {
 {
 	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
 	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return variable_interface->elemsize;
 	return variable_interface->elemsize;
 }
 }
@@ -191,7 +191,7 @@ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle)
 
 
 size_t starpu_variable_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_variable_get_elemsize(starpu_data_handle_t handle)
 {
 {
-	return STARPU_VARIABLE_GET_ELEMSIZE(starpu_data_get_interface_on_node(handle, 0));
+	return STARPU_VARIABLE_GET_ELEMSIZE(starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM));
 }
 }
 
 
 /* memory allocation/deallocation primitives for the variable interface */
 /* memory allocation/deallocation primitives for the variable interface */

+ 4 - 4
src/datawizard/interfaces/vector_interface.c

@@ -141,7 +141,7 @@ static int vector_compare(void *data_interface_a, void *data_interface_b)
 static void display_vector_interface(starpu_data_handle_t handle, FILE *f)
 static void display_vector_interface(starpu_data_handle_t handle, FILE *f)
 {
 {
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	fprintf(f, "%u\t", vector_interface->nx);
 	fprintf(f, "%u\t", vector_interface->nx);
 }
 }
@@ -181,7 +181,7 @@ static size_t vector_interface_get_size(starpu_data_handle_t handle)
 {
 {
 	size_t size;
 	size_t size;
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	size = vector_interface->nx*vector_interface->elemsize;
 	size = vector_interface->nx*vector_interface->elemsize;
 
 
@@ -192,7 +192,7 @@ static size_t vector_interface_get_size(starpu_data_handle_t handle)
 uint32_t starpu_vector_get_nx(starpu_data_handle_t handle)
 uint32_t starpu_vector_get_nx(starpu_data_handle_t handle)
 {
 {
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return vector_interface->nx;
 	return vector_interface->nx;
 }
 }
@@ -213,7 +213,7 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle)
 size_t starpu_vector_get_elemsize(starpu_data_handle_t handle)
 size_t starpu_vector_get_elemsize(starpu_data_handle_t handle)
 {
 {
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
 	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
-		starpu_data_get_interface_on_node(handle, 0);
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 
 
 	return vector_interface->elemsize;
 	return vector_interface->elemsize;
 }
 }

+ 6 - 2
src/datawizard/malloc.c

@@ -141,6 +141,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
 
 			malloc_pinned_cl.where = STARPU_CUDA;
 			malloc_pinned_cl.where = STARPU_CUDA;
 			struct starpu_task *task = starpu_task_create();
 			struct starpu_task *task = starpu_task_create();
+			task->name = "cuda_malloc_pinned";
 			task->callback_func = NULL;
 			task->callback_func = NULL;
 			task->cl = &malloc_pinned_cl;
 			task->cl = &malloc_pinned_cl;
 			task->cl_arg = &s;
 			task->cl_arg = &s;
@@ -171,6 +172,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 //
 //
 //			malloc_pinned_cl.where = STARPU_OPENCL;
 //			malloc_pinned_cl.where = STARPU_OPENCL;
 //			struct starpu_task *task = starpu_task_create();
 //			struct starpu_task *task = starpu_task_create();
+//		        task->name = "opencl_malloc_pinned";
 //			task->callback_func = NULL;
 //			task->callback_func = NULL;
 //			task->cl = &malloc_pinned_cl;
 //			task->cl = &malloc_pinned_cl;
 //			task->cl_arg = &s;
 //			task->cl_arg = &s;
@@ -295,6 +297,7 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
 
 				free_pinned_cl.where = STARPU_CUDA;
 				free_pinned_cl.where = STARPU_CUDA;
 				struct starpu_task *task = starpu_task_create();
 				struct starpu_task *task = starpu_task_create();
+				task->name = "cuda_free_pinned";
 				task->callback_func = NULL;
 				task->callback_func = NULL;
 				task->cl = &free_pinned_cl;
 				task->cl = &free_pinned_cl;
 				task->cl_arg = A;
 				task->cl_arg = A;
@@ -319,6 +322,7 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 //
 //
 //                free_pinned_cl.where = STARPU_OPENCL;
 //                free_pinned_cl.where = STARPU_OPENCL;
 //		struct starpu_task *task = starpu_task_create();
 //		struct starpu_task *task = starpu_task_create();
+//              task->name = "opencl_free_pinned";
 //		task->callback_func = NULL;
 //		task->callback_func = NULL;
 //		task->cl = &free_pinned_cl;
 //		task->cl = &free_pinned_cl;
 //		task->cl_arg = A;
 //		task->cl_arg = A;
@@ -501,7 +505,7 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
 			/* Sleep for the free */
 			/* Sleep for the free */
-			MSG_process_sleep(0.000125);
+			MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 #else
 #else
 			cudaError_t err;
 			cudaError_t err;
@@ -518,7 +522,7 @@ _starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
 			/* Sleep for the free */
 			/* Sleep for the free */
-			MSG_process_sleep(0.000125);
+			MSG_process_sleep(0.000750);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
 #else
 #else
 			cl_int err;
 			cl_int err;

+ 9 - 1
src/datawizard/memalloc.c

@@ -273,7 +273,9 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 		}
 		}
 #endif
 #endif
 
 
+		_STARPU_TRACE_START_FREE(node, mc->size);
 		mc->ops->free_data_on_node(mc->chunk_interface, node);
 		mc->ops->free_data_on_node(mc->chunk_interface, node);
+		_STARPU_TRACE_END_FREE(node);
 
 
 		if (handle)
 		if (handle)
 			notify_handle_children(handle, replicate, node);
 			notify_handle_children(handle, replicate, node);
@@ -381,7 +383,9 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 				if (handle->per_node[node].state == STARPU_OWNER)
 				if (handle->per_node[node].state == STARPU_OWNER)
 					_starpu_memory_handle_stats_invalidated(handle, node);
 					_starpu_memory_handle_stats_invalidated(handle, node);
 #endif
 #endif
+				_STARPU_TRACE_START_WRITEBACK(node);
 				transfer_subtree_to_node(handle, node, target);
 				transfer_subtree_to_node(handle, node, target);
+				_STARPU_TRACE_END_WRITEBACK(node);
 #ifdef STARPU_MEMORY_STATS
 #ifdef STARPU_MEMORY_STATS
 				_starpu_memory_handle_stats_loaded_owner(handle, target);
 				_starpu_memory_handle_stats_loaded_owner(handle, target);
 #endif
 #endif
@@ -453,7 +457,9 @@ static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned no
 
 
 			/* in case there was nobody using that buffer, throw it
 			/* in case there was nobody using that buffer, throw it
 			 * away after writing it back to main memory */
 			 * away after writing it back to main memory */
+			_STARPU_TRACE_START_WRITEBACK(node);
 			transfer_subtree_to_node(old_data, node, 0);
 			transfer_subtree_to_node(old_data, node, 0);
+			_STARPU_TRACE_END_WRITEBACK(node);
 
 
 			/* now replace the previous data */
 			/* now replace the previous data */
 			reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
 			reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
@@ -670,7 +676,7 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	{
 	{
 		static int warned;
 		static int warned;
 		if (!warned) {
 		if (!warned) {
-			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out\n", node, (unsigned long) reclaim);
+			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out. This message will not be printed again for further purges\n", node, (unsigned long) reclaim);
 			warned = 1;
 			warned = 1;
 		}
 		}
 	}
 	}
@@ -914,7 +920,9 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 	if (replicate->allocated)
 	if (replicate->allocated)
 	{
 	{
 		/* Argl, somebody allocated it in between already, drop this one */
 		/* Argl, somebody allocated it in between already, drop this one */
+		_STARPU_TRACE_START_FREE(dst_node, data_size);
 		handle->ops->free_data_on_node(data_interface, dst_node);
 		handle->ops->free_data_on_node(data_interface, dst_node);
+		_STARPU_TRACE_END_FREE(dst_node);
 		allocated_memory = 0;
 		allocated_memory = 0;
 	}
 	}
 	else
 	else

+ 3 - 0
src/datawizard/reduction.c

@@ -233,6 +233,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 					/* Perform the reduction between replicates i
 					/* Perform the reduction between replicates i
 					 * and i+step and put the result in replicate i */
 					 * and i+step and put the result in replicate i */
 					struct starpu_task *redux_task = starpu_task_create();
 					struct starpu_task *redux_task = starpu_task_create();
+					redux_task->name = "redux_task_between_replicates";
 
 
 					/* Mark these tasks so that StarPU does not block them
 					/* Mark these tasks so that StarPU does not block them
 					 * when they try to access the handle (normal tasks are
 					 * when they try to access the handle (normal tasks are
@@ -294,6 +295,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 		if (empty)
 		if (empty)
 		{
 		{
 			struct starpu_task *redux_task = starpu_task_create();
 			struct starpu_task *redux_task = starpu_task_create();
+			redux_task->name = "redux_task_empty";
 
 
 			/* Mark these tasks so that StarPU does not block them
 			/* Mark these tasks so that StarPU does not block them
 			 * when they try to access the handle (normal tasks are
 			 * when they try to access the handle (normal tasks are
@@ -320,6 +322,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 		for (replicate = 0; replicate < replicate_count; replicate++)
 		for (replicate = 0; replicate < replicate_count; replicate++)
 		{
 		{
 			struct starpu_task *redux_task = starpu_task_create();
 			struct starpu_task *redux_task = starpu_task_create();
+			redux_task->name = "redux_task_reduction";
 
 
 			/* Mark these tasks so that StarPU does not block them
 			/* Mark these tasks so that StarPU does not block them
 			 * when they try to access the handle (normal tasks are
 			 * when they try to access the handle (normal tasks are

+ 6 - 16
src/datawizard/user_interactions.c

@@ -143,20 +143,15 @@ int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t h
 	{
 	{
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
 		wrapper->pre_sync_task = starpu_task_create();
 		wrapper->pre_sync_task = starpu_task_create();
+		wrapper->pre_sync_task->name = "acquire_cb_pre";
 		wrapper->pre_sync_task->detach = 1;
 		wrapper->pre_sync_task->detach = 1;
 		wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback;
 		wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback;
 		wrapper->pre_sync_task->callback_arg = wrapper;
 		wrapper->pre_sync_task->callback_arg = wrapper;
 
 
 		wrapper->post_sync_task = starpu_task_create();
 		wrapper->post_sync_task = starpu_task_create();
+		wrapper->post_sync_task->name = "acquire_cb_post";
 		wrapper->post_sync_task->detach = 1;
 		wrapper->post_sync_task->detach = 1;
 
 
-#ifdef STARPU_USE_FXT
-                struct _starpu_job *job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task);
-                job->model_name = "acquire_cb_pre";
-                job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
-                job->model_name = "acquire_cb_post";
-#endif
-
 		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, handle, mode);
 		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, handle, mode);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 
 
@@ -197,7 +192,7 @@ int starpu_data_acquire_cb(starpu_data_handle_t handle,
 int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle,
 int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle,
 						  enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
 						  enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 {
-	return starpu_data_acquire_on_node_cb_sequential_consistency(handle, 0, mode, callback, arg, sequential_consistency);
+	return starpu_data_acquire_on_node_cb_sequential_consistency(handle, STARPU_MAIN_RAM, mode, callback, arg, sequential_consistency);
 }
 }
 
 
 /*
 /*
@@ -265,18 +260,13 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, unsigned node, enum
 	{
 	{
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
 		wrapper.pre_sync_task = starpu_task_create();
 		wrapper.pre_sync_task = starpu_task_create();
+		wrapper.pre_sync_task->name = "acquire_pre";
 		wrapper.pre_sync_task->detach = 0;
 		wrapper.pre_sync_task->detach = 0;
 
 
 		wrapper.post_sync_task = starpu_task_create();
 		wrapper.post_sync_task = starpu_task_create();
+		wrapper.post_sync_task->name = "acquire_post";
 		wrapper.post_sync_task->detach = 1;
 		wrapper.post_sync_task->detach = 1;
 
 
-#ifdef STARPU_USE_FXT
-                struct _starpu_job *job = _starpu_get_job_associated_to_task(wrapper.pre_sync_task);
-                job->model_name = "acquire_pre";
-                job = _starpu_get_job_associated_to_task(wrapper.post_sync_task);
-                job->model_name = "acquire_post";
-#endif
-
 		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, handle, mode);
 		new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, handle, mode);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 		if (new_task)
 		if (new_task)
@@ -327,7 +317,7 @@ int starpu_data_acquire_on_node(starpu_data_handle_t handle, unsigned node, enum
 
 
 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
 {
 {
-	return starpu_data_acquire_on_node(handle, 0, mode);
+	return starpu_data_acquire_on_node(handle, STARPU_MAIN_RAM, mode);
 }
 }
 
 
 /* This function must be called after starpu_data_acquire so that the
 /* This function must be called after starpu_data_acquire so that the

+ 65 - 14
src/debug/traces/starpu_fxt.c

@@ -101,7 +101,8 @@ static unsigned get_colour_symbol_blue(char *name)
 }
 }
 
 
 static double last_codelet_start[STARPU_NMAXWORKERS];
 static double last_codelet_start[STARPU_NMAXWORKERS];
-static char last_codelet_symbol[STARPU_NMAXWORKERS][128];
+/* _STARPU_FUT_DO_PROBE4STR records only 4 longs */
+static char last_codelet_symbol[STARPU_NMAXWORKERS][4*sizeof(unsigned long)];
 
 
 /* If more than a period of time has elapsed, we flush the profiling info,
 /* If more than a period of time has elapsed, we flush the profiling info,
  * otherwise they are accumulated everytime there is a new relevant event. */
  * otherwise they are accumulated everytime there is a new relevant event. */
@@ -109,6 +110,7 @@ static char last_codelet_symbol[STARPU_NMAXWORKERS][128];
 static double last_activity_flush_timestamp[STARPU_NMAXWORKERS];
 static double last_activity_flush_timestamp[STARPU_NMAXWORKERS];
 static double accumulated_sleep_time[STARPU_NMAXWORKERS];
 static double accumulated_sleep_time[STARPU_NMAXWORKERS];
 static double accumulated_exec_time[STARPU_NMAXWORKERS];
 static double accumulated_exec_time[STARPU_NMAXWORKERS];
+static double reclaiming[STARPU_MAXNODES];
 
 
 LIST_TYPE(_starpu_symbol_name,
 LIST_TYPE(_starpu_symbol_name,
 	char *name;
 	char *name;
@@ -514,9 +516,9 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 	{
 	{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 		create_paje_state_color(name, "S", red, green, blue);
 		create_paje_state_color(name, "S", red, green, blue);
-		create_paje_state_color(name, "Ctx1", 255.0, 255.0, 0.0);
+		create_paje_state_color(name, "Ctx1", 255.0, 102.0, 255.0);
 		create_paje_state_color(name, "Ctx2", .0, 255.0, 0.0);
 		create_paje_state_color(name, "Ctx2", .0, 255.0, 0.0);
-		create_paje_state_color(name, "Ctx3", 75.0, .0, 130.0);
+		create_paje_state_color(name, "Ctx3", 255.0, 255.0, .0);
 		create_paje_state_color(name, "Ctx4", .0, 245.0, 255.0);
 		create_paje_state_color(name, "Ctx4", .0, 245.0, 255.0);
 		create_paje_state_color(name, "Ctx5", .0, .0, .0);
 		create_paje_state_color(name, "Ctx5", .0, .0, .0);
 		create_paje_state_color(name, "Ctx6", .0, .0, 128.0);
 		create_paje_state_color(name, "Ctx6", .0, .0, 128.0);
@@ -526,9 +528,9 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 		create_paje_state_color(name, "Ctx10", 154.0, 205.0, 50.0);
 		create_paje_state_color(name, "Ctx10", 154.0, 205.0, 50.0);
 #else
 #else
 		fprintf(out_paje_file, "6	%s	S	%s	\"%f %f %f\" \n", name, name, red, green, blue);
 		fprintf(out_paje_file, "6	%s	S	%s	\"%f %f %f\" \n", name, name, red, green, blue);
-		fprintf(out_paje_file, "6	%s	Ctx1	%s	\"255.0 255.0 0.0\" \n", name, name);
+		fprintf(out_paje_file, "6	%s	Ctx1	%s	\"255.0 102.0 255.0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx2	%s	\".0 255.0 .0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx2	%s	\".0 255.0 .0\" \n", name, name);
-		fprintf(out_paje_file, "6	%s	Ctx3	%s	\"75.0 .0 130.0\" \n", name, name);
+		fprintf(out_paje_file, "6	%s	Ctx3	%s	\"225.0 225.0 .0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx4	%s	\".0 245.0 255.0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx4	%s	\".0 245.0 255.0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx5	%s	\".0 .0 .0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx5	%s	\".0 .0 .0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx6	%s	\".0 .0 128.0\" \n", name, name);
 		fprintf(out_paje_file, "6	%s	Ctx6	%s	\".0 .0 128.0\" \n", name, name);
@@ -587,7 +589,7 @@ static struct starpu_fxt_codelet_event *dumped_codelets;
 static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 {
 	int worker;
 	int worker;
-	worker = find_worker_id(ev->param[4]);
+	worker = find_worker_id(ev->param[6]);
 	if (worker < 0) return;
 	if (worker < 0) return;
 
 
 	char *prefix = options->file_prefix;
 	char *prefix = options->file_prefix;
@@ -598,7 +600,7 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 	uint32_t codelet_hash = ev->param[2];
 	uint32_t codelet_hash = ev->param[2];
 
 
 	if (out_paje_file)
 	if (out_paje_file)
-		worker_set_state(end_codelet_time, prefix, ev->param[4], "B");
+		worker_set_state(end_codelet_time, prefix, ev->param[6], "B");
 
 
 	double codelet_length = (end_codelet_time - last_codelet_start[worker]);
 	double codelet_length = (end_codelet_time - last_codelet_start[worker]);
 
 
@@ -610,14 +612,14 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 
 
 	if (options->dumped_codelets)
 	if (options->dumped_codelets)
 	{
 	{
-		struct starpu_perfmodel_arch* arch = ev->param[3];
-
 		dumped_codelets_count++;
 		dumped_codelets_count++;
 		dumped_codelets = realloc(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event));
 		dumped_codelets = realloc(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event));
 
 
 		snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, 256, "%s", last_codelet_symbol[worker]);
 		snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, 256, "%s", last_codelet_symbol[worker]);
 		dumped_codelets[dumped_codelets_count - 1].workerid = worker;
 		dumped_codelets[dumped_codelets_count - 1].workerid = worker;
-		dumped_codelets[dumped_codelets_count - 1].arch = *arch;
+		dumped_codelets[dumped_codelets_count - 1].arch.type = ev->param[3];
+		dumped_codelets[dumped_codelets_count - 1].arch.devid = ev->param[4];
+		dumped_codelets[dumped_codelets_count - 1].arch.ncore = ev->param[5];
 
 
 		dumped_codelets[dumped_codelets_count - 1].size = codelet_size;
 		dumped_codelets[dumped_codelets_count - 1].size = codelet_size;
 		dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash;
 		dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash;
@@ -1466,15 +1468,64 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 				handle_memnode_event(&ev, options, "Ar");
 				handle_memnode_event(&ev, options, "Ar");
 				break;
 				break;
 
 
+			case _STARPU_FUT_END_ALLOC:
+			case _STARPU_FUT_END_ALLOC_REUSE:
+				if (!options->no_bus)
+				handle_memnode_event(&ev, options, "No");
+				break;
+
+			case _STARPU_FUT_START_FREE:
+				if (!options->no_bus)
+				{
+					handle_memnode_event(&ev, options, "F");
+				}
+				break;
+
+			case _STARPU_FUT_END_FREE:
+				if (!options->no_bus)
+				{
+					unsigned memnode = ev.param[0];
+					if (reclaiming[memnode])
+						handle_memnode_event(&ev, options, "R");
+					else
+						handle_memnode_event(&ev, options, "No");
+				}
+				break;
+
+			case _STARPU_FUT_START_WRITEBACK:
+				if (!options->no_bus)
+				{
+					handle_memnode_event(&ev, options, "W");
+				}
+				break;
+
+			case _STARPU_FUT_END_WRITEBACK:
+				if (!options->no_bus)
+				{
+					unsigned memnode = ev.param[0];
+					if (reclaiming[memnode])
+						handle_memnode_event(&ev, options, "R");
+					else
+						handle_memnode_event(&ev, options, "No");
+				}
+				break;
+
 			case _STARPU_FUT_START_MEMRECLAIM:
 			case _STARPU_FUT_START_MEMRECLAIM:
-				handle_memnode_event(&ev, options, "R");
+				if (!options->no_bus)
+				{
+					unsigned memnode = ev.param[0];
+					reclaiming[memnode] = 1;
+					handle_memnode_event(&ev, options, "R");
+				}
 				break;
 				break;
 
 
-			case _STARPU_FUT_END_ALLOC:
-			case _STARPU_FUT_END_ALLOC_REUSE:
 			case _STARPU_FUT_END_MEMRECLAIM:
 			case _STARPU_FUT_END_MEMRECLAIM:
 				if (!options->no_bus)
 				if (!options->no_bus)
-				handle_memnode_event(&ev, options, "No");
+				{
+					unsigned memnode = ev.param[0];
+					reclaiming[memnode] = 0;
+					handle_memnode_event(&ev, options, "No");
+				}
 				break;
 				break;
 
 
 			case _STARPU_FUT_USER_EVENT:
 			case _STARPU_FUT_USER_EVENT:

+ 6 - 2
src/debug/traces/starpu_paje.c

@@ -147,7 +147,9 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	poti_DefineStateType("MS", "Mm", "Memory Node State");
 	poti_DefineStateType("MS", "Mm", "Memory Node State");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
-	poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .4");
+	poti_DefineEntityValue("F", "MS", "Freeing", ".6 .3 .0");
+	poti_DefineEntityValue("W", "MS", "WritingBack", ".0 .0 .4");
+	poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .6");
 	poti_DefineEntityValue("Co", "MS", "DriverCopy", ".3 .5 .1");
 	poti_DefineEntityValue("Co", "MS", "DriverCopy", ".3 .5 .1");
 	poti_DefineEntityValue("CoA", "MS", "DriverCopyAsync", ".1 .3 .1");
 	poti_DefineEntityValue("CoA", "MS", "DriverCopyAsync", ".1 .3 .1");
 	poti_DefineEntityValue("No", "MS", "Nothing", ".0 .0 .0");
 	poti_DefineEntityValue("No", "MS", "Nothing", ".0 .0 .0");
@@ -256,7 +258,9 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	fprintf(file, "\
 	fprintf(file, "\
 6       A       MS      Allocating         \".4 .1 .0\"		\n\
 6       A       MS      Allocating         \".4 .1 .0\"		\n\
 6       Ar       MS      AllocatingReuse       \".1 .1 .8\"		\n\
 6       Ar       MS      AllocatingReuse       \".1 .1 .8\"		\n\
-6       R       MS      Reclaiming         \".0 .1 .4\"		\n\
+6       F       MS      Freeing         \".6 .3 .0\"		\n\
+6       W       MS      WritingBack         \".0 .0 .4\"		\n\
+6       R       MS      Reclaiming         \".0 .1 .6\"		\n\
 6       Co       MS     DriverCopy         \".3 .5 .1\"		\n\
 6       Co       MS     DriverCopy         \".3 .5 .1\"		\n\
 6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n\
 6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n\
 6       No       MS     Nothing         \".0 .0 .0\"		\n\
 6       No       MS     Nothing         \".0 .0 .0\"		\n\

+ 1 - 0
src/sched_policies/fifo_queues.c

@@ -49,6 +49,7 @@ struct _starpu_fifo_taskq *_starpu_create_fifo(void)
 	/* note that not all mechanisms (eg. the semaphore) have to be used */
 	/* note that not all mechanisms (eg. the semaphore) have to be used */
 	starpu_task_list_init(&fifo->taskq);
 	starpu_task_list_init(&fifo->taskq);
 	fifo->ntasks = 0;
 	fifo->ntasks = 0;
+	STARPU_HG_DISABLE_CHECKING(fifo->ntasks);
 	fifo->nprocessed = 0;
 	fifo->nprocessed = 0;
 
 
 	fifo->exp_start = starpu_timing_now();
 	fifo->exp_start = starpu_timing_now();

+ 2 - 8
src/util/execute_on_all.c

@@ -64,6 +64,7 @@ void starpu_execute_on_specific_workers(void (*func)(void*), void * arg, unsigne
 	{
 	{
 		unsigned worker = workers[w];
 		unsigned worker = workers[w];
 		tasks[w] = starpu_task_create();
 		tasks[w] = starpu_task_create();
+		tasks[w]->name = name;
 
 
 		tasks[w]->cl = &wrapper_cl;
 		tasks[w]->cl = &wrapper_cl;
 		tasks[w]->cl_arg = &args;
 		tasks[w]->cl_arg = &args;
@@ -74,10 +75,6 @@ void starpu_execute_on_specific_workers(void (*func)(void*), void * arg, unsigne
 		tasks[w]->detach = 0;
 		tasks[w]->detach = 0;
 		tasks[w]->destroy = 0;
 		tasks[w]->destroy = 0;
 
 
-#ifdef STARPU_USE_FXT
-		_starpu_get_job_associated_to_task(tasks[w])->model_name = name;
-#endif
-
 		_starpu_exclude_task_from_dag(tasks[w]);
 		_starpu_exclude_task_from_dag(tasks[w]);
 
 
 		ret = starpu_task_submit(tasks[w]);
 		ret = starpu_task_submit(tasks[w]);
@@ -134,6 +131,7 @@ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t
 	for (worker = 0; worker < nworkers; worker++)
 	for (worker = 0; worker < nworkers; worker++)
 	{
 	{
 		tasks[worker] = starpu_task_create();
 		tasks[worker] = starpu_task_create();
+		tasks[worker]->name = wrapper_cl.name;
 
 
 		tasks[worker]->cl = &wrapper_cl;
 		tasks[worker]->cl = &wrapper_cl;
 		tasks[worker]->cl_arg = &args;
 		tasks[worker]->cl_arg = &args;
@@ -144,10 +142,6 @@ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t
 		tasks[worker]->detach = 0;
 		tasks[worker]->detach = 0;
 		tasks[worker]->destroy = 0;
 		tasks[worker]->destroy = 0;
 
 
-#ifdef STARPU_USE_FXT
-		_starpu_get_job_associated_to_task(tasks[worker])->model_name = wrapper_cl.name;
-#endif
-
 		_starpu_exclude_task_from_dag(tasks[worker]);
 		_starpu_exclude_task_from_dag(tasks[worker]);
 
 
 		ret = _starpu_task_submit_internally(tasks[worker]);
 		ret = _starpu_task_submit_internally(tasks[worker]);

+ 4 - 4
src/util/misc.c

@@ -38,11 +38,11 @@ const char *_starpu_job_get_model_name(struct _starpu_job *j)
 
 
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 	if (task)
 	if (task)
+	{
 		ret = _starpu_codelet_get_model_name(task->cl);
 		ret = _starpu_codelet_get_model_name(task->cl);
+		if (!ret)
+			ret = task->name;
+	}
 
 
-#ifdef STARPU_USE_FXT
-	if (!ret)
-		ret = j->model_name;
-#endif
 	return ret;
 	return ret;
 }
 }

+ 2 - 1
src/util/starpu_create_sync_task.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,6 +30,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 
 
 	/* We create an empty task */
 	/* We create an empty task */
 	struct starpu_task *sync_task = starpu_task_create();
 	struct starpu_task *sync_task = starpu_task_create();
+	sync_task->name = "create_sync_task";
 
 
 	sync_task->use_tag = 1;
 	sync_task->use_tag = 1;
 	sync_task->tag_id = sync_tag;
 	sync_task->tag_id = sync_tag;

+ 1 - 0
src/util/starpu_data_cpy.c

@@ -141,6 +141,7 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
 
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
+	task->name = "data_cpy";
 
 
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	if (reduction)
 	if (reduction)

+ 2 - 0
src/util/starpu_task_insert.c

@@ -86,6 +86,7 @@ int _starpu_task_insert_v(struct starpu_codelet *cl, va_list varg_list)
 	}
 	}
 
 
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
+	task->name = "task_insert";
 	task->cl_arg_free = 1;
 	task->cl_arg_free = 1;
 
 
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
@@ -146,6 +147,7 @@ struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...)
 	}
 	}
 
 
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
+	task->name = "task_build";
 
 
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
 	{
 	{

+ 1 - 0
tests/Makefile.am

@@ -222,6 +222,7 @@ noinst_PROGRAMS =				\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels_spmd	\
 	parallel_tasks/parallel_kernels_spmd	\
 	parallel_tasks/spmd_peager		\
 	parallel_tasks/spmd_peager		\
+	parallel_tasks/cuda_only		\
 	perfmodels/regression_based		\
 	perfmodels/regression_based		\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/feed				\
 	perfmodels/feed				\

+ 3 - 11
tests/main/driver_api/run_driver.c

@@ -32,11 +32,6 @@
  * - STARPU_TEST_SKIPPED (non-critical errors)
  * - STARPU_TEST_SKIPPED (non-critical errors)
  */
  */
 
 
-/* See FIXME in src/core/sched_ctx.c about starpu_drivers_request_termination.
- * This test should really use non-synchronous tasks, to properly cover all
- * needed cases. */
-#define FIXME 0
-
 #if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 #if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 static void
 static void
 dummy(void *buffers[], void *args)
 dummy(void *buffers[], void *args)
@@ -100,9 +95,8 @@ test_cpu(void)
 	cl.where = STARPU_CPU;
 	cl.where = STARPU_CPU;
 	task->cl = &cl;
 	task->cl = &cl;
 	task->cl_arg = &var;
 	task->cl_arg = &var;
-#if !FIXME
 	task->synchronous = 1;
 	task->synchronous = 1;
-#endif
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
 	{
 	{
@@ -159,9 +153,8 @@ test_cuda(void)
 	cl.where = STARPU_CUDA;
 	cl.where = STARPU_CUDA;
 	task->cl = &cl;
 	task->cl = &cl;
 	task->cl_arg = &var;
 	task->cl_arg = &var;
-#if !FIXME
 	task->synchronous = 1;
 	task->synchronous = 1;
-#endif
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
 	{
 	{
@@ -244,9 +237,8 @@ test_opencl(void)
 	cl.where = STARPU_OPENCL;
 	cl.where = STARPU_OPENCL;
 	task->cl = &cl;
 	task->cl = &cl;
 	task->cl_arg = &var;
 	task->cl_arg = &var;
-#if !FIXME
 	task->synchronous = 1;
 	task->synchronous = 1;
-#endif
+
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
 	{
 	{

+ 1 - 1
tests/microbenchs/prefetch_data_on_node.c

@@ -113,7 +113,7 @@ int main(int argc, char **argv)
 		{
 		{
 			/* synchronous prefetch */
 			/* synchronous prefetch */
 			unsigned node = starpu_worker_get_memory_node(worker);
 			unsigned node = starpu_worker_get_memory_node(worker);
-			ret = starpu_data_prefetch_on_node(v_handle, node, STARPU_MAIN_RAM);
+			ret = starpu_data_prefetch_on_node(v_handle, node, 0);
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node");
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node");
 
 
 			/* execute a task */
 			/* execute a task */

+ 110 - 0
tests/parallel_tasks/cuda_only.c

@@ -0,0 +1,110 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu.h>
+#include <limits.h>
+#include <unistd.h>
+#include "../helper.h"
+
+void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
+{
+}
+
+struct starpu_perfmodel model =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "test"
+};
+
+static struct starpu_codelet cl =
+{
+	.cuda_funcs = {codelet_null, NULL},
+	.model = &model,
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+struct starpu_perfmodel model2 =
+{
+	.type = STARPU_HISTORY_BASED,
+	.symbol = "test2"
+};
+
+static struct starpu_codelet cl2 =
+{
+	.cuda_funcs = {codelet_null, NULL},
+	.model = &model2,
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+
+int main(int argc, char **argv)
+{
+	int ret;
+	starpu_data_handle_t handle;
+	unsigned data;
+
+        struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	conf.sched_policy_name = "pheft";
+
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&data, sizeof(data));
+
+	unsigned iter;
+	struct starpu_task *task;
+	for (iter = 0; iter < 100; iter++)
+	{
+		task = starpu_task_create();
+		task->cl = &cl;
+		task->handles[0] = handle;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		task = starpu_task_create();
+		task->cl = &cl2;
+		task->handles[0] = handle;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	task->destroy = 0;
+	starpu_task_destroy(task);
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}

+ 9 - 8
tests/perfmodels/value_nan.c

@@ -31,21 +31,22 @@ int _check_number(double val, int checknan)
 {
 {
 	char *tmp = "starpu_XXXXXX";
 	char *tmp = "starpu_XXXXXX";
 	char filename[100];
 	char filename[100];
-	int id;
 
 
 	strcpy(filename, tmp);
 	strcpy(filename, tmp);
+
 #ifdef STARPU_HAVE_WINDOWS
 #ifdef STARPU_HAVE_WINDOWS
         _mktemp(filename);
         _mktemp(filename);
-        id = open(filename, _O_RDWR);
 #else
 #else
-	id = mkstemp(filename);
-
-#endif
-	/* fail */
-	if (id < 0)
 	{
 	{
-		return 1;
+	     int id = mkstemp(filename);
+	     /* fail */
+	     if (id < 0)
+	     {
+		  FPRINTF(stderr, "Error when creating temp file\n");
+		  return 1;
+	     }
 	}
 	}
+#endif
 
 
 	/* write the double value in the file followed by a predefined string */
 	/* write the double value in the file followed by a predefined string */
 	FILE *f = fopen(filename, "w");
 	FILE *f = fopen(filename, "w");

+ 14 - 6
tools/gdbinit

@@ -30,10 +30,8 @@ define starpu-print-job
     printf "\tsubmitted:\t\t\t<%d>\n", $job->submitted
     printf "\tsubmitted:\t\t\t<%d>\n", $job->submitted
     printf "\tterminated:\t\t\t<%d>\n", $job->terminated
     printf "\tterminated:\t\t\t<%d>\n", $job->terminated
     printf "\tjob_id:\t\t\t\t<%d>\n", $job->job_id
     printf "\tjob_id:\t\t\t\t<%d>\n", $job->job_id
-    if _starpu_use_fxt == 1
-      if $job->model_name
-        printf "\tmodel_name:\t\t\t<%s>\n", $job->model_name
-      end
+    if $job->task
+        printf "\tname:\t\t\t\t<%s>\n", $job->task->name
     end
     end
   end
   end
 end
 end
@@ -73,6 +71,7 @@ define starpu-print-task
   end
   end
 
 
   printf "StarPU Task (%p)\n", $task
   printf "StarPU Task (%p)\n", $task
+  printf "\tname:\t\t\t\t<%s>\n", $task->name
   printf "\tcodelet:\t\t\t<%p>\n", $task->cl
   printf "\tcodelet:\t\t\t<%p>\n", $task->cl
   printf "\tcallback:\t\t\t<%p>\n", $task->callback_func
   printf "\tcallback:\t\t\t<%p>\n", $task->callback_func
   printf "\tsynchronous:\t\t\t<%d>\n", $task->synchronous
   printf "\tsynchronous:\t\t\t<%d>\n", $task->synchronous
@@ -137,8 +136,17 @@ define starpu-tags
 end
 end
 
 
 define starpu-tasks
 define starpu-tasks
-  printf "%d submitted tasks\n", nsubmitted
-  printf "%d ready tasks\n", nready
+  set language c
+  set $num=0
+  set $nsubmitted=0
+  set $nready=0
+  while $num<config->topology->nsched_ctxs
+  	set $nsubmitted = $nsubmitted + config->sched_ctxs[$num]->tasks_barrier->barrier->reached_start 
+  	set $nready = $nready + config->sched_ctxs[$num]->ready_tasks_barrier->barrier->reached_start
+	set $num = $num + 1
+  end
+  printf "%d submitted tasks\n", $nsubmitted
+  printf "%d ready tasks\n", $nready
   printf "Tasks being run:\n"
   printf "Tasks being run:\n"
   set $n = 0
   set $n = 0
   while $n < config.topology.nworkers
   while $n < config.topology.nworkers

+ 0 - 75
tools/model.sh

@@ -1,75 +0,0 @@
-#!/bin/bash
-
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-# 
-# Copyright (C) 2008, 2009, 2010  Université de Bordeaux 1
-# Copyright (C) 2010  Centre National de la Recherche Scientifique
-# 
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-# 
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# 
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-
-trace_model()
-{
-	inputfile=$1
-	
-	cpuentries=`head -1 $inputfile`
-	gpuentries=`head -2 $inputfile|tail -1`
-	
-	cpumodel=`head -3 $inputfile|tail -1`
-	gpumodel=`head -4 $inputfile|tail -1`
-	
-	a_cpu=`cut -f 1 $inputfile| head -5|tail -1`
-	b_cpu=`cut -f 2 $inputfile| head -5|tail -1`
-	c_cpu=`cut -f 3 $inputfile| head -5|tail -1`
-	
-	a_gpu=`cut -f 1 $inputfile| head -6|tail -1`
-	b_gpu=`cut -f 2 $inputfile| head -6|tail -1`
-	c_gpu=`cut -f 3 $inputfile| head -6|tail -1`
-
-	alpha_cpu=`cut -f 5 $inputfile| head -3|tail -1` 
-	alpha_gpu=`cut -f 5 $inputfile| head -4|tail -1` 
-	
-	beta_cpu=`cut -f 6 $inputfile| head -3|tail -1` 
-	beta_gpu=`cut -f 6 $inputfile| head -4|tail -1` 
-	
-	tail -$(($gpuentries + $cpuentries)) $inputfile | head -$(($cpuentries)) |cut -f 2-4 > $inputfile.cpu
-	tail -$(($gpuentries)) $inputfile | cut -f 2-4> $inputfile.gpu
-	
-	echo "pouet $cpuentries gpu $gpuentries toot"
-	
-	echo "cpumodel $alpha_cpu * size ^ $beta_cpu"
-	echo "gpumodel $alpha_gpu * size ^ $beta_gpu"
-	
-	gpfile=$inputfile.gp
-	
-	echo "#!/usr/bin/gnuplot -persist" 		> $gpfile
-	echo "set term postscript eps enhanced color" 	>> $gpfile
-	echo "set logscale x"				>> $gpfile 
-	echo "set logscale y"				>> $gpfile 
-	echo "set key left top"				>> $gpfile 
-	echo "set title \"$inputfile\""			>> $gpfile 
-	echo "set output \"$inputfile.eps\""		>> $gpfile
-	
-	echo  "plot	$alpha_gpu*x**$beta_gpu title \"GPU regression\" ,\\" >> $gpfile
-	echo  "	\"$inputfile.gpu\" with errorbar title \"GPU measured\" ,\\" >> $gpfile
-	echo  "	$c_gpu + exp(log($a_gpu) + $b_gpu * log(x) ) title \"GPU regression (non linear)\" ,\\" >> $gpfile
-	echo  "	\"$inputfile.cpu\" with errorbar title \"CPU measured\" ,\\" >> $gpfile
-	echo  "	$alpha_cpu*x**$beta_cpu title \"CPU regression\" ,\\" >> $gpfile
-	echo  "	$c_cpu + exp(log($a_cpu) + $b_cpu * log(x) ) title \"CPU regression (non linear)\"" >> $gpfile
-	
-	gnuplot $gpfile
-}
-
-for file in $@
-do
-	trace_model "$file"
-done

+ 1 - 1
tools/starpu_codelet_histo_profile.in

@@ -64,7 +64,7 @@ size <- unique(mytable[,3])
 
 
 pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep="."));
 pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep="."));
 
 
-h <- hist(val[val > quantile(val,0.01) & val<quantile(val,0.99)], col="red", breaks=50, density=10)
+try ( { h <- hist(val[val > quantile(val,0.01) & val<quantile(val,0.99)], col="red", breaks=50, density=10) } )
 
 
 dev.off()
 dev.off()
 
 

+ 1 - 1
tools/starpu_fxt_stats.c

@@ -136,7 +136,7 @@ int main(int argc, char **argv)
 	else
 	else
 	{
 	{
 		fd_out = fopen(fout, "w");
 		fd_out = fopen(fout, "w");
-		if (fd_out < 0)
+		if (fd_out == NULL)
 		{
 		{
 			perror("open failed :");
 			perror("open failed :");
 			exit(-1);
 			exit(-1);

+ 2 - 2
tools/starpu_perfmodel_display.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -158,7 +158,7 @@ int main(int argc, char **argv)
 	}
 	}
 	else
 	else
 	{
 	{
-		struct starpu_perfmodel model;
+		struct starpu_perfmodel model = {};
                 int ret = starpu_perfmodel_load_symbol(psymbol, &model);
                 int ret = starpu_perfmodel_load_symbol(psymbol, &model);
                 if (ret == 1)
                 if (ret == 1)
 		{
 		{

+ 5 - 4
tools/starpu_perfmodel_plot.c

@@ -450,15 +450,16 @@ static void init_archtype_is_found(struct starpu_perfmodel *model)
 	for(archtype = 0; archtype < STARPU_NARCH; archtype++)
 	for(archtype = 0; archtype < STARPU_NARCH; archtype++)
 	{
 	{
 	
 	
-		for(devid=0; model->per_arch[archtype][devid] != NULL; devid++);
+		for(devid=0; model->per_arch[archtype][devid] != NULL; devid++)
+			;
 		ndevice = devid;
 		ndevice = devid;
 		if(ndevice != 0)
 		if(ndevice != 0)
 		{
 		{
 			maxncore = malloc(sizeof(*maxncore)*ndevice);
 			maxncore = malloc(sizeof(*maxncore)*ndevice);
-			for(devid=0; devid < ndevice; devid++);
+			for(devid=0; devid < ndevice; devid++)
 			{
 			{
-			
-				for(ncore=0; model->per_arch[archtype][devid][ncore] != NULL; ncore++);
+				for(ncore=0; model->per_arch[archtype][devid][ncore] != NULL; ncore++)
+					;
 				maxncore[devid] = ncore;
 				maxncore[devid] = ncore;
 			}
 			}
 		}
 		}

+ 0 - 60
tools/valgrind/fscanf.suppr

@@ -1,60 +0,0 @@
-{
-   fscanf error
-   Memcheck:Cond
-   fun:__GI___strncasecmp_l
-   fun:____strtod_l_internal
-   fun:_IO_vfscanf
-}
-
-{
-   fprintf cond error
-   Memcheck:Cond
-   ...
-   fun:vfprintf
-   fun:buffered_vfprintf
-   fun:vfprintf
-   fun:fprintf
-}
-
-{
-   fprintf value error
-   Memcheck:Value8
-   ...
-   fun:vfprintf
-   fun:buffered_vfprintf
-   fun:vfprintf
-   fun:fprintf
-}
-
-{
-   fprintf addr4 error
-   Memcheck:Addr4
-   ...
-   fun:vfprintf
-   fun:buffered_vfprintf
-   fun:vfprintf
-   fun:fprintf
-   fun:main
-}
-
-{
-   fprintf addr8 error
-   Memcheck:Addr8
-   ...
-   fun:vfprintf
-   fun:buffered_vfprintf
-   fun:vfprintf
-   fun:fprintf
-   fun:main
-}
-
-{
-   fprintf addr1 error
-   Memcheck:Addr1
-   ...
-   fun:vfprintf
-   fun:buffered_vfprintf
-   fun:vfprintf
-   fun:fprintf
-   fun:main
-}

+ 8 - 0
tools/valgrind/pthread.suppr

@@ -13,3 +13,11 @@
    fun:pthread_cancel_init
    fun:pthread_cancel_init
    ...
    ...
 }
 }
+
+{
+   Helgrind 3.9 wrongly compares to-be-destroyed mutex with init value
+   Helgrind:Race
+   fun:my_memcmp
+   fun:pthread_mutex_destroy
+   ...
+}