瀏覽代碼

merge from trunk

Olivier Aumage 8 年之前
父節點
當前提交
98508d981f
共有 84 個文件被更改,包括 1193 次插入和 537 次删除
  1. 30 3
      configure.ac
  2. 3 2
      doc/doxygen/chapters/110_basic_examples.doxy
  3. 9 5
      doc/doxygen/chapters/210_check_list_performance.doxy
  4. 15 8
      doc/doxygen/chapters/301_tasks.doxy
  5. 5 3
      doc/doxygen/chapters/310_data_management.doxy
  6. 8 2
      doc/doxygen/chapters/320_scheduling.doxy
  7. 4 0
      doc/doxygen/chapters/350_modularized_scheduler.doxy
  8. 6 3
      doc/doxygen/chapters/370_online_performance_tools.doxy
  9. 1 1
      doc/doxygen/chapters/380_offline_performance_tools.doxy
  10. 19 10
      doc/doxygen/chapters/390_faq.doxy
  11. 38 19
      doc/doxygen/chapters/410_mpi_support.doxy
  12. 3 2
      doc/doxygen/chapters/430_mic_scc_support.doxy
  13. 15 0
      doc/doxygen/chapters/510_configure_options.doxy
  14. 4 2
      doc/doxygen/chapters/api/data_partition.doxy
  15. 1 1
      doc/doxygen/chapters/api/insert_task.doxy
  16. 5 2
      doc/doxygen/chapters/api/modularized_scheduler.doxy
  17. 24 6
      doc/doxygen/chapters/api/scheduling_policy.doxy
  18. 6 3
      doc/doxygen/chapters/code/multiformat.c
  19. 2 1
      doc/doxygen/chapters/code/vector_scal_c.c
  20. 2 1
      doc/doxygen/chapters/code/vector_scal_opencl_codelet.cl
  21. 5 0
      examples/cg/cg.c
  22. 2 2
      examples/cpp/add_vectors_cpp11.cpp
  23. 1 1
      examples/filters/fblock_opencl_kernel.cl
  24. 1 2
      examples/mlr/mlr.c
  25. 4 4
      include/fstarpu_mod.f90
  26. 3 1
      include/starpu.h
  27. 4 4
      include/starpu_task.h
  28. 14 4
      include/starpu_util.h
  29. 2 2
      mpi/examples/matrix_decomposition/mpi_decomposition_params.c
  30. 1 2
      mpi/src/starpu_mpi.c
  31. 34 14
      mpi/tests/Makefile.am
  32. 1 1
      src/Makefile.am
  33. 5 2
      src/common/list.h
  34. 164 10
      src/common/prio_list.h
  35. 49 0
      src/common/utils.c
  36. 2 0
      src/common/utils.h
  37. 15 15
      src/core/perfmodel/perfmodel_bus.c
  38. 1 2
      src/core/sched_ctx.c
  39. 1 0
      src/core/sched_ctx.h
  40. 1 1
      src/core/sched_policy.c
  41. 8 1
      src/core/task.h
  42. 8 14
      src/core/topology.c
  43. 0 1
      src/core/workers.c
  44. 5 1
      src/datawizard/malloc.c
  45. 5 2
      src/datawizard/memory_nodes.c
  46. 8 41
      src/debug/traces/starpu_fxt.c
  47. 3 3
      src/drivers/cuda/driver_cuda.c
  48. 2 1
      src/profiling/profiling_helpers.c
  49. 1 1
      src/sched_policies/component_work_stealing.c
  50. 21 9
      src/sched_policies/deque_modeling_policy_data_aware.c
  51. 14 93
      src/sched_policies/eager_central_priority_policy.c
  52. 24 24
      src/sched_policies/fifo_queues.c
  53. 2 1
      src/sched_policies/fifo_queues.h
  54. 1 1
      src/sched_policies/graph_test_policy.c
  55. 1 1
      src/sched_policies/helper_mct.c
  56. 5 2
      src/sched_policies/parallel_heft.c
  57. 24 103
      src/sched_policies/prio_deque.c
  58. 51 18
      src/sched_policies/prio_deque.h
  59. 1 3
      src/util/fstarpu.c
  60. 1 2
      src/util/starpu_task_insert_utils.c
  61. 19 3
      tests/Makefile.am
  62. 2 2
      tests/datawizard/scal_opencl.cl
  63. 10 13
      tests/disk/disk_compute.c
  64. 9 12
      tests/disk/disk_copy.c
  65. 8 11
      tests/disk/disk_pack.c
  66. 14 12
      tests/disk/mem_reclaim.c
  67. 64 0
      tests/main/mkdtemp.c
  68. 13 10
      tests/microbenchs/matrix_as_vector.c
  69. 1 1
      tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh
  70. 1 1
      tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh
  71. 1 1
      tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh
  72. 6 1
      tests/microbenchs/tasks_size_overhead.c
  73. 48 0
      tests/model-checking/Makefile
  74. 18 0
      tests/model-checking/platform.xml
  75. 186 0
      tests/model-checking/prio_list.c
  76. 19 0
      tests/model-checking/prio_list.sh
  77. 18 0
      tests/model-checking/prio_list2.c
  78. 37 0
      tests/model-checking/starpu-mc.sh.in
  79. 2 0
      tools/Makefile.am
  80. 3 4
      tools/cppcheck/suppressions.txt
  81. 1 1
      tools/dev/internal/starpu_check_braces.sh
  82. 11 0
      tools/perfmodels/cluster.xml
  83. 4 0
      tools/perfmodels/hostfile
  84. 3 2
      tools/starpu_fxt_tool.c

+ 30 - 3
configure.ac

@@ -87,6 +87,7 @@ AC_OPENMP
 if test x$enable_perf_debug = xyes; then
     enable_shared=no
 fi
+default_enable_mpi_check=maybe
 
 ###############################################################################
 #                                                                             #
@@ -185,6 +186,10 @@ if test x$enable_simgrid = xyes ; then
 	# We won't bind or detect anything
 	with_hwloc=no
 
+        # disable mpi checks by default, they require static linking, we don't
+        # want that by default
+	default_enable_mpi_check=no
+
 	# Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++...
 	AC_LANG_PUSH([C++])
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
@@ -198,7 +203,15 @@ if test x$enable_simgrid = xyes ; then
 			  CXXFLAGS="-std=c++11 $CXXFLAGS"
 			  NVCCFLAGS="-std=c++11 $NVCCFLAGS")
 	AC_LANG_POP([C++])
+	AC_ARG_ENABLE(simgrid-mc, [AS_HELP_STRING([--enable-simgrid-mc],
+				[Enable using Model Checker of simgrid])],
+				enable_simgrid_mc=$enableval, enable_simgrid_mc=no)
+	if test x$enable_simgrid_mc = xyes ; then
+		AC_DEFINE(STARPU_SIMGRID_MC, [1], [Define this to enable Model Checker in simgrid execution])
+		AC_PATH_PROG([SIMGRID_MC], [simgrid-mc], [no], [$simgrid_dir/bin:$PATH])
+	fi
 fi
+AM_CONDITIONAL(STARPU_SIMGRID_MC, test x$enable_simgrid_mc = xyes)
 AM_CONDITIONAL(STARPU_SIMGRID, test x$enable_simgrid = xyes)
 AC_SUBST(SIMGRID_CFLAGS)
 AC_SUBST(SIMGRID_LIBS)
@@ -448,7 +461,8 @@ AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
 ###############################################################################
 
 # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check
-AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]))
+AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]),
+	      [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check])
 running_mpi_check=no
 if test $svndir = 1 -o -d "$srcdir/.git" ; then
     running_mpi_check=yes
@@ -488,6 +502,9 @@ if test ! -x $mpiexec_path; then
 fi
 
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
+if test x$running_mpi_check = xyes -a x$enable_simgrid = xyes -a x$enable_shared = xyes ; then
+    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this])
+fi
 if test x$use_mpi = xyes ; then
     AC_MSG_CHECKING(whether MPI tests should be run)
     AC_MSG_RESULT($running_mpi_check)
@@ -737,6 +754,7 @@ AC_CHECK_HEADERS([aio.h])
 AC_CHECK_LIB([rt], [aio_read])
 
 AC_CHECK_FUNCS([mkostemp])
+AC_CHECK_FUNCS([mkdtemp])
 
 AC_CHECK_FUNCS([pread pwrite])
 
@@ -1944,8 +1962,13 @@ if test x$enable_memory_stats = xyes; then
         AC_DEFINE(STARPU_MEMORY_STATS, [1], [enable memory stats])
 fi
 
-AC_CHECK_HEADERS([glpk.h], [AC_DEFINE([STARPU_HAVE_GLPK_H], [1], [Define to 1 if you have the <glpk.h> header file.])])
-STARPU_HAVE_LIBRARY(GLPK, [glpk])
+AC_ARG_ENABLE(glpk, [AS_HELP_STRING([--disable-glpk],
+			     [disable using glpk for bound computation])],
+			     enable_glpk=$enableval, enable_glpk=yes)
+if test x$enable_glpk = xyes; then
+	AC_CHECK_HEADERS([glpk.h], [AC_DEFINE([STARPU_HAVE_GLPK_H], [1], [Define to 1 if you have the <glpk.h> header file.])])
+	STARPU_HAVE_LIBRARY(GLPK, [glpk])
+fi
 
 AC_ARG_WITH(ayudame1-include-dir,
 	[AS_HELP_STRING([--with-ayudame1-include-dir=<path>],
@@ -3066,6 +3089,7 @@ AC_SUBST(STARPU_EXPORTED_LIBS)
 AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x tests/regression/regression.sh
   chmod +x tests/loader-cross.sh
+  chmod +x tests/model-checking/starpu-mc.sh
   chmod +x examples/loader-cross.sh
   chmod +x examples/stencil/loader-cross.sh
   chmod +x gcc-plugin/tests/run-test
@@ -3092,6 +3116,8 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   test -e tests/microbenchs/parallel_independent_homogeneous_tasks.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_independent_homogeneous_tasks.sh tests/microbenchs/
   mkdir -p tests/datawizard
   test -e tests/datawizard/locality.sh || ln -sf $ac_abs_top_srcdir/tests/datawizard/locality.sh tests/datawizard/
+  mkdir -p tests/model-checking
+  test -e tests/model-checking/prio_list.sh || ln -sf $ac_abs_top_srcdir/tests/model-checking/prio_list.sh tests/model-checking/
   mkdir -p examples/heat
   test -e examples/heat/heat.sh || ln -sf $ac_abs_top_srcdir/examples/heat/heat.sh examples/heat/
   mkdir -p examples/lu
@@ -3170,6 +3196,7 @@ AC_OUTPUT([
 	examples/stencil/Makefile
 	tests/Makefile
 	tests/loader-cross.sh
+	tests/model-checking/starpu-mc.sh
 	examples/loader-cross.sh
 	examples/stencil/loader-cross.sh
 	mpi/Makefile

+ 3 - 2
doc/doxygen/chapters/110_basic_examples.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -632,7 +632,8 @@ tools to compile a OpenCL kernel stored in a file.
 __kernel void vector_mult_opencl(int nx, __global float* val, float factor)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
         }
 }

+ 9 - 5
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -315,26 +315,30 @@ measurements. This can be done by using starpu_perfmodel_update_history(),
 for instance:
 
 \code{.c}
-static struct starpu_perfmodel perf_model = {
+static struct starpu_perfmodel perf_model =
+{
     .type = STARPU_HISTORY_BASED,
     .symbol = "my_perfmodel",
 };
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .cuda_funcs = { cuda_func1, cuda_func2 },
     .nbuffers = 1,
     .modes = {STARPU_W},
     .model = &perf_model
 };
 
-void feed(void) {
+void feed(void)
+{
     struct my_measure *measure;
     struct starpu_task task;
     starpu_task_init(&task);
 
     task.cl = &cl;
 
-    for (measure = &measures[0]; measure < measures[last]; measure++) {
+    for (measure = &measures[0]; measure < measures[last]; measure++)
+    {
         starpu_data_handle_t handle;
 	starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float));
 	task.handles[0] = handle;

+ 15 - 8
doc/doxygen/chapters/301_tasks.doxy

@@ -56,7 +56,8 @@ the field starpu_task::dyn_handles when defining a task and the field
 starpu_codelet::dyn_modes when defining the corresponding codelet.
 
 \code{.c}
-enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] = {
+enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] =
+{
 	STARPU_R, STARPU_R, ...
 };
 
@@ -132,7 +133,8 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 \endcode
 
 \code{.c}
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .cpu_funcs = { scal_cpu_func, scal_sse_func },
     .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func" },
     .nbuffers = 1,
@@ -167,7 +169,8 @@ static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nim
   return 0;
 }
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func },
     .cpu_funcs_name = { "cpu_func" },
@@ -212,7 +215,8 @@ static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nim
   return 0;
 }
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func },
     .cpu_funcs_name = { "cpu_func" },
@@ -247,7 +251,8 @@ static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nim
   }
 }
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .can_execute = can_execute,
     .cpu_funcs = { cpu_func },
     .cpu_funcs_name = { "cpu_func" },
@@ -280,7 +285,8 @@ void func_cpu(void *descr[], void *_args)
         *x1 = *x1 * ffactor;
 }
 
-struct starpu_codelet mycodelet = {
+struct starpu_codelet mycodelet =
+{
         .cpu_funcs = { func_cpu },
         .cpu_funcs_name = { "func_cpu" },
         .nbuffers = 2,
@@ -544,14 +550,15 @@ An intermediate solution is to define a codelet with its
 starpu_codelet::where field set to \ref STARPU_NOWHERE, for instance:
 
 \code{.c}
-struct starpu_codelet {
+struct starpu_codelet cl =
+{
 	.where = STARPU_NOWHERE,
 	.nbuffers = 1,
 	.modes = { STARPU_R },
 }
 
 task = starpu_task_create();
-task->cl = starpu_codelet;
+task->cl = &cl;
 task->handles[0] = handle;
 starpu_task_submit(task);
 \endcode

+ 5 - 3
doc/doxygen/chapters/310_data_management.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -140,7 +140,8 @@ to retrieve the sub-handles to be passed as tasks parameters.
 
 \code{.c}
 /* Submit a task on each sub-vector */
-for (i=0; i<starpu_data_get_nb_children(handle); i++) {
+for (i=0; i<starpu_data_get_nb_children(handle); i++)
+{
     /* Get subdata number i (there is only 1 dimension) */
     starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i);
     struct starpu_task *task = starpu_task_create();
@@ -404,7 +405,8 @@ into data <c>res</c>, then uses it for other computation, before looping again
 with a new reduction:
 
 \code{.c}
-for (i = 0; i < 100; i++) {
+for (i = 0; i < 100; i++)
+{
     starpu_mpi_task_insert(MPI_COMM_WORLD, &init_res, STARPU_W, res, 0);
     starpu_mpi_task_insert(MPI_COMM_WORLD, &work, STARPU_RW, A,
                STARPU_R, B, STARPU_REDUX, res, 0);

+ 8 - 2
doc/doxygen/chapters/320_scheduling.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -95,6 +95,11 @@ except that it sorts tasks by priority order, which allows to become even closer
 to HEFT by respecting priorities after having made the scheduling decision (but
 it still schedules tasks in the order they become available).
 
+The <b>dmdasd</b> (deque model data aware sorted decision) scheduler is similar
+to dmdas, except that when scheduling a task, it takes into account its priority
+when computing the minimum completion time, since this task may get executed
+before others, and thus the latter should be ignored.
+
 The <b>heft</b> (heterogeneous earliest finish time) scheduler is a deprecated
 alias for <b>dmda</b>.
 
@@ -200,7 +205,8 @@ the StarPU sources in the directory <c>examples/scheduler/</c>.
 The scheduler has to provide methods:
 
 \code{.c}
-static struct starpu_sched_policy dummy_sched_policy = {
+static struct starpu_sched_policy dummy_sched_policy =
+{
     .init_sched = init_dummy_sched,
     .deinit_sched = deinit_dummy_sched,
     .add_workers = dummy_sched_add_workers,

+ 4 - 0
doc/doxygen/chapters/350_modularized_scheduler.doxy

@@ -112,19 +112,23 @@ to be able to interact with other Scheduling Components.
 	Child Component. When the Push function returns, the task no longer
 	belongs to the calling Component. The Modularized Schedulers'
 	model relies on this function to perform prefetching.
+	See starpu_sched_component::push_task for more details
 
 	- Pull (Caller_Component, Parent_Component)  ->  Task \n
 	The calling Scheduling Component requests a task from
 	its Parent Component. When the Pull function ends, the returned
 	task belongs to the calling Component.
+	See starpu_sched_component::pull_task for more details
 
 	- Can_Push (Caller_Component, Parent_Component) \n
 	The calling Scheduling Component notifies its Parent Component that
 	it is ready to accept new tasks.
+	See starpu_sched_component::can_push for more details
 
 	- Can_Pull (Caller_Component, Child_Component) \n
 	The calling Scheduling Component notifies its Child Component
 	that it is ready to give new tasks.
+	See starpu_sched_component::can_pull for more details
 
 
 \section BuildAModularizedScheduler Building a Modularized Scheduler

+ 6 - 3
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -198,7 +198,8 @@ starpu_top_register_parameter_float("alpha", &alpha, 0, 10, modif_hook);
 <c>modif_hook</c> is a function which will be called when the parameter is being modified, it can for instance print the new value:
 
 \code{.c}
-void modif_hook(struct starpu_top_param *d) {
+void modif_hook(struct starpu_top_param *d)
+{
     fprintf(stderr,"%s has been modified: %f\n", d->name, alpha);
 }
 \endcode
@@ -341,12 +342,14 @@ be constructed dynamically at execution time, as long as this is done before
 submitting any task using it.
 
 \code{.c}
-static struct starpu_perfmodel mult_perf_model = {
+static struct starpu_perfmodel mult_perf_model =
+{
     .type = STARPU_HISTORY_BASED,
     .symbol = "mult_perf_model"
 };
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .cpu_funcs = { cpu_mult },
     .cpu_funcs_name = { "cpu_mult" },
     .nbuffers = 3,

+ 1 - 1
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -129,7 +129,7 @@ collect the trace files from the MPI nodes, and
 specify them all on the command <c>starpu_fxt_tool</c>, for instance:
 
 \verbatim
-$ starpu_fxt_tool -i /tmp/prof_file_something1 -i /tmp/prof_file_something2
+$ starpu_fxt_tool -i /tmp/prof_file_something*
 \endverbatim
 
 By default, all tasks are displayed using a green color. To display tasks with

+ 19 - 10
doc/doxygen/chapters/390_faq.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -27,8 +27,10 @@ At initialisation time of libstarpu, the objects are initialized:
 
 \code{.c}
 int workerid;
-for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
-    switch (starpu_worker_get_type(workerid)) {
+for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
+{
+    switch (starpu_worker_get_type(workerid))
+    {
         case STARPU_CPU_WORKER:
             plan_cpu[workerid] = fftw_plan(...);
             break;
@@ -75,14 +77,16 @@ void starpufft_plan(void)
 
 \code{.c}
 int ret;
-struct starpu_driver = {
+struct starpu_driver =
+{
     .type = STARPU_CUDA_WORKER,
     .id.cuda_id = 0
 };
 ret = starpu_driver_init(&d);
 if (ret != 0)
     error();
-while (some_condition) {
+while (some_condition)
+{
     ret = starpu_driver_run_once(&d);
     if (ret != 0)
         error();
@@ -258,7 +262,8 @@ run the following program with -lcudart:
 #include <cuda.h>
 #include <cuda_runtime.h>
 
-int main(void) {
+int main(void)
+{
 	int n, i, version;
 	cudaError_t err;
 
@@ -274,7 +279,8 @@ int main(void) {
 	printf("runtime version %d\n", version);
 	printf("\n");
 
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		struct cudaDeviceProp props;
 		printf("CUDA%d\n", i);
 		err = cudaGetDeviceProperties(&props, i);
@@ -306,7 +312,8 @@ run the following program with -lOpenCL:
 #include <stdio.h>
 #include <assert.h>
 
-int main(void) {
+int main(void)
+{
     cl_device_id did[16];
     cl_int err;
     cl_platform_id pid, pids[16];
@@ -318,7 +325,8 @@ int main(void) {
     err = clGetPlatformIDs(sizeof(pids)/sizeof(pids[0]), pids, &nbplat);
     assert(err == CL_SUCCESS);
     printf("%u platforms\n", nbplat);
-    for (j = 0; j < nbplat; j++) {
+    for (j = 0; j < nbplat; j++)
+    {
         pid = pids[j];
         printf("    platform %d\n", j);
         err = clGetPlatformInfo(pid, CL_PLATFORM_VERSION, sizeof(buf)-1, buf, &size);
@@ -329,7 +337,8 @@ int main(void) {
         err = clGetDeviceIDs(pid, CL_DEVICE_TYPE_ALL, sizeof(did)/sizeof(did[0]), did, &nb);
         assert(err == CL_SUCCESS);
         printf("%d devices\n", nb);
-        for (i = 0; i < nb; i++) {
+        for (i = 0; i < nb; i++)
+	{
             err = clGetDeviceInfo(did[i], CL_DEVICE_VERSION, sizeof(buf)-1, buf, &size);
             buf[size] = 0;
             printf("    device %d version %s\n", i, buf);

+ 38 - 19
doc/doxygen/chapters/410_mpi_support.doxy

@@ -28,7 +28,8 @@ initializes a token on node 0, and the token is passed from node to node,
 incremented by one on each step. The code is not using StarPU yet.
 
 \code{.c}
-    for (loop = 0; loop < nloops; loop++) {
+    for (loop = 0; loop < nloops; loop++)
+    {
         int tag = loop*size + rank;
 
         if (loop == 0 && rank == 0)
@@ -62,7 +63,8 @@ execution to StarPU.  This is possible by just using starpu_data_acquire(), for
 instance:
 
 \code{.c}
-    for (loop = 0; loop < nloops; loop++) {
+    for (loop = 0; loop < nloops; loop++)
+    {
         int tag = loop*size + rank;
 
 	/* Acquire the data to be able to write to it */
@@ -412,7 +414,8 @@ communication cache when unregistering the data.
 
 \code{.c}
 /* Returns the MPI node number where data is */
-int my_distrib(int x, int y, int nb_nodes) {
+int my_distrib(int x, int y, int nb_nodes)
+{
   /* Block distrib */
   return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes;
 
@@ -439,8 +442,10 @@ data which will be needed by the tasks that we will execute.
     unsigned matrix[X][Y];
     starpu_data_handle_t data_handles[X][Y];
 
-    for(x = 0; x < X; x++) {
-        for (y = 0; y < Y; y++) {
+    for(x = 0; x < X; x++)
+    {
+        for (y = 0; y < Y; y++)
+	{
             int mpi_rank = my_distrib(x, y, size);
             if (mpi_rank == my_rank)
                 /* Owning data */
@@ -454,7 +459,8 @@ data which will be needed by the tasks that we will execute.
             else
                 /* I know it's useless to allocate anything for this */
                 data_handles[x][y] = NULL;
-            if (data_handles[x][y]) {
+            if (data_handles[x][y])
+	    {
                 starpu_mpi_data_register(data_handles[x][y], x*X+y, mpi_rank);
             }
         }
@@ -604,8 +610,10 @@ register any data that wasn't registered already and will be needed, then
 migrate the data, and register the new location.
 
 \code{.c}
-    for(x = 0; x < X; x++) {
-        for (y = 0; y < Y; y++) {
+    for(x = 0; x < X; x++)
+    {
+        for (y = 0; y < Y; y++)
+	{
             int mpi_rank = my_distrib2(x, y, size);
             if (!data_handles[x][y] && (mpi_rank == my_rank
                   || my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
@@ -613,7 +621,8 @@ migrate the data, and register the new location.
                 /* Register newly-needed data */
                 starpu_variable_data_register(&data_handles[x][y], -1,
                                               (uintptr_t)NULL, sizeof(unsigned));
-            if (data_handles[x][y]) {
+            if (data_handles[x][y])
+	    {
                 /* Migrate the data */
                 starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank);
             }
@@ -636,9 +645,12 @@ resides. Otherwise the unregistration will complain that it does not have the
 latest value on the original home node.
 
 \code{.c}
-    for(x = 0; x < X; x++) {
-        for (y = 0; y < Y; y++) {
-            if (data_handles[x][y]) {
+    for(x = 0; x < X; x++)
+    {
+        for (y = 0; y < Y; y++)
+	{
+            if (data_handles[x][y])
+	    {
                 int mpi_rank = my_distrib(x, y, size);
                 /* Get back data to original place where the user-provided buffer is.  */
                 starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
@@ -669,20 +681,24 @@ data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
 for(x = 0; x < nblocks ;  x++)
 {
     int mpi_rank = my_distrib(x, nodes);
-    if (rank == root) {
+    if (rank == root)
+    {
         starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
                                     blocks_size, sizeof(float));
     }
-    else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) {
+    else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1)))
+    {
         /* I own that index, or i will need it for my computations */
         starpu_vector_data_register(&data_handles[x], -1, (uintptr_t)NULL,
                                    block_size, sizeof(float));
     }
-    else {
+    else
+    {
         /* I know it's useless to allocate anything for this */
         data_handles[x] = NULL;
     }
-    if (data_handles[x]) {
+    if (data_handles[x])
+    {
         starpu_mpi_data_register(data_handles[x], x*nblocks+y, mpi_rank);
     }
 }
@@ -691,10 +707,13 @@ for(x = 0; x < nblocks ;  x++)
 starpu_mpi_scatter_detached(data_handles, nblocks, root, MPI_COMM_WORLD);
 
 /* Calculation */
-for(x = 0; x < nblocks ;  x++) {
-    if (data_handles[x]) {
+for(x = 0; x < nblocks ;  x++)
+{
+    if (data_handles[x])
+    {
         int owner = starpu_data_get_rank(data_handles[x]);
-        if (owner == rank) {
+        if (owner == rank)
+	{
             starpu_task_insert(&cl, STARPU_RW, data_handles[x], 0);
         }
     }

+ 3 - 2
doc/doxygen/chapters/430_mic_scc_support.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -49,7 +49,8 @@ starpu_codelet::cpu_funcs_name, to provide StarPU with the function
 name of the CPU implementation, so for instance:
 
 \verbatim
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
     .cpu_funcs = {myfunc},
     .cpu_funcs_name = {"myfunc"},
     .nbuffers = 1,

+ 15 - 0
doc/doxygen/chapters/510_configure_options.doxy

@@ -553,6 +553,13 @@ that the MKL website
 provides a script to determine the linking flags.
 </dd>
 
+<dt>--disable-glpk</dt>
+<dd>
+\anchor disable-glpk
+\addindex __configure__--disable-glpk
+Disable the use of libglpk for computing area bounds.
+</dd>
+
 <dt>--disable-build-tests</dt>
 <dd>
 \anchor disable-build-tests
@@ -638,6 +645,14 @@ allows to specify the location to the SimGrid lib directory.
 Use the smpirun at <c>path</c>
 </dd>
 
+<dt>--enable-simgrid-mc</dt>
+<dd>
+\anchor enable-simgrid-mc
+\addindex __configure__--enable-simgrid-mc
+Enable the Model Checker in simulation of execution in simgrid, to allow
+exploring various execution paths.
+</dd>
+
 <dt>--enable-calibration-heuristic</dt>
 <dd>
 \anchor enable-calibration-heuristic

+ 4 - 2
doc/doxygen/chapters/api/data_partition.doxy

@@ -42,7 +42,8 @@ according to the filter \p f.
 
 Here an example of how to use the function.
 \code{.c}
-struct starpu_data_filter f = {
+struct starpu_data_filter f =
+{
         .filter_func = starpu_matrix_filter_block,
         .nchildren = nslicesx
 };
@@ -119,7 +120,8 @@ Here is an example of how to use the function:
 
 \code{.c}
 starpu_data_handle_t children[nslicesx];
-struct starpu_data_filter f = {
+struct starpu_data_filter f =
+{
         .filter_func = starpu_matrix_filter_block,
         .nchildren = nslicesx
 };

+ 1 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -6,7 +6,7 @@
  * See the file version.doxy for copying conditions.
  */
 
-/*! \defgroup API_Insert_Task Insert_Task
+/*! \defgroup API_Insert_Task Task Insert Utility
 
 \fn int starpu_insert_task(struct starpu_codelet *cl, ...)
 \ingroup API_Insert_Task

+ 5 - 2
doc/doxygen/chapters/api/modularized_scheduler.doxy

@@ -69,6 +69,9 @@ like <c>component->push_task(component,task)</c>
      recursive call on a child or store the task in the component,
      then it will be returned by a further pull_task call.
      the caller must ensure that component is able to execute task.
+     This method must either return 0 if the task was properly stored or
+     passed over to a child component, or return a value different from 0 if the
+     task could not be consumed (e.g. the queue is full).
 \var struct starpu_task * (*starpu_sched_component::pull_task)(struct starpu_sched_component *)
      pop a task from the scheduler module. this function is called by workers to get a task from their
      parents. this function should first return a locally stored task
@@ -79,13 +82,13 @@ like <c>component->push_task(component,task)</c>
      This function is called by a component which implements a queue,
      allowing it to signify to its parents that an empty slot is
      available in its queue. The basic implementation of this function
-     is a recursive call to its parents, the user have to specify a
+     is a recursive call to its parents, the user has to specify a
      personally-made function to catch those calls.
 \var void (*starpu_sched_component::can_pull)(struct starpu_sched_component *component)
      This function allow a component to wake up a worker. It is
      currently called by component which implements a queue, to
      signify to its children that a task have been pushed in its local
-     queue, and is available to been popped by a worker, for example.
+     queue, and is available to be popped by a worker, for example.
      The basic implementation of this function is a recursive call to
      its children, until at least one worker have been woken up.
 

+ 24 - 6
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -60,12 +60,23 @@ For each task not going through the scheduler (because starpu_task::execute_on_a
 	of the scheduler coherent even when StarPU bypasses the
 	scheduling strategy.
 \var struct starpu_task *(*starpu_sched_policy::pop_task)(unsigned sched_ctx_id)
-        Get a task from the scheduler. The mutex associated to the
-	worker is already taken when this method is called. If this
-	method is defined as <c>NULL</c>, the worker will only execute tasks
-	from its local queue. In this case, the push_task method
-	should use the starpu_push_local_task method to assign tasks
-	to the different workers.
+        Get a task from the scheduler.
+        If this method returns NULL, the worker will start sleeping. If later on
+        some tasks are pushed for this worker, starpu_wake_worker() must be
+        called to wake the worker so it can call the pop_task() method again.
+	
+        The mutex associated to the worker is already taken when this method
+        is called. This method may release it (e.g. for scalability reasons
+        when doing work stealing), but it must acquire it again before taking
+        the decision whether to return a task or NULL, so the atomicity of
+        deciding to return NULL and making the worker actually sleep is
+        preserved. Otherwise in simgrid or blocking driver mode the worker might start
+        sleeping while a task has just been pushed for it.
+
+        If this method is defined as <c>NULL</c>, the worker will only execute
+        tasks from its local queue. In this case, the push_task method should
+        use the starpu_push_local_task method to assign tasks to the different
+        workers.
 \var struct starpu_task *(*starpu_sched_policy::pop_every_task)(unsigned sched_ctx_id)
         Remove all available tasks from the scheduler (tasks are
 	chained by the means of the field starpu_task::prev and
@@ -106,6 +117,13 @@ condition variable. For instance, in the case of a scheduling strategy
 with a single task queue, the same condition variable would be used to
 block and wake up all workers.
 
+\fn int starpu_wake_worker(int workerid)
+\ingroup API_Scheduling_Policy
+In simgrid or blocking driver mode, 
+this should be called by push functions to wake the potential workers that are
+supposed to pick up the tasks which have just been pushed, otherwise they may
+remain sleeping.
+
 \fn void starpu_worker_get_job_id(struct starpu_task *task)
 \ingroup API_Scheduling_Policy
 Return the job id of the given task, i.e. a number that uniquely identifies this

+ 6 - 3
doc/doxygen/chapters/code/multiformat.c

@@ -30,7 +30,8 @@ starpu_data_handle_t handle;
  */
 #ifdef STARPU_USE_OPENCL
 void cpu_to_opencl_opencl_func(void *buffers[], void *args);
-struct starpu_codelet cpu_to_opencl_cl = {
+struct starpu_codelet cpu_to_opencl_cl =
+{
     .where = STARPU_OPENCL,
     .opencl_funcs = { cpu_to_opencl_opencl_func },
     .nbuffers = 1,
@@ -38,7 +39,8 @@ struct starpu_codelet cpu_to_opencl_cl = {
 };
 
 void opencl_to_cpu_func(void *buffers[], void *args);
-struct starpu_codelet opencl_to_cpu_cl = {
+struct starpu_codelet opencl_to_cpu_cl =
+{
     .where = STARPU_CPU,
     .cpu_funcs = { opencl_to_cpu_func },
     .cpu_funcs_name = { "opencl_to_cpu_func" },
@@ -47,7 +49,8 @@ struct starpu_codelet opencl_to_cpu_cl = {
 };
 #endif
 
-struct starpu_multiformat_data_interface_ops format_ops = {
+struct starpu_multiformat_data_interface_ops format_ops =
+{
 #ifdef STARPU_USE_OPENCL
     .opencl_elemsize = 2 * sizeof(float),
     .cpu_to_opencl_cl = &cpu_to_opencl_cl,

+ 2 - 1
doc/doxygen/chapters/code/vector_scal_c.c

@@ -32,7 +32,8 @@ extern void scal_sse_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
     .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
     /* CPU implementation of the codelet */
     .cpu_funcs = { scal_cpu_func, scal_sse_func },

+ 2 - 1
doc/doxygen/chapters/code/vector_scal_opencl_codelet.cl

@@ -18,7 +18,8 @@
 __kernel void vector_mult_opencl(int nx, __global float* val, float factor)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
         }
 }

+ 5 - 0
examples/cg/cg.c

@@ -389,6 +389,11 @@ static void parse_args(int argc, char **argv)
 	        if (strcmp(argv[i], "-maxiter") == 0)
 		{
 			i_max = atoi(argv[++i]);
+			if (i_max <= 0)
+			{
+				FPRINTF(stderr, "the number of iterations must be positive, not %d\n", i_max);
+				exit(EXIT_FAILURE);
+			}
 			continue;
 		}
 

+ 2 - 2
examples/cpp/add_vectors_cpp11.cpp

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2012 INRIA
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
 #if !defined(STARPU_HAVE_CXX11)
 int main(int argc, char **argv)
 {
-	return STARPU_TEST_SKIPPED;
+	return 77;
 }
 #else
 void cpu_kernel_add_vectors(void *buffers[], void *cl_arg)

+ 1 - 1
examples/filters/fblock_opencl_kernel.cl

@@ -19,7 +19,7 @@
 __kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
 {
         int i, j, k;
-        block = (__global void *)block + offset;
+        block = (__global char *)block + offset;
         for(k=0; k<nz ; k++)
 	{
                 for(j=0; j<ny ; j++)

+ 1 - 2
examples/mlr/mlr.c

@@ -51,7 +51,6 @@ static void cl_params(struct starpu_task *task, double *parameters)
 {
 	int m, n, k;
 	int* vector_mn;
-	starpu_data_handle_t vector_mn_handle;
 
 	vector_mn = (int*)STARPU_VECTOR_GET_PTR(task->interfaces[0]);
 	m = vector_mn[0];
@@ -160,7 +159,7 @@ int main(int argc, char **argv)
 
 	sum=0;
 	int m,n,k;
-	int* vector_mn = malloc( 2 * sizeof(int) );
+	int* vector_mn = calloc( 2, sizeof(int) );
 	starpu_data_handle_t vector_mn_handle;
 
 	starpu_vector_data_register( &vector_mn_handle,

+ 4 - 4
include/fstarpu_mod.f90

@@ -1647,17 +1647,17 @@ module fstarpu_mod
 
                 subroutine fstarpu_task_insert(arglist) bind(C)
                         use iso_c_binding, only: c_ptr
-                        type(c_ptr), dimension(:), intent(in) :: arglist
+                        type(c_ptr), dimension(*), intent(in) :: arglist
                 end subroutine fstarpu_task_insert
                 subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert")
                         use iso_c_binding, only: c_ptr
-                        type(c_ptr), dimension(:), intent(in) :: arglist
+                        type(c_ptr), dimension(*), intent(in) :: arglist
                 end subroutine fstarpu_insert_task
 
                 subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C)
                         use iso_c_binding, only: c_ptr
                         type(c_ptr), value, intent(in) :: cl_arg
-                        type(c_ptr), dimension(:), intent(in) :: bufferlist
+                        type(c_ptr), dimension(*), intent(in) :: bufferlist
                 end subroutine fstarpu_unpack_arg
 
                 ! == starpu_sched_ctx.h ==
@@ -1669,7 +1669,7 @@ module fstarpu_mod
                         integer(c_int), intent(in) :: workers_array(*)
                         integer(c_int), value, intent(in) :: nworkers
                         character(c_char), intent(in) :: ctx_name
-                        type(c_ptr), dimension(:), intent(in) :: arglist
+                        type(c_ptr), dimension(*), intent(in) :: arglist
                 end function fstarpu_sched_ctx_create
 
                 ! unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap);

+ 3 - 1
include/starpu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014, 2016  Université de Bordeaux
+ * Copyright (C) 2009-2014, 2016-2017  Université de Bordeaux
  * Copyright (C) 2010-2015  CNRS
  * Copyright (C) 2014, 2016  INRIA
  *
@@ -53,7 +53,9 @@ typedef UINT_PTR uintptr_t;
 #include <starpu_perfmodel.h>
 #include <starpu_worker.h>
 #include <starpu_task.h>
+#ifndef BUILDING_STARPU
 #include <starpu_task_list.h>
+#endif
 #include <starpu_task_util.h>
 #include <starpu_sched_ctx.h>
 #include <starpu_expert.h>

+ 4 - 4
include/starpu_task.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2017  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011, 2014, 2016  INRIA
  * Copyright (C) 2016  Uppsala University
@@ -181,13 +181,13 @@ struct starpu_task
 	unsigned destroy:1;
 	unsigned regenerate:1;
 
-	unsigned workerid;
-	unsigned workerorder;
-
 	unsigned scheduled:1;
 
 	unsigned int mf_skip:1;
 
+	unsigned workerid;
+	unsigned workerorder;
+
 	int priority;
 
 	enum starpu_task_status status;

+ 14 - 4
include/starpu_util.h

@@ -29,6 +29,10 @@
 #include <execinfo.h>
 #endif
 
+#ifdef STARPU_SIMGRID_MC
+#include <simgrid/modelchecker.h>
+#endif
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -111,17 +115,23 @@ extern "C"
 #  define STARPU_DUMP_BACKTRACE() do { } while (0)
 #endif
 
+#ifdef STARPU_SIMGRID_MC
+#define STARPU_SIMGRID_ASSERT(x) MC_assert(!!(x))
+#else
+#define STARPU_SIMGRID_ASSERT(x)
+#endif
+
 #ifdef STARPU_NO_ASSERT
 #define STARPU_ASSERT(x)		do { if (0) { (void) (x); } } while(0)
 #define STARPU_ASSERT_ACCESSIBLE(x)	do { if (0) { (void) (x); } } while(0)
 #define STARPU_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); (void) msg; } } while(0)
 #else
 #  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
-#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); *(int*)NULL = 0; } } while(0)
-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; }} while(0)
+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; } } while(0)
+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; }} while(0)
 #  else
-#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); assert(x); } } while (0)
-#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); assert(x); } } while(0)
+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); assert(x); } } while (0)
+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); assert(x); } } while(0)
 
 #  endif
 #  define STARPU_ASSERT_ACCESSIBLE(ptr)	do { \

+ 2 - 2
mpi/examples/matrix_decomposition/mpi_decomposition_params.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2015-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -27,7 +27,7 @@ unsigned nblocks = 2;
 unsigned nbigblocks = 2;
 #elif !defined(STARPU_LONG_CHECK)
 unsigned size = 4*320;
-unsigned nblocks = 16;
+unsigned nblocks = 4;
 unsigned nbigblocks = 2;
 #else
 unsigned size = 16*320;

+ 1 - 2
mpi/src/starpu_mpi.c

@@ -1172,12 +1172,11 @@ static void _starpu_mpi_test_detached_requests(void)
 	{
 		STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
 
-		STARPU_MPI_ASSERT_MSG(req->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL");
-
 		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %d - TYPE %s %d\n", &req->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.rank);
 #ifdef STARPU_SIMGRID
 		req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag);
 #else
+		STARPU_MPI_ASSERT_MSG(req->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL");
 		req->ret = MPI_Test(&req->data_request, &flag, MPI_STATUS_IGNORE);
 #endif
 

+ 34 - 14
mpi/tests/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2012, 2015-2016  Université de Bordeaux
+# Copyright (C) 2009-2012, 2015-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -16,6 +16,15 @@
 
 include $(top_srcdir)/starpu.mk
 
+if STARPU_SIMGRID
+STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling
+STARPU_HOSTNAME=mirage
+MALLOC_PERTURB_=0
+export STARPU_PERF_MODEL_DIR
+export STARPU_HOSTNAME
+export MALLOC_PERTURB_
+endif
+
 CC=$(MPICC)
 CCLD=$(MPICC)
 
@@ -23,17 +32,23 @@ if STARPU_HAVE_WINDOWS
 LOADER_BIN		=
 else
 loader_CPPFLAGS 	= 	$(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+if !STARPU_SIMGRID
 LOADER			=	loader
 LOADER_BIN		=	$(abs_top_builddir)/mpi/tests/$(LOADER)
+endif
 loader_SOURCES		=	../../tests/loader.c
 endif
 
+if STARPU_SIMGRID
+MPI			=	$(abs_top_builddir)/tools/starpu_smpirun -np 4 -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile
+else
 # we always test on 4 processes, the execution time is not that much bigger
 if STARPU_QUICK_CHECK
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 else
 MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 endif
+endif
 
 if STARPU_HAVE_AM111
 TESTS_ENVIRONMENT	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
@@ -42,11 +57,9 @@ else
 TESTS_ENVIRONMENT 	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
 endif
 
-if !STARPU_SIMGRID
 if STARPU_MPI_CHECK
 TESTS			=	$(starpu_mpi_TESTS)
 endif
-endif
 
 check_PROGRAMS = $(LOADER) $(starpu_mpi_TESTS)
 
@@ -87,7 +100,23 @@ AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(ST
 ########################
 
 if BUILD_TESTS
-starpu_mpi_TESTS =				\
+
+starpu_mpi_TESTS =				
+
+starpu_mpi_TESTS +=				\
+	cache					\
+	cache_disable				\
+	callback				\
+	insert_task				\
+	insert_task_block			\
+	insert_task_owner			\
+	insert_task_owner2			\
+	insert_task_owner_data			\
+	insert_task_node_choice			\
+	matrix
+
+if !STARPU_SIMGRID
+starpu_mpi_TESTS +=				\
 	datatypes				\
 	pingpong				\
 	mpi_test				\
@@ -107,20 +136,10 @@ starpu_mpi_TESTS =				\
 	ring_async_implicit			\
 	block_interface				\
 	block_interface_pinned			\
-	cache					\
-	cache_disable				\
-	callback				\
-	matrix					\
 	matrix2					\
-	insert_task				\
 	insert_task_compute			\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
-	insert_task_block			\
-	insert_task_owner			\
-	insert_task_owner2			\
-	insert_task_owner_data			\
-	insert_task_node_choice			\
 	insert_task_count			\
 	insert_task_dyn_handles			\
 	multiple_send				\
@@ -140,6 +159,7 @@ starpu_mpi_TESTS =				\
 	early_request				\
 	starpu_redefine				\
 	load_balancer
+endif
 
 noinst_PROGRAMS =				\
 	datatypes				\

+ 1 - 1
src/Makefile.am

@@ -395,7 +395,7 @@ dist-hook:
 	failed=0 ; \
 	for i in $$( grep "static inline" $$(find $(srcdir) -name \*.h) | sed -e 's/.*static inline //g' | grep -v ENAME | sed -e 's/[^(]* \(\|\*\)\([^ (]*\)(.*/\2/' | grep -v _starpu_spin_init) ; do \
 		for j in .libs/*.o ; do \
-			nm $$j | grep "U $$i" && { echo $$j ; failed=1 ; } ; \
+			nm $$j | grep "U $$i$$" && { echo $$j ; failed=1 ; } ; \
 		done ; \
 	done ; \
 	[ $$failed == 0 ]

+ 5 - 2
src/common/list.h

@@ -132,8 +132,6 @@
 #define LIST_TYPE(ENAME, DECL) \
   LIST_CREATE_TYPE(ENAME, DECL)
 
-/**@hideinitializer
- * The effective type declaration for lists */
 #define LIST_CREATE_TYPE(ENAME, DECL) \
   /** from automatic type: struct ENAME */ \
   struct ENAME \
@@ -142,6 +140,11 @@
     struct ENAME *_next; /**< @internal next cell */ \
     DECL \
   }; \
+  LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next)
+
+/**@hideinitializer
+ * The effective type declaration for lists */
+#define LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) \
   /** @internal */ \
   struct ENAME##_list \
   { \

+ 164 - 10
src/common/prio_list.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015-2016  Université de Bordeaux
+ * Copyright (C) 2015-2017  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,6 +44,18 @@
  * void FOO_prio_list_push_prio_list_back(struct FOO_prio_list*, struct FOO_prio_list*)
  *   * Test whether element is part of the list
  * void FOO_prio_list_ismember(struct FOO_prio_list*, struct FOO*)
+ *   * Returns the first element of the list
+ * struct FOO*	FOO_prio_list_begin(struct FOO_prio_list*);
+ *   * Returns the value to test at the end of the list
+ * struct FOO*	FOO_prio_list_end(struct FOO_prio_list*);
+ *   * Returns the next element of the list
+ * struct FOO*	FOO_prio_list_next(struct FOO_prio_list*, struct FOO*)
+ *   * Returns the last element of the list
+ * struct FOO*	FOO_prio_list_last(struct FOO_prio_list*);
+ *   * Returns the value to test at the beginning of the list
+ * struct FOO*	FOO_prio_list_alpha(struct FOO_prio_list*);
+ *   * Returns the previous element of the list
+ * struct FOO*	FOO_prio_list_prev(struct FOO_prio_list*, struct FOO*)
  *
  * PRIO_LIST_TYPE assumes that LIST_TYPE has already been called to create the
  * final structure.
@@ -101,7 +113,7 @@
 	{ \
 		/* Sort by decreasing order */ \
 		const struct ENAME##_prio_list_stage *e2 = ENAME##_node_to_list_stage_const(node); \
-		return (e2->PRIOFIELD - prio); \
+		return (e2->prio - prio); \
 	} \
 	static inline struct ENAME##_prio_list_stage *ENAME##_prio_list_add(struct ENAME##_prio_list *priolist, int prio) \
 	{ \
@@ -164,17 +176,14 @@
 			priolist->empty = ENAME##_prio_list_empty_slow(priolist); \
 		} \
 	} \
-	static inline struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
+	static inline int ENAME##_prio_list_get_next_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
 	{ \
-		struct starpu_rbtree_node *node; \
 		struct ENAME##_prio_list_stage *stage; \
-		struct ENAME *ret; \
-		node = starpu_rbtree_first(&priolist->tree); \
 		while(1) { \
 			struct starpu_rbtree_node *next; \
 			if (!node) \
 				/* Tree is empty */ \
-				return NULL; \
+				return 0; \
 			stage = ENAME##_node_to_list_stage(node); \
 			if (!ENAME##_list_empty(&stage->list)) \
 				break; \
@@ -188,6 +197,22 @@
 			} \
 			node = next; \
 		} \
+		*pnode = node; \
+		*pstage = stage; \
+		return 1; \
+	} \
+	static inline int ENAME##_prio_list_get_first_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	{ \
+		struct starpu_rbtree_node *node = starpu_rbtree_first(&priolist->tree); \
+		return ENAME##_prio_list_get_next_nonempty_stage(priolist, node, pnode, pstage); \
+	} \
+	static inline struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
+	{ \
+		struct starpu_rbtree_node *node; \
+		struct ENAME##_prio_list_stage *stage; \
+		struct ENAME *ret; \
+		if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \
+			return NULL; \
 		ret = ENAME##_list_pop_front(&stage->list); \
 		if (ENAME##_list_empty(&stage->list)) { \
 			if (stage->prio != 0) \
@@ -200,6 +225,55 @@
 		} \
 		return ret; \
 	} \
+	static inline int ENAME##_prio_list_get_prev_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	{ \
+		struct ENAME##_prio_list_stage *stage; \
+		while(1) { \
+			struct starpu_rbtree_node *prev; \
+			if (!node) \
+				/* Tree is empty */ \
+				return 0; \
+			stage = ENAME##_node_to_list_stage(node); \
+			if (!ENAME##_list_empty(&stage->list)) \
+				break; \
+			/* Empty list, skip to prev tree entry */ \
+			prev = starpu_rbtree_prev(node); \
+			/* drop it if not 0-prio */ \
+			if (stage->prio != 0) \
+			{ \
+				starpu_rbtree_remove(&priolist->tree, node); \
+				free(stage); \
+			} \
+			node = prev; \
+		} \
+		*pnode = node; \
+		*pstage = stage; \
+		return 1; \
+	} \
+	static inline int ENAME##_prio_list_get_last_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \
+	{ \
+		struct starpu_rbtree_node *node = starpu_rbtree_last(&priolist->tree); \
+		return ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, pnode, pstage); \
+	} \
+	static inline struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
+	{ \
+		struct starpu_rbtree_node *node; \
+		struct ENAME##_prio_list_stage *stage; \
+		struct ENAME *ret; \
+		if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \
+			return NULL; \
+		ret = ENAME##_list_pop_back(&stage->list); \
+		if (ENAME##_list_empty(&stage->list)) { \
+			if (stage->prio != 0) \
+			{ \
+				/* stage got empty, remove it */ \
+				starpu_rbtree_remove(&priolist->tree, node); \
+				free(stage); \
+			} \
+			priolist->empty = ENAME##_prio_list_empty_slow(priolist); \
+		} \
+		return ret; \
+	} \
 	static inline void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
 	{ \
 		struct starpu_rbtree_node *node_toadd, *tmp; \
@@ -240,7 +314,51 @@
 			return ENAME##_list_ismember(&stage->list, e); \
 		} \
 		return 0; \
-	}
+	} \
+	static inline struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
+	{ \
+		struct starpu_rbtree_node *node; \
+		struct ENAME##_prio_list_stage *stage; \
+		if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \
+			return NULL; \
+		return ENAME##_list_begin(&stage->list); \
+	} \
+	static inline struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
+	{ return NULL; } \
+	static inline struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
+	{ \
+		struct ENAME *next = ENAME##_list_next(i); \
+		if (next != ENAME##_list_end(NULL)) \
+			return next; \
+		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
+		struct ENAME##_prio_list_stage *stage; \
+		node = starpu_rbtree_next(node); \
+		if (!ENAME##_prio_list_get_next_nonempty_stage(priolist, node, &node, &stage)) \
+			return NULL; \
+		return ENAME##_list_begin(&stage->list); \
+	} \
+	static inline struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
+	{ \
+		struct starpu_rbtree_node *node; \
+		struct ENAME##_prio_list_stage *stage; \
+		if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \
+			return NULL; \
+		return ENAME##_list_last(&stage->list); \
+	} \
+	static inline struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \
+	{ return NULL; } \
+	static inline struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist, const struct ENAME *i) \
+	{ \
+		struct ENAME *next = ENAME##_list_prev(i); \
+		if (next != ENAME##_list_alpha(NULL)) \
+			return next; \
+		struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \
+		struct ENAME##_prio_list_stage *stage; \
+		node = starpu_rbtree_prev(node); \
+		if (!ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, &node, &stage)) \
+			return NULL; \
+		return ENAME##_list_last(&stage->list); \
+	} \
 
 #else
 
@@ -252,19 +370,55 @@
 	static inline void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \
 	{ (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \
 	static inline void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \
-	{ ENAME##_list_push_back(&(priolist)->list, (e)); } \
+	{ \
+		struct ENAME *cur; \
+		for (cur  = ENAME##_list_begin(&(priolist)->list); \
+		     cur != ENAME##_list_end(&(priolist)->list); \
+		     cur  = ENAME##_list_next(cur)) \
+			if ((e)->PRIOFIELD > cur->PRIOFIELD) \
+				break; \
+		if (cur == ENAME##_list_end(&(priolist)->list)) \
+			ENAME##_list_push_back(&(priolist)->list, (e)); \
+		else \
+			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
+	} \
 	static inline void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \
-	{ ENAME##_list_push_front(&(priolist)->list, (e)); } \
+	{ \
+		struct ENAME *cur; \
+		for (cur  = ENAME##_list_begin(&(priolist)->list); \
+		     cur != ENAME##_list_end(&(priolist)->list); \
+		     cur  = ENAME##_list_next(cur)) \
+			if ((e)->PRIOFIELD >= cur->PRIOFIELD) \
+				break; \
+		if (cur == ENAME##_list_end(&(priolist)->list)) \
+			ENAME##_list_push_back(&(priolist)->list, (e)); \
+		else \
+			ENAME##_list_insert_before(&(priolist)->list, (e), cur); \
+	} \
 	static inline int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_empty(&(priolist)->list); } \
 	static inline void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \
 	{ ENAME##_list_erase(&(priolist)->list, (e)); } \
 	static inline struct ENAME *ENAME##_prio_list_pop_front(struct ENAME##_prio_list *priolist) \
 	{ return ENAME##_list_pop_front(&(priolist)->list); } \
+	static inline struct ENAME *ENAME##_prio_list_pop_back(struct ENAME##_prio_list *priolist) \
+	{ return ENAME##_list_pop_back(&(priolist)->list); } \
 	static inline void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \
 	{ ENAME##_list_push_list_back(&(priolist)->list, &(priolist_toadd)->list); } \
 	static inline int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \
 	{ return ENAME##_list_ismember(&(priolist)->list, (e)); } \
+	static inline struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \
+	{ return ENAME##_list_begin(&(priolist)->list); } \
+	static inline struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist) \
+	{ return ENAME##_list_end(&(priolist)->list); } \
+	static inline struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
+	{ return ENAME##_list_next(i); } \
+	static inline struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \
+	{ return ENAME##_list_last(&(priolist)->list); } \
+	static inline struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist) \
+	{ return ENAME##_list_alpha(&(priolist)->list); } \
+	static inline struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \
+	{ return ENAME##_list_prev(i); } \
 
 #endif
 

+ 49 - 0
src/common/utils.c

@@ -143,6 +143,51 @@ void _starpu_mkpath_and_check(const char *path, mode_t mode)
 	}
 }
 
+char *_starpu_mkdtemp_internal(char *tmpl)
+{
+	int len = (int)strlen(tmpl);
+	int i;
+	int count = 1;
+	int ret;
+
+	// Initialize template
+	for(i=len-6 ; i<len ; i++)
+	{
+		STARPU_ASSERT_MSG(tmpl[i] == 'X', "Template must terminate by XXXXXX\n");
+		tmpl[i] = (char) (97 + starpu_lrand48() % 25);
+	}
+
+	// Try to create directory
+	ret = mkdir(tmpl, 0777);
+	while ((ret == -1) && (errno == EEXIST))
+	{
+		// Generate a new name
+		for(i=len-6 ; i<len ; i++)
+		{
+			tmpl[i] = (char) (97 + starpu_lrand48() % 25);
+		}
+		count ++;
+		if (count == 1000)
+		{
+			// We consider that after 1000 tries, we will not be able to create a directory
+			_STARPU_MSG("Error making StarPU temporary directory\n");
+			return NULL;
+
+		}
+		ret = mkdir(tmpl, 0777);
+	}
+	return tmpl;
+}
+
+char *_starpu_mkdtemp(char *tmpl)
+{
+#if defined(HAVE_MKDTEMP)
+	return mkdtemp(tmpl);
+#else
+	return _starpu_mkdtemp_internal(tmpl);
+#endif
+}
+
 char *_starpu_mktemp(const char *directory, int flags, int *fd)
 {
 	/* create template for mkstemp */
@@ -419,6 +464,10 @@ char *_starpu_get_home_path(void)
 		static int warn;
 		path = starpu_getenv("TMPDIR");
 		if (!path)
+			path = starpu_getenv("TEMP");
+		if (!path)
+			path = starpu_getenv("TMP");
+		if (!path)
 			path = "/tmp";
 		if (!warn)
 		{

+ 2 - 0
src/common/utils.h

@@ -126,6 +126,8 @@
 #define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO)
 #endif
 
+char *_starpu_mkdtemp_internal(char *tmpl);
+char *_starpu_mkdtemp(char *tmpl);
 int _starpu_mkpath(const char *s, mode_t mode);
 void _starpu_mkpath_and_check(const char *s, mode_t mode);
 char *_starpu_mktemp(const char *directory, int flags, int *fd);

+ 15 - 15
src/core/perfmodel/perfmodel_bus.c

@@ -673,9 +673,6 @@ static void benchmark_all_gpu_devices(void)
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
 	unsigned i;
 #endif
-#ifdef HAVE_CUDA_MEMCPY_PEER
-	unsigned j;
-#endif
 
 	_STARPU_DEBUG("Benchmarking the speed of the bus\n");
 
@@ -708,26 +705,29 @@ static void benchmark_all_gpu_devices(void)
 	ncuda = _starpu_get_cuda_device_count();
 	for (i = 0; i < ncuda; i++)
 	{
-		_STARPU_DISP("CUDA %d...\n", i);
+		_STARPU_DISP("CUDA %u...\n", i);
 		/* measure bandwidth between Host and Device i */
 		measure_bandwidth_between_host_and_dev(i, cudadev_timing_htod, cudadev_latency_htod, cudadev_timing_dtoh, cudadev_latency_dtoh, cudadev_timing_per_cpu, "CUDA");
 	}
 #ifdef HAVE_CUDA_MEMCPY_PEER
 	for (i = 0; i < ncuda; i++)
+	{
+		unsigned j;
 		for (j = 0; j < ncuda; j++)
 			if (i != j)
 			{
-				_STARPU_DISP("CUDA %d -> %d...\n", i, j);
+				_STARPU_DISP("CUDA %u -> %u...\n", i, j);
 				/* measure bandwidth between Device i and Device j */
 				measure_bandwidth_between_dev_and_dev_cuda(i, j);
 			}
+	}
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
         nopencl = _starpu_opencl_get_device_count();
 	for (i = 0; i < nopencl; i++)
 	{
-		_STARPU_DISP("OpenCL %d...\n", i);
+		_STARPU_DISP("OpenCL %u...\n", i);
 		/* measure bandwidth between Host and Device i */
 		measure_bandwidth_between_host_and_dev(i, opencldev_timing_htod, opencldev_latency_htod, opencldev_timing_dtoh, opencldev_latency_dtoh, opencldev_timing_per_cpu, "OpenCL");
 	}
@@ -2469,13 +2469,13 @@ static void write_bus_platform_file_content(int version)
 	{
 		unsigned j;
 		char i_name[16];
-		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		snprintf(i_name, sizeof(i_name), "CUDA%u", i);
 		for (j = 0; j < ncuda; j++)
 		{
 			char j_name[16];
 			if (j == i)
 				continue;
-			snprintf(j_name, sizeof(j_name), "CUDA%d", j);
+			snprintf(j_name, sizeof(j_name), "CUDA%u", j);
 			fprintf(f, "   <link id=\"%s-%s\" bandwidth=\"%f%s\" latency=\"%f%s\"/>\n",
 				i_name, j_name,
 				1000000. / cudadev_timing_dtod[1+i][1+j], Bps,
@@ -2521,20 +2521,20 @@ static void write_bus_platform_file_content(int version)
 				if (i != j)
 				{
 					fprintf(f, "   <route src=\"CUDA%u\" dst=\"CUDA%u\" symmetrical=\"NO\">\n", i, j);
-					fprintf(f, "    <link_ctn id=\"CUDA%d-CUDA%d\"/>\n", i, j);
+					fprintf(f, "    <link_ctn id=\"CUDA%u-CUDA%u\"/>\n", i, j);
 					emit_platform_path_up(f,
 						hwloc_cuda_get_device_osdev_by_index(topology, i),
 						hwloc_cuda_get_device_osdev_by_index(topology, j));
 					fprintf(f, "   </route>\n");
 				}
 
-			fprintf(f, "   <route src=\"CUDA%d\" dst=\"RAM\" symmetrical=\"NO\">\n", i);
-			fprintf(f, "    <link_ctn id=\"CUDA%d-RAM\"/>\n", i);
+			fprintf(f, "   <route src=\"CUDA%u\" dst=\"RAM\" symmetrical=\"NO\">\n", i);
+			fprintf(f, "    <link_ctn id=\"CUDA%u-RAM\"/>\n", i);
 			emit_platform_forward_path(f, hwloc_cuda_get_device_osdev_by_index(topology, i));
 			fprintf(f, "   </route>\n");
 
-			fprintf(f, "   <route src=\"RAM\" dst=\"CUDA%d\" symmetrical=\"NO\">\n", i);
-			fprintf(f, "    <link_ctn id=\"RAM-CUDA%d\"/>\n", i);
+			fprintf(f, "   <route src=\"RAM\" dst=\"CUDA%u\" symmetrical=\"NO\">\n", i);
+			fprintf(f, "    <link_ctn id=\"RAM-CUDA%u\"/>\n", i);
 			emit_platform_backward_path(f, hwloc_cuda_get_device_osdev_by_index(topology, i));
 			fprintf(f, "   </route>\n");
 		}
@@ -2561,13 +2561,13 @@ flat_cuda:
 	{
 		unsigned j;
 		char i_name[16];
-		snprintf(i_name, sizeof(i_name), "CUDA%d", i);
+		snprintf(i_name, sizeof(i_name), "CUDA%u", i);
 		for (j = 0; j < ncuda; j++)
 		{
 			char j_name[16];
 			if (j == i)
 				continue;
-			snprintf(j_name, sizeof(j_name), "CUDA%d", j);
+			snprintf(j_name, sizeof(j_name), "CUDA%u", j);
 			fprintf(f, "   <route src=\"%s\" dst=\"%s\" symmetrical=\"NO\"><link_ctn id=\"%s-%s\"/><link_ctn id=\"Host\"/></route>\n", i_name, j_name, i_name, j_name);
 		}
 	}

+ 1 - 2
src/core/sched_ctx.c

@@ -913,9 +913,8 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 	return sched_ctx->id;
 }
 
-int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, void ***_arglist)
+int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, void **arglist)
 {
-	void **arglist = *_arglist;
 	int arg_i = 0;
 	int min_prio_set = 0;
 	int max_prio_set = 0;

+ 1 - 0
src/core/sched_ctx.h

@@ -26,6 +26,7 @@
 #include <common/barrier_counter.h>
 #include <profiling/profiling.h>
 #include <semaphore.h>
+#include <core/task.h>
 #include "sched_ctx_list.h"
 
 #ifdef STARPU_HAVE_HWLOC

+ 1 - 1
src/core/sched_policy.c

@@ -425,7 +425,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 	task->status = STARPU_TASK_READY;
 	STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1);
 	/* if the context does not have any workers save the tasks in a temp list */
-	if(!sched_ctx->is_initial_sched)
+	if ((task->cl != NULL && task->cl->where != STARPU_NOWHERE) && (!sched_ctx->is_initial_sched))
 	{
 		/*if there are workers in the ctx that are not able to execute tasks
 		  we consider the ctx empty */

+ 8 - 1
src/core/task.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013, 2015, 2016  CNRS
  * Copyright (C) 2011, 2014 INRIA
  *
@@ -135,4 +135,11 @@ void _starpu_watchdog_shutdown(void);
 int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void);
 int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx);
 
+
+#ifdef BUILDING_STARPU
+LIST_CREATE_TYPE_NOSTRUCT(starpu_task, prev, next);
+PRIO_LIST_CREATE_TYPE(starpu_task, priority);
+#define __STARPU_TASK_LIST_H__
+#endif
+
 #endif // __CORE_TASK_H__

+ 8 - 14
src/core/topology.c

@@ -1836,8 +1836,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				_starpu_memory_node_add_nworkers(memory_node);
 
                                 _starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(workerarg, memory_node);
+				_starpu_worker_drives_memory_node(workerarg, memory_node);
 				break;
 			}
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
@@ -1875,7 +1874,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_cuda_bus_ids[devid+1][0] = _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 #ifdef STARPU_SIMGRID
 					const char* cuda_memcpy_peer;
-					snprintf(name, sizeof(name), "CUDA%d", devid);
+					snprintf(name, sizeof(name), "CUDA%u", devid);
 					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
@@ -1931,8 +1930,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				_starpu_memory_node_add_nworkers(memory_node);
 
                                 _starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
+				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
 				break;
 #endif
 
@@ -1961,7 +1959,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 #ifdef STARPU_SIMGRID
-					snprintf(name, sizeof(name), "OpenCL%d", devid);
+					snprintf(name, sizeof(name), "OpenCL%u", devid);
 					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
@@ -1970,8 +1968,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				_starpu_memory_node_add_nworkers(memory_node);
 
                                 _starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(workerarg, memory_node);
+				_starpu_worker_drives_memory_node(workerarg, memory_node);
 				break;
 #endif
 
@@ -2001,8 +1998,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				_starpu_memory_node_add_nworkers(memory_node);
 
                                 _starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
+				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
 				break;
 #endif /* STARPU_USE_MIC */
 
@@ -2017,8 +2013,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				_starpu_memory_node_add_nworkers(memory_node);
 
                                 _starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(workerarg, memory_node);
+				_starpu_worker_drives_memory_node(workerarg, memory_node);
 			}
 				break;
 #endif /* STARPU_USE_SCC */
@@ -2040,8 +2035,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
 				}
                                 _starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
-				if (memory_node != STARPU_MAIN_RAM)
-					_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
+				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
                                 /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */
                                 unsigned findworker;

+ 0 - 1
src/core/workers.c

@@ -34,7 +34,6 @@
 #include <core/task.h>
 #include <datawizard/malloc.h>
 #include <profiling/profiling.h>
-#include <starpu_task_list.h>
 #include <sched_policies/sched_component.h>
 #include <datawizard/memory_nodes.h>
 #include <top/starpu_top_core.h>

+ 5 - 1
src/datawizard/malloc.c

@@ -251,6 +251,10 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 			{
 				char *path = starpu_getenv("TMPDIR");
 				if (!path)
+					path = starpu_getenv("TEMP");
+				if (!path)
+					path = starpu_getenv("TMP");
+				if (!path)
 					path = "/tmp";
 				/* Create bogus file if not done already */
 				char *name = _starpu_mktemp(path, O_RDWR | O_BINARY, &bogusfile);
@@ -325,7 +329,7 @@ end:
 #endif
 	if (ret == 0)
 	{
-		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %ld b\n", (unsigned long)dim);
+		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim);
 	}
 	else if (flags & STARPU_MALLOC_COUNT)
 	{

+ 5 - 2
src/datawizard/memory_nodes.c

@@ -185,9 +185,12 @@ unsigned starpu_worker_get_memory_node(unsigned workerid)
 /* same utility as _starpu_memory_node_add_nworkers */
 void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned memnode)
 {
-	_starpu_worker_drives_memory[worker->workerid][memnode] = 1;
+	if (! _starpu_worker_drives_memory[worker->workerid][memnode])
+	{
+		_starpu_worker_drives_memory[worker->workerid][memnode] = 1;
 #ifdef STARPU_SIMGRID
-	starpu_pthread_queue_register(&worker->wait, &_starpu_simgrid_transfer_queue[memnode]);
+		starpu_pthread_queue_register(&worker->wait, &_starpu_simgrid_transfer_queue[memnode]);
 #endif
+	}
 }
 

+ 8 - 41
src/debug/traces/starpu_fxt.c

@@ -400,7 +400,6 @@ static char last_codelet_parameter_description[STARPU_NMAXWORKERS][MAX_PARAMETER
 static double last_activity_flush_timestamp[STARPU_NMAXWORKERS];
 static double accumulated_sleep_time[STARPU_NMAXWORKERS];
 static double accumulated_exec_time[STARPU_NMAXWORKERS];
-static double reclaiming[STARPU_MAXNODES];
 
 static unsigned steal_number = 0;
 
@@ -3167,67 +3166,35 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 				break;
 			case _STARPU_FUT_START_FREE:
 				if (!options->no_bus)
-				{
-					handle_memnode_event(&ev, options, "F");
-				}
+					handle_push_memnode_event(&ev, options, "F");
 				break;
 			case _STARPU_FUT_END_FREE:
 				if (!options->no_bus)
-				{
-					unsigned memnode = ev.param[0];
-					if (reclaiming[memnode])
-						handle_memnode_event(&ev, options, "R");
-					else
-						handle_memnode_event(&ev, options, "No");
-				}
+					handle_pop_memnode_event(&ev, options);
 				break;
 			case _STARPU_FUT_START_WRITEBACK:
 				if (!options->no_bus)
-				{
-					handle_memnode_event(&ev, options, "W");
-				}
+					handle_push_memnode_event(&ev, options, "W");
 				break;
 			case _STARPU_FUT_END_WRITEBACK:
 				if (!options->no_bus)
-				{
-					unsigned memnode = ev.param[0];
-					if (reclaiming[memnode])
-						handle_memnode_event(&ev, options, "R");
-					else
-						handle_memnode_event(&ev, options, "No");
-				}
+					handle_pop_memnode_event(&ev, options);
 				break;
 			case _STARPU_FUT_START_WRITEBACK_ASYNC:
 				if (!options->no_bus)
-				{
-					handle_memnode_event(&ev, options, "Wa");
-				}
+					handle_push_memnode_event(&ev, options, "Wa");
 				break;
 			case _STARPU_FUT_END_WRITEBACK_ASYNC:
 				if (!options->no_bus)
-				{
-					unsigned memnode = ev.param[0];
-					if (reclaiming[memnode])
-						handle_memnode_event(&ev, options, "R");
-					else
-						handle_memnode_event(&ev, options, "No");
-				}
+					handle_pop_memnode_event(&ev, options);
 				break;
 			case _STARPU_FUT_START_MEMRECLAIM:
 				if (!options->no_bus)
-				{
-					unsigned memnode = ev.param[0];
-					reclaiming[memnode] = 1;
-					handle_memnode_event(&ev, options, "R");
-				}
+					handle_push_memnode_event(&ev, options, "R");
 				break;
 			case _STARPU_FUT_END_MEMRECLAIM:
 				if (!options->no_bus)
-				{
-					unsigned memnode = ev.param[0];
-					reclaiming[memnode] = 0;
-					handle_memnode_event(&ev, options, "No");
-				}
+					handle_pop_memnode_event(&ev, options);
 				break;
 			case _STARPU_FUT_USED_MEM:
 				handle_used_mem(&ev, options);

+ 3 - 3
src/drivers/cuda/driver_cuda.c

@@ -648,13 +648,13 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 			/* Already initialized */
 			continue;
 		}
+		lastdevid = devid;
+		init_device_context(devid, memnode);
+
 #ifndef STARPU_SIMGRID
 		if (worker->config->topology.nworkerpercuda > 1 && props[devid].concurrentKernels == 0)
 			_STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid);
 #endif /* !STARPU_SIMGRID */
-		lastdevid = devid;
-		init_device_context(devid, memnode);
-
 	}
 
 	/* one more time to avoid hacks from third party lib :) */

+ 2 - 1
src/profiling/profiling_helpers.c

@@ -60,6 +60,7 @@ void _starpu_profiling_bus_helper_display_summary(FILE *stream)
 
 		unsigned unit = 0;
 		double d = convert_to_byte_units(transferred, max_unit, &unit);
+		double avg = (transfer_cnt != 0) ? (d / transfer_cnt) : 0;
 
 		_starpu_memory_node_get_name(src, src_name, sizeof(src_name));
 		_starpu_memory_node_get_name(dst, dst_name, sizeof(dst_name));
@@ -67,7 +68,7 @@ void _starpu_profiling_bus_helper_display_summary(FILE *stream)
 		fprintf(stream, "\t%s -> %s", src_name, dst_name);
 		fprintf(stream, "\t%.2lf %s", d, byte_units[unit]);
 		fprintf(stream, "\t%.2lf %s/s", d / elapsed_time, byte_units[unit]);
-		fprintf(stream, "\t(transfers : %lld - avg %.2lf %s)\n", transfer_cnt, d / transfer_cnt, byte_units[unit]);
+		fprintf(stream, "\t(transfers : %lld - avg %.2lf %s)\n", transfer_cnt, avg, byte_units[unit]);
 
 		sum_transferred += transferred;
 	}

+ 1 - 1
src/sched_policies/component_work_stealing.c

@@ -59,7 +59,7 @@ static struct starpu_task *  steal_task_round_robin(struct starpu_sched_componen
 		struct _starpu_prio_deque * fifo = wsd->fifos[i];
 
 		STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-		task = _starpu_prio_deque_deque_task_for_worker(fifo, workerid);
+		task = _starpu_prio_deque_deque_task_for_worker(fifo, workerid, NULL);
 		if(task && !isnan(task->predicted))
 		{
 			fifo->exp_len -= task->predicted;

+ 21 - 9
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -355,6 +355,12 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
 	struct _starpu_fifo_taskq *fifo = dt->queue_array[best_workerid];
 
+	double now = starpu_timing_now();
+
+	starpu_pthread_mutex_t *sched_mutex;
+	starpu_pthread_cond_t *sched_cond;
+	starpu_worker_get_sched_condition(best_workerid, &sched_mutex, &sched_cond);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 	starpu_sched_ctx_call_pushed_task_cb(best_workerid, sched_ctx_id);
 #endif //STARPU_USE_SC_HYPERVISOR
@@ -362,10 +368,10 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	_starpu_worker_lock(best_workerid);
 
         /* Sometimes workers didn't take the tasks as early as we expected */
-	fifo->exp_start = isnan(fifo->exp_start) ? starpu_timing_now() + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, starpu_timing_now());
+	fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 
-	if ((starpu_timing_now() + predicted_transfer) < fifo->exp_end)
+	if ((now + predicted_transfer) < fifo->exp_end)
 	{
 		/* We may hope that the transfer will be finished by
 		 * the start of the task. */
@@ -375,7 +381,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	{
 		/* The transfer will not be finished by then, take the
 		 * remainder into account */
-		predicted_transfer = (starpu_timing_now() + predicted_transfer) - fifo->exp_end;
+		predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
 	}
 
 	if(!isnan(predicted_transfer))
@@ -491,6 +497,8 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
 	struct starpu_sched_ctx_iterator it;
 
+	double now = starpu_timing_now();
+
 	workers->init_iterator_for_parallel_tasks(workers, &it, task);
 	while(workers->has_next(workers, &it))
 	{
@@ -502,7 +510,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id);
 
 		/* Sometimes workers didn't take the tasks as early as we expected */
-		double exp_start = isnan(fifo->exp_start) ? starpu_timing_now() + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, starpu_timing_now());
+		double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 
 		if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask))
 			continue;
@@ -646,6 +654,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 		task_prio = _normalize_prio(task->priority, dt->num_priorities, sched_ctx_id);
 
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+	double now = starpu_timing_now();
 
 	struct starpu_sched_ctx_iterator it;
 	workers->init_iterator_for_parallel_tasks(workers, &it, task);
@@ -659,7 +668,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 		unsigned memory_node = starpu_worker_get_memory_node(workerid);
 
 		/* Sometimes workers didn't take the tasks as early as we expected */
-		double exp_start = isnan(fifo->exp_start) ? starpu_timing_now() + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, starpu_timing_now());
+		double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 
 		if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask))
 			continue;
@@ -767,7 +776,9 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			if (unknown)
 				continue;
 
-			exp_end[worker_ctx][nimpl] = exp_start + prev_exp_len + local_task_length[worker_ctx][nimpl];
+			double task_starting_time = STARPU_MAX(exp_start + prev_exp_len, now + local_data_penalty[worker_ctx][nimpl]); 
+
+			exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl];
 
 			if (exp_end[worker_ctx][nimpl] < best_exp_end)
 			{
@@ -1133,17 +1144,18 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 						       starpu_task_get_implementation(task));
 
 	double predicted_transfer = starpu_task_expected_data_transfer_time(memory_node, task);
+	double now = starpu_timing_now();
 
 	/* Update the predictions */
 	_starpu_worker_lock(workerid);
 	/* Sometimes workers didn't take the tasks as early as we expected */
-	fifo->exp_start = isnan(fifo->exp_start) ? starpu_timing_now() + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, starpu_timing_now());
+	fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now);
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 
 	/* If there is no prediction available, we consider the task has a null length */
 	if (!isnan(predicted_transfer))
 	{
-		if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
+		if (now + predicted_transfer < fifo->exp_end)
 		{
 			/* We may hope that the transfer will be finished by
 			 * the start of the task. */
@@ -1153,7 +1165,7 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 		{
 			/* The transfer will not be finished by then, take the
 			 * remainder into account */
-			predicted_transfer = (starpu_timing_now() + predicted_transfer) - fifo->exp_end;
+			predicted_transfer = (now + predicted_transfer) - fifo->exp_end;
 		}
 		task->predicted_transfer = predicted_transfer;
 		fifo->exp_end += predicted_transfer;

+ 14 - 93
src/sched_policies/eager_central_priority_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
  * Copyright (C) 2011  INRIA
  * Copyright (C) 2016  Uppsala University
@@ -20,33 +20,21 @@
 /*
  *	This is policy where every worker use the same JOB QUEUE, but taking
  *	task priorities into account
+ *
+ *	TODO: merge with eager, after checking the scalability
  */
 
 #include <starpu.h>
 #include <starpu_scheduler.h>
 #include <starpu_bitmap.h>
+#include "prio_deque.h"
 
 #include <common/fxt.h>
 #include <core/workers.h>
 
-#define DEFAULT_MIN_LEVEL	(-5)
-#define DEFAULT_MAX_LEVEL	(+5)
-
-struct _starpu_priority_taskq
-{
-	int min_prio;
-	int max_prio;
-	/* the actual lists
-	 *	taskq[p] is for priority [p - STARPU_MIN_PRIO] */
-	struct starpu_task_list *taskq;
-	unsigned *ntasks;
-
-	unsigned total_ntasks;
-};
-
 struct _starpu_eager_central_prio_data
 {
-	struct _starpu_priority_taskq *taskq;
+	struct _starpu_prio_deque taskq;
 	starpu_pthread_mutex_t policy_mutex;
 	struct starpu_bitmap *waiters;
 };
@@ -55,54 +43,19 @@ struct _starpu_eager_central_prio_data
  * Centralized queue with priorities
  */
 
-static struct _starpu_priority_taskq *_starpu_create_priority_taskq(int min_prio, int max_prio)
-{
-	struct _starpu_priority_taskq *central_queue;
-
-	_STARPU_MALLOC(central_queue, sizeof(struct _starpu_priority_taskq));
-	central_queue->min_prio = min_prio;
-	central_queue->max_prio = max_prio;
-	central_queue->total_ntasks = 0;
-	_STARPU_MALLOC(central_queue->taskq, (max_prio-min_prio+1) * sizeof(struct starpu_task_list));
-	_STARPU_MALLOC(central_queue->ntasks, (max_prio-min_prio+1) * sizeof(unsigned));
-
-	int prio;
-	for (prio = 0; prio < (max_prio-min_prio+1); prio++)
-	{
-		starpu_task_list_init(&central_queue->taskq[prio]);
-		central_queue->ntasks[prio] = 0;
-	}
-
-	return central_queue;
-}
-
-static void _starpu_destroy_priority_taskq(struct _starpu_priority_taskq *priority_queue)
-{
-	free(priority_queue->ntasks);
-	free(priority_queue->taskq);
-	free(priority_queue);
-}
-
 static void initialize_eager_center_priority_policy(unsigned sched_ctx_id)
 {
 	struct _starpu_eager_central_prio_data *data;
 	_STARPU_MALLOC(data, sizeof(struct _starpu_eager_central_prio_data));
 
-	/* In this policy, we support more than two levels of priority. */
-
-	if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0)
-		starpu_sched_ctx_set_min_priority(sched_ctx_id, DEFAULT_MIN_LEVEL);
-	if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0)
-		starpu_sched_ctx_set_max_priority(sched_ctx_id, DEFAULT_MAX_LEVEL);
-
 	/* only a single queue (even though there are several internaly) */
-	data->taskq = _starpu_create_priority_taskq(starpu_sched_ctx_get_min_priority(sched_ctx_id), starpu_sched_ctx_get_max_priority(sched_ctx_id));
+	_starpu_prio_deque_init(&data->taskq);
 	data->waiters = starpu_bitmap_create();
 
 	/* Tell helgrind that it's fine to check for empty fifo in
 	 * _starpu_priority_pop_task without actual mutex (it's just an
 	 * integer) */
-	STARPU_HG_DISABLE_CHECKING(data->taskq->total_ntasks);
+	STARPU_HG_DISABLE_CHECKING(data->taskq.ntasks);
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
 	STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
 }
@@ -113,7 +66,7 @@ static void deinitialize_eager_center_priority_policy(unsigned sched_ctx_id)
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
 	/* deallocate the job queue */
-	_starpu_destroy_priority_taskq(data->taskq);
+	_starpu_prio_deque_destroy(&data->taskq);
 	starpu_bitmap_destroy(data->waiters);
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
@@ -124,18 +77,12 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 {
 	unsigned sched_ctx_id = task->sched_ctx;
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct _starpu_priority_taskq *taskq = data->taskq;
+	struct _starpu_prio_deque *taskq = &data->taskq;
 
 	_starpu_worker_relax_on();
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	_starpu_worker_relax_off();
-	unsigned priolevel = task->priority - starpu_sched_ctx_get_min_priority(sched_ctx_id);
-	STARPU_ASSERT_MSG(task->priority >= starpu_sched_ctx_get_min_priority(sched_ctx_id) &&
-			  task->priority <= starpu_sched_ctx_get_max_priority(sched_ctx_id), "task priority %d is not between minimum %d and maximum %d\n", task->priority, starpu_sched_ctx_get_min_priority(sched_ctx_id), starpu_sched_ctx_get_max_priority(sched_ctx_id));
-
-	starpu_task_list_push_back(&taskq->taskq[priolevel], task);
-	taskq->ntasks[priolevel]++;
-	taskq->total_ntasks++;
+	_starpu_prio_deque_push_back_task(taskq, task);
 	starpu_push_task_end(task);
 
 	/*if there are no tasks block */
@@ -201,18 +148,18 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
 static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 {
-	struct starpu_task *chosen_task = NULL, *task, *nexttask;
+	struct starpu_task *chosen_task;
 	unsigned workerid = starpu_worker_get_id_check();
 	int skipped = 0;
 
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
-	struct _starpu_priority_taskq *taskq = data->taskq;
+	struct _starpu_prio_deque *taskq = &data->taskq;
 
 	/* Here helgrind would shout that this is unprotected, this is just an
 	 * integer access, and we hold the sched mutex, so we can not miss any
 	 * wake up. */
-	if (!STARPU_RUNNING_ON_VALGRIND && taskq->total_ntasks == 0)
+	if (!STARPU_RUNNING_ON_VALGRIND && _starpu_prio_deque_is_empty(taskq))
 	{
 		return NULL;
 	}
@@ -229,33 +176,7 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	_starpu_worker_relax_off();
 
-	unsigned priolevel = taskq->max_prio - taskq->min_prio;
-	do
-	{
-		if (taskq->ntasks[priolevel] > 0)
-		{
-			for (task  = starpu_task_list_begin(&taskq->taskq[priolevel]);
-			     task != starpu_task_list_end(&taskq->taskq[priolevel]) && !chosen_task;
-			     task  = nexttask)
-			{
-				unsigned nimpl;
-				nexttask = starpu_task_list_next(task);
-				if (starpu_worker_can_execute_task_first_impl(workerid, task, &nimpl))
-				{
-					/* there is some task that we can grab */
-					starpu_task_set_implementation(task, nimpl);
-					starpu_task_list_erase(&taskq->taskq[priolevel], task);
-					chosen_task = task;
-					taskq->ntasks[priolevel]--;
-					taskq->total_ntasks--;
-					break;
-				}
-				else
-					skipped = 1;
-			}
-		}
-	}
-	while (!chosen_task && priolevel-- > 0);
+	chosen_task = _starpu_prio_deque_pop_task_for_worker(taskq, workerid, &skipped);
 
 	if (!chosen_task && skipped)
 	{

+ 24 - 24
src/sched_policies/fifo_queues.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2016  Université de Bordeaux
+ * Copyright (C) 2010-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013, 2016  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016  Uppsala University
@@ -79,16 +79,16 @@ _starpu_fifo_get_exp_len_prev_task_list(struct _starpu_fifo_taskq *fifo_queue, s
 	struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx);
 	double exp_len = 0.0;
 	
-	if (list->head != NULL)
+	if (list->_head != NULL)
 	{
-		struct starpu_task *current = list->head;
+		struct starpu_task *current = list->_head;
 		struct starpu_task *prev = NULL;
 
-		if (list->head->priority == task->priority &&
-		    list->head->priority == list->tail->priority)
+		if (list->_head->priority == task->priority &&
+		    list->_head->priority == list->_tail->priority)
 		{
 			/* They all have the same priority, the task's place is at the end */
-			prev = list->tail;
+			prev = list->_tail;
 			current = NULL;
 		}
 		else
@@ -107,7 +107,7 @@ _starpu_fifo_get_exp_len_prev_task_list(struct _starpu_fifo_taskq *fifo_queue, s
 			{
 				/* the task's place is between prev and current */
 				struct starpu_task *it;
-				for(it = list->head; it != current; it = it->next)
+				for(it = list->_head; it != current; it = it->next)
 				{
 					exp_len += starpu_task_expected_length(it, perf_arch, nimpl);
 					(*fifo_ntasks) ++;
@@ -115,7 +115,7 @@ _starpu_fifo_get_exp_len_prev_task_list(struct _starpu_fifo_taskq *fifo_queue, s
 			}
 			else
 			{
-				/* the task's place is at the tail of the list */
+				/* the task's place is at the _tail of the list */
 				exp_len = fifo_queue->exp_len;
 				*fifo_ntasks = fifo_queue->ntasks;
 			}
@@ -131,25 +131,25 @@ _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, struct star
 {
 	struct starpu_task_list *list = &fifo_queue->taskq;
 
-	if (list->head == NULL)
+	if (list->_head == NULL)
 	{
-		list->head = task;
-		list->tail = task;
+		list->_head = task;
+		list->_tail = task;
 		task->prev = NULL;
 		task->next = NULL;
 	}
-	else if (list->head->priority == task->priority &&
-		 list->head->priority == list->tail->priority)
+	else if (list->_head->priority == task->priority &&
+		 list->_head->priority == list->_tail->priority)
 	{
 		/* They all have the same priority, just put at the end */
-		list->tail->next = task;
+		list->_tail->next = task;
 		task->next = NULL;
-		task->prev = list->tail;
-		list->tail = task;
+		task->prev = list->_tail;
+		list->_tail = task;
 	}
 	else
 	{
-		struct starpu_task *current = list->head;
+		struct starpu_task *current = list->_head;
 		struct starpu_task *prev = NULL;
 
 		while (current)
@@ -164,10 +164,10 @@ _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, struct star
 		if (prev == NULL)
 		{
 			/* Insert at the front of the list */
-			list->head->prev = task;
+			list->_head->prev = task;
 			task->prev = NULL;
-			task->next = list->head;
-			list->head = task;
+			task->next = list->_head;
+			list->_head = task;
 		}
 		else
 		{
@@ -181,11 +181,11 @@ _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, struct star
 			}
 			else
 			{
-				/* Insert at the tail of the list */
-				list->tail->next = task;
+				/* Insert at the _tail of the list */
+				list->_tail->next = task;
 				task->next = NULL;
-				task->prev = list->tail;
-				list->tail = task;
+				task->prev = list->_tail;
+				list->_tail = task;
 			}
 		}
 	}

+ 2 - 1
src/sched_policies/fifo_queues.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013, 2016  Université de Bordeaux
+ * Copyright (C) 2010-2013, 2016-2017  Université de Bordeaux
  * Copyright (C) 2016  Uppsala University
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,7 @@
 #define __FIFO_QUEUES_H__
 
 #include <starpu.h>
+#include <core/task.h>
 
 struct _starpu_fifo_taskq
 {

+ 1 - 1
src/sched_policies/graph_test_policy.c

@@ -331,7 +331,7 @@ static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id)
 		return NULL;
 	}
 
-	chosen_task = _starpu_prio_deque_pop_task_for_worker(prio, workerid);
+	chosen_task = _starpu_prio_deque_pop_task_for_worker(prio, workerid, NULL);
 	if (!chosen_task)
 		/* Tell pushers that we are waiting for tasks for us */
 		starpu_bitmap_set(data->waiters, workerid);

+ 1 - 1
src/sched_policies/helper_mct.c

@@ -122,6 +122,7 @@ int starpu_mct_compute_expected_times(struct starpu_sched_component *component,
 		double *min_exp_end_with_task, double *max_exp_end_with_task, int *suitable_components)
 {
 	int nsuitable_components = 0;
+	double now = starpu_timing_now();
 
 	int i;
 	for(i = 0; i < component->nchildren; i++)
@@ -137,7 +138,6 @@ int starpu_mct_compute_expected_times(struct starpu_sched_component *component,
 
 			/* Estimated availability of worker */
 			double estimated_end = c->estimated_end(c);
-			double now = starpu_timing_now();
 			if (estimated_end < now)
 				estimated_end = now;
 			estimated_transfer_length[i] = starpu_sched_component_transfer_length(c, task);

+ 5 - 2
src/sched_policies/parallel_heft.c

@@ -302,6 +302,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 	int unknown = 0;
 	struct starpu_sched_ctx_iterator it;
 
+	double now = starpu_timing_now();
+
 	memset(skip_worker, 0, nworkers_ctx*STARPU_MAXIMPLEMENTATIONS*sizeof(int));
 
 	workers->init_iterator(workers, &it);
@@ -313,7 +315,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 		{
 			/* Sometimes workers didn't take the tasks as early as we expected */
 			_starpu_worker_lock(workerid);
-			worker_exp_start[workerid] = STARPU_MAX(worker_exp_start[workerid], starpu_timing_now());
+			worker_exp_start[workerid] = STARPU_MAX(worker_exp_start[workerid], now);
 			worker_exp_end[workerid] = worker_exp_start[workerid] + worker_exp_len[workerid];
 			if (worker_exp_end[workerid] > max_exp_end)
 				max_exp_end = worker_exp_end[workerid];
@@ -500,6 +502,7 @@ static int parallel_heft_push_task(struct starpu_task *task)
 static void parallel_heft_add_workers(__attribute__((unused)) unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 {
 	unsigned i;
+	double now = starpu_timing_now();
 	for (i = 0; i < nworkers; i++)
 	{
 		int workerid = workerids[i];
@@ -507,7 +510,7 @@ static void parallel_heft_add_workers(__attribute__((unused)) unsigned sched_ctx
 		/* init these structures only once for each worker */
 		if(!workerarg->has_prev_init)
 		{
-			worker_exp_start[workerid] = starpu_timing_now();
+			worker_exp_start[workerid] = now;
 			worker_exp_len[workerid] = 0.0;
 			worker_exp_end[workerid] = worker_exp_start[workerid];
 			ntasks[workerid] = 0;

+ 24 - 103
src/sched_policies/prio_deque.c

@@ -19,79 +19,6 @@
 #include "prio_deque.h"
 
 
-void _starpu_prio_deque_init(struct _starpu_prio_deque * pdeque)
-{
-	STARPU_ASSERT(pdeque);
-	memset(pdeque,0,sizeof(*pdeque));
-}
-void _starpu_prio_deque_destroy(struct _starpu_prio_deque * pdeque)
-{
-	STARPU_ASSERT(pdeque);
-	int i;
-	for(i = 0; i < pdeque->size_array; i++)
-	{
-		STARPU_ASSERT(starpu_task_list_empty(&pdeque->array[i].list));
-	}
-	free(pdeque->array);
-}
-
-int _starpu_prio_deque_is_empty(struct _starpu_prio_deque * pdeque)
-{
-	STARPU_ASSERT(pdeque);
-	return pdeque->ntasks == 0;
-}
-
-
-/* return the struct starpu_prio_list * of prio,
- * create it and return it if none exist yet
- */
-static struct starpu_task_list * get_prio(struct _starpu_prio_deque * pdeque, int prio)
-{
-	STARPU_ASSERT(pdeque);
-	int i;
-	for(i = 0; i < pdeque->size_array; i++)
-	{
-		if(pdeque->array[i].prio == prio)
-		{
-			return &pdeque->array[i].list;
-		}
-		else
-			if(pdeque->array[i].prio < prio)
-				break;
-	}
-	pdeque->size_array++;
-	_STARPU_REALLOC(pdeque->array, sizeof(struct _starpu_prio_list) * (pdeque->size_array));
-	memmove(pdeque->array + i + 1,
-		pdeque->array + i,
-		(pdeque->size_array - i - 1) * sizeof(struct _starpu_prio_list));
-	pdeque->array[i].prio = prio;
-	starpu_task_list_init(&pdeque->array[i].list);
-	return &pdeque->array[i].list;
-}
-
-
-
-int _starpu_prio_deque_push_task(struct _starpu_prio_deque * pdeque, struct starpu_task * task)
-{
-	STARPU_ASSERT(pdeque && task);
-	struct starpu_task_list * list = get_prio(pdeque, task->priority);
-	starpu_task_list_push_back(list, task);
-	pdeque->ntasks++;
-	return 0;
-}
-
-int _starpu_prio_deque_push_back_task(struct _starpu_prio_deque * pdeque, struct starpu_task * task)
-{
-	STARPU_ASSERT(pdeque && task);
-	struct starpu_task_list * list = get_prio(pdeque, task->priority);
-	starpu_task_list_push_front(list, task);
-	pdeque->ntasks++;
-	return 0;
-}
-
-
-
-
 /* a little dirty code factorization */
 
 static inline int pred_true(struct starpu_task * t STARPU_ATTRIBUTE_UNUSED, void * v STARPU_ATTRIBUTE_UNUSED)
@@ -104,53 +31,47 @@ static inline int pred_can_execute(struct starpu_task * t, void * pworkerid)
 	int i;
 	for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 		if(starpu_worker_can_execute_task(*(int*)pworkerid, t,i))
+		{
+			starpu_task_set_implementation(t, i);
 			return 1;
+		}
 	return 0;
 }
 
 #define REMOVE_TASK(pdeque, first_task_field, next_task_field, predicate, parg)	\
-	{								\
-		int i;							\
-		struct starpu_task * t = NULL;				\
-		for(i = 0; i < pdeque->size_array; i++)			\
-		{							\
-			t = pdeque->array[i].list.first_task_field;	\
-			while(t && !predicate(t,parg))			\
-				t = t->next_task_field;			\
-			if(t)						\
-			{						\
-				starpu_task_list_erase(&pdeque->array[i].list, t); \
-				pdeque->ntasks--;			\
-				return t;				\
-			}						\
-		}							\
-		return NULL;						\
+	{									\
+		struct starpu_task * t;						\
+		for (t  = starpu_task_prio_list_begin(&pdeque->list);		\
+		     t != starpu_task_prio_list_end(&pdeque->list);		\
+		     t  = starpu_task_prio_list_next(&pdeque->list, t))		\
+		{								\
+			if (predicate(t, parg))					\
+			{							\
+				starpu_task_prio_list_erase(&pdeque->list, t);	\
+				pdeque->ntasks--;				\
+				return t;					\
+			}							\
+			else							\
+				if (skipped)					\
+					*skipped = 1;				\
+		}								\
+		return NULL;							\
 	}
 
 /* dequeue a task of the highest priority available */
 
 /* From the front of the list for the highest priority */
-struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque * pdeque)
-{
-	REMOVE_TASK(pdeque, head, prev, pred_true, STARPU_POISON_PTR);
-}
-struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid)
+struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid, int *skipped)
 {
 	STARPU_ASSERT(pdeque);
 	STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < starpu_worker_get_count());
-	REMOVE_TASK(pdeque, head, prev, pred_can_execute, &workerid);
+	REMOVE_TASK(pdeque, _head, prev, pred_can_execute, &workerid);
 }
 
 /* From the back of the list for the highest priority */
-struct starpu_task * _starpu_prio_deque_deque_task(struct _starpu_prio_deque * pdeque)
-{
-	STARPU_ASSERT(pdeque);
-	REMOVE_TASK(pdeque, tail, next, pred_true, STARPU_POISON_PTR);
-}
-
-struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid)
+struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque * pdeque, int workerid, int *skipped)
 {
 	STARPU_ASSERT(pdeque);
 	STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < starpu_worker_get_count());
-	REMOVE_TASK(pdeque, tail, next, pred_can_execute, &workerid);
+	REMOVE_TASK(pdeque, _tail, next, pred_can_execute, &workerid);
 }

+ 51 - 18
src/sched_policies/prio_deque.h

@@ -16,49 +16,82 @@
 #ifndef __PRIO_DEQUE_H__
 #define __PRIO_DEQUE_H__
 #include <starpu.h>
-#include <starpu_task_list.h>
+#include <core/task.h>
 
 
-struct _starpu_prio_list
-{
-	int prio;
-	struct starpu_task_list list;
-};
-
 struct _starpu_prio_deque
 {
-	struct _starpu_prio_list * array;
+	struct starpu_task_prio_list list;
 	int size_array;
 	unsigned ntasks;
 	unsigned nprocessed;
 	double exp_start, exp_end, exp_len;
 };
 
-void _starpu_prio_deque_init(struct _starpu_prio_deque *);
-void _starpu_prio_deque_destroy(struct _starpu_prio_deque *);
+static inline void _starpu_prio_deque_init(struct _starpu_prio_deque *pdeque)
+{
+	memset(pdeque,0,sizeof(*pdeque));
+	starpu_task_prio_list_init(&pdeque->list);
+}
+
+static inline void _starpu_prio_deque_destroy(struct _starpu_prio_deque *pdeque)
+{
+	starpu_task_prio_list_deinit(&pdeque->list);
+}
 
 /* return 0 iff the struct _starpu_prio_deque is not empty */
-int _starpu_prio_deque_is_empty(struct _starpu_prio_deque *);
+static inline int _starpu_prio_deque_is_empty(struct _starpu_prio_deque *pdeque)
+{
+	return pdeque->ntasks == 0;
+}
 
-/* push a task in O(nb priorities) */
-int _starpu_prio_deque_push_task(struct _starpu_prio_deque *, struct starpu_task *);
+/* push a task in O(lg(nb priorities)) */
+static inline int _starpu_prio_deque_push_task(struct _starpu_prio_deque *pdeque, struct starpu_task *task)
+{
+	starpu_task_prio_list_push_front(&pdeque->list, task);
+	pdeque->ntasks++;
+	return 0;
+}
+static inline int _starpu_prio_deque_push_back_task(struct _starpu_prio_deque *pdeque, struct starpu_task *task)
+{
+	starpu_task_prio_list_push_back(&pdeque->list, task);
+	pdeque->ntasks++;
+	return 0;
+}
 int _starpu_prio_deque_push_back_task(struct _starpu_prio_deque *, struct starpu_task *);
 
 
 /* all _starpu_prio_deque_pop/deque_task functions return a task, or a NULL pointer if none is available
- * in O(nb priorities)
+ * in O(lg(nb priorities))
  */
 
-struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque *);
+static inline struct starpu_task * _starpu_prio_deque_pop_task(struct _starpu_prio_deque *pdeque)
+{
+	struct starpu_task *task;
+	if (starpu_task_prio_list_empty(&pdeque->list))
+		return NULL;
+	task = starpu_task_prio_list_pop_front(&pdeque->list);
+	pdeque->ntasks--;
+	return task;
+}
 
 /* return a task that can be executed by workerid
  */
-struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque *, int workerid);
+struct starpu_task * _starpu_prio_deque_pop_task_for_worker(struct _starpu_prio_deque *, int workerid, int *skipped);
 
 /* dequeue a task of the highest priority available */
-struct starpu_task * _starpu_prio_deque_deque_task(struct _starpu_prio_deque *);
+static inline struct starpu_task * _starpu_prio_deque_deque_task(struct _starpu_prio_deque *pdeque)
+{
+	struct starpu_task *task;
+	if (starpu_task_prio_list_empty(&pdeque->list))
+		return NULL;
+	task = starpu_task_prio_list_pop_back(&pdeque->list);
+	pdeque->ntasks--;
+	return task;
+}
+
 /* return a task that can be executed by workerid
  */
-struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque *, int workerid);
+struct starpu_task * _starpu_prio_deque_deque_task_for_worker(struct _starpu_prio_deque *, int workerid, int *skipped);
 
 #endif /* __PRIO_DEQUE_H__ */

+ 1 - 3
src/util/fstarpu.c

@@ -480,9 +480,8 @@ void fstarpu_data_acquire(starpu_data_handle_t handle, intptr_t mode)
 	starpu_data_acquire(handle, (int)mode);
 }
 
-void fstarpu_unpack_arg(char *cl_arg, void ***_buffer_list)
+void fstarpu_unpack_arg(char *cl_arg, void **buffer_list)
 {
-	void **buffer_list = *_buffer_list;
 	size_t current_arg_offset = 0;
 	int nargs, arg;
 
@@ -505,7 +504,6 @@ void fstarpu_unpack_arg(char *cl_arg, void ***_buffer_list)
 		memcpy(argptr, cl_arg+current_arg_offset, arg_size);
 		current_arg_offset += arg_size;
 	}
-	free(cl_arg);
 }
 
 void fstarpu_sched_ctx_display_workers(int ctx)

+ 1 - 2
src/util/starpu_task_insert_utils.c

@@ -697,9 +697,8 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 }
 
 /* Fortran interface to task_insert */
-void fstarpu_task_insert(void ***_arglist)
+void fstarpu_task_insert(void **arglist)
 {
-	void **arglist = *_arglist;
 	struct starpu_codelet *cl = arglist[0];
 	if (cl == NULL)
 	{

+ 19 - 3
tests/Makefile.am

@@ -28,7 +28,7 @@ endif
 
 AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 AM_CXXFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_SCIF_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@ $(FXT_LIBS)
+LDADD = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) $(FXT_LIBS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS) $(FXT_LDFLAGS)
 
@@ -60,7 +60,8 @@ EXTRA_DIST =					\
 	datawizard/interfaces/block/block_opencl_kernel.cl \
 	perfmodels/opencl_memset_kernel.cl \
 	$(MICROBENCHS:=.sh) \
-	microbenchs/microbench.sh
+	microbenchs/microbench.sh \
+	model-checking/starpu-mc.sh.in
 
 CLEANFILES = 					\
 	*.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output
@@ -133,7 +134,8 @@ XFAIL_TESTS	=				\
 
 myPROGRAMS =
 
-myPROGRAMS +=				\
+myPROGRAMS +=					\
+	main/mkdtemp				\
 	main/restart				\
 	main/execute_schedule			\
 	main/insert_task_pack			\
@@ -384,6 +386,20 @@ endif
 
 TESTS += datawizard/locality.sh
 
+################################
+# Simgrid Model Checking tests #
+################################
+
+if STARPU_SIMGRID_MC
+model_checking_prio_list_LDADD = 
+model_checking_prio_list_LDFLAGS = 
+model_checking_prio_list_SOURCES = model-checking/prio_list.c ../src/common/rbtree.c
+noinst_PROGRAMS += model-checking/prio_list
+if !STARPU_QUICK_CHECK
+TESTS += model-checking/prio_list.sh
+endif
+endif
+
 #######################
 # Source files        #
 #######################

+ 2 - 2
tests/datawizard/scal_opencl.cl

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011, 2012  CNRS
- * Copyright (C) 2011  Université Bordeaux
+ * Copyright (C) 2011, 2017  Université Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +18,7 @@
 __kernel void vector_mult_opencl(__global unsigned* val, unsigned offset, unsigned nx)
 {
         const int i = get_global_id(0);
-	val = (__global void*) val + offset;
+	val = (__global char*) val + offset;
         if (i < nx)
 	{
                 val[i] *= 2;

+ 10 - 13
tests/disk/disk_compute.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013 Corentin Salingue
- * Copyright (C) 2015, 2016 CNRS
+ * Copyright (C) 2015, 2016, 2017 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,13 +34,6 @@
  * actually
  */
 
-#ifdef STARPU_HAVE_WINDOWS
-#  include <io.h>
-#  if defined(_WIN32) && !defined(__CYGWIN__)
-#    define mkdir(path, mode) mkdir(path)
-#  endif
-#endif
-
 #define NX (1024)
 
 int dotest(struct starpu_disk_ops *ops, char *base)
@@ -216,13 +209,15 @@ static int merge_result(int old, int new)
 int main(void)
 {
 	int ret = 0;
+	int ret2;
 	char s[128];
+	char *ptr;
 
-	snprintf(s, sizeof(s), "/tmp/%s-disk-%d", getenv("USER"), getpid());
-	ret = mkdir(s, 0777);
-	if (ret)
+	snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER"));
+	ptr = _starpu_mkdtemp(s);
+	if (!ptr)
 	{
-		FPRINTF(stderr, "Cannot make directory <%s>\n", s);
+		FPRINTF(stderr, "Cannot make directory '%s'\n", s);
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -238,6 +233,8 @@ int main(void)
 		ret = merge_result(ret, STARPU_TEST_SKIPPED);
 	}
 #endif
-	rmdir(s);
+
+	ret2 = rmdir(s);
+	STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s);
 	return ret;
 }

+ 9 - 12
tests/disk/disk_copy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013 Corentin Salingue
- * Copyright (C) 2015 CNRS
+ * Copyright (C) 2015, 2017 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,13 +30,6 @@
  * progressing because there is not enough room for all of them.
  */
 
-#ifdef STARPU_HAVE_WINDOWS
-#  include <io.h>
-#  if defined(_WIN32) && !defined(__CYGWIN__)
-#    define mkdir(path, mode) mkdir(path)
-#  endif
-#endif
-
 /* RAM is not enough to hold 6 times NX
  * DISK is just enough to hold 6 times NX */
 
@@ -167,11 +160,13 @@ static int merge_result(int old, int new)
 int main(void)
 {
 	int ret = 0;
+	int ret2;
 	char s[128];
+	char *ptr;
 
-	snprintf(s, sizeof(s), "/tmp/%s-disk-%d", getenv("USER"), getpid());
-	ret = mkdir(s, 0777);
-	if (ret)
+	snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER"));
+	ptr = _starpu_mkdtemp(s);
+	if (!ptr)
 	{
 		FPRINTF(stderr, "Cannot make directory <%s>\n", s);
 		return STARPU_TEST_SKIPPED;
@@ -182,7 +177,9 @@ int main(void)
 #ifdef STARPU_LINUX_SYS
 	ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s));
 #endif
-	rmdir(s);
+
+	ret2 = rmdir(s);
+	STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s);
 	return ret;
 }
 #endif

+ 8 - 11
tests/disk/disk_pack.c

@@ -41,13 +41,6 @@ int main(int argc, char **argv)
  * Here we force using the pack/unpack mechanism
  */
 
-#ifdef STARPU_HAVE_WINDOWS
-#  include <io.h>
-#  if defined(_WIN32) && !defined(__CYGWIN__)
-#    define mkdir(path, mode) mkdir(path)
-#  endif
-#endif
-
 #define NX (1024)
 
 const struct starpu_data_copy_methods my_vector_copy_data_methods_s;
@@ -274,11 +267,13 @@ static int merge_result(int old, int new)
 int main(void)
 {
 	int ret = 0;
+	int ret2;
 	char s[128];
+	char *ptr;
 
-	snprintf(s, sizeof(s), "/tmp/%s-disk-%d", getenv("USER"), getpid());
-	ret = mkdir(s, 0777);
-	if (ret)
+	snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER"));
+	ptr = _starpu_mkdtemp(s);
+	if (!ptr)
 	{
 		FPRINTF(stderr, "Cannot make directory <%s>\n", s);
 		return STARPU_TEST_SKIPPED;
@@ -289,7 +284,9 @@ int main(void)
 #ifdef STARPU_LINUX_SYS
 	ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s));
 #endif
-	rmdir(s);
+
+	ret2 = rmdir(s);
+	STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s);
 	return ret;
 }
 #endif

+ 14 - 12
tests/disk/mem_reclaim.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013 Corentin Salingue
- * Copyright (C) 2015, 2016 CNRS
+ * Copyright (C) 2015, 2016, 2017 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -39,13 +39,6 @@
 #define VALGRIND_MAKE_MEM_DEFINED(addr, size) (void)0
 #endif
 
-#ifdef STARPU_HAVE_WINDOWS
-#  include <io.h>
-#  if defined(_WIN32) && !defined(__CYGWIN__)
-#    define mkdir(path, mode) mkdir(path)
-#  endif
-#endif
-
 #ifdef STARPU_QUICK_CHECK
 #  define NDATA 4
 #  define NITER 16
@@ -210,11 +203,17 @@ static int merge_result(int old, int new)
 int main(void)
 {
 	int ret = 0;
+	int ret2;
 	char s[128];
+	char *ptr;
 
-	snprintf(s, sizeof(s), "/tmp/%s-disk-%d", getenv("USER"), getpid());
-	ret = mkdir(s, 0777);
-	STARPU_CHECK_RETURN_VALUE(ret, "mkdir '%s'\n", s);
+	snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER"));
+	ptr = _starpu_mkdtemp(s);
+	if (!ptr)
+	{
+		FPRINTF(stderr, "Cannot make directory '%s'\n", s);
+		return STARPU_TEST_SKIPPED;
+	}
 
 	setenv("STARPU_LIMIT_CPU_MEM", MEMSIZE_STR, 1);
 
@@ -230,7 +229,10 @@ int main(void)
 	ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_vector_data_register, "unistd_direct with read/write vector ops"));
 	ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_my_vector_data_register, "unistd_direct with pack/unpack vector ops"));
 #endif
-	rmdir(s);
+
+	ret2 = rmdir(s);
+	STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s);
+
 	return ret;
 }
 #endif

+ 64 - 0
tests/main/mkdtemp.c

@@ -0,0 +1,64 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017 CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <common/config.h>
+#include <common/utils.h>
+#include "../helper.h"
+#include <unistd.h>
+
+int do_test(char *(*func)(char *tmpl))
+{
+	int ret;
+	char *path;
+	char dirname[128];
+	char *ptr;
+	struct stat sb;
+
+	path = starpu_getenv("TMPDIR");
+	if (!path)
+		path = starpu_getenv("TEMP");
+	if (!path)
+		path = starpu_getenv("TMP");
+	if (!path)
+		path = "/tmp";
+	snprintf(dirname, 128, "%s/abcdef_XXXXXX", path);
+	ptr = func(dirname);
+	FPRINTF(stderr, "Directory '%s' (res '%s')\n", dirname, ptr);
+
+	// use stat
+	ret = stat(dirname, &sb);
+	if (ret != 0 || !S_ISDIR(sb.st_mode))
+	{
+		FPRINTF(stderr, "Directory '%s' has not been created\n", dirname);
+		return 1;
+	}
+
+	ret = rmdir(dirname);
+	STARPU_CHECK_RETURN_VALUE(ret, "rmdir '%s'\n", dirname);
+
+	return ret;
+}
+
+int main(void)
+{
+	int ret, ret2;
+
+	ret = do_test(_starpu_mkdtemp);
+	ret2 = do_test(_starpu_mkdtemp_internal);
+
+	return ret + ret2;
+}

+ 13 - 10
tests/microbenchs/matrix_as_vector.c

@@ -237,18 +237,21 @@ int main(int argc, char **argv)
 	}
 
 #ifdef STARPU_USE_CUDA
-	cublasHandle_t handle;
-	cublasCreate(&handle);
-	cublasGetVersion(handle, &cublas_version);
-	cublasDestroy(handle);
-
 	devices = starpu_cuda_worker_get_count();
-	if (devices && cublas_version >= 7050)
+	if (devices)
 	{
-		starpu_cublas_init();
-		ret = check_size_on_device(STARPU_CUDA, "STARPU_CUDA");
-		if (ret) goto error;
-		starpu_cublas_shutdown();
+		cublasHandle_t handle;
+		cublasCreate(&handle);
+		cublasGetVersion(handle, &cublas_version);
+		cublasDestroy(handle);
+
+		if (cublas_version >= 7050)
+		{
+			starpu_cublas_init();
+			ret = check_size_on_device(STARPU_CUDA, "STARPU_CUDA");
+			if (ret) goto error;
+			starpu_cublas_shutdown();
+		}
 	}
 #endif
 #if 0

+ 1 - 1
tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh

@@ -18,6 +18,6 @@
 
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="modular-heft modular-heft-prio modular-heft2 lws dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="lws dmda dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_dependent_homogeneous_tasks_data

+ 1 - 1
tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh

@@ -18,6 +18,6 @@
 
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="modular-heft modular-heft-prio modular-heft2 dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_heterogeneous_tasks_data

+ 1 - 1
tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh

@@ -18,6 +18,6 @@
 
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="modular-heft modular-heft-prio modular-heft2 dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_homogeneous_tasks_data

+ 6 - 1
tests/microbenchs/tasks_size_overhead.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2014, 2016  Université de Bordeaux
+ * Copyright (C) 2010-2014, 2016-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -155,6 +155,11 @@ int main(int argc, char **argv)
 	struct starpu_conf conf;
 
 	unsigned buffer;
+	char *starpu_sched = getenv("STARPU_SCHED");
+
+	if (starpu_sched && !strcmp(starpu_sched, "pheft"))
+		/* pheft does not run tasks in parallel, avoid burning time... */
+		return STARPU_TEST_SKIPPED;
 
 	/* Get number of CPUs */
 	starpu_conf_init(&conf);

+ 48 - 0
tests/model-checking/Makefile

@@ -0,0 +1,48 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2017  Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+STARPU=../../
+CPPFLAGS=-I$(STARPU)/src
+CFLAGS=-Wall -Wextra -g $(STARPU)/src/common/rbtree.c -DNOCONFIG
+LDFLAGS=-lsimgrid
+
+MC_FLAGS=--cfg=model-check/reduction:none
+
+ifeq (1,0)
+MC_FLAGS+=--cfg=contexts/factory:ucontext
+MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes
+MC_FLAGS+=--cfg=model-check/visited:1000
+endif
+
+# To record the failing trace
+ifeq (1,0)
+MC_FLAGS+=--cfg=model-check/record:1
+# And replay it without simgrid-mc
+#MC_FLAGS+=--cfg=model-check/replay:'1;3;4'
+endif
+
+# To see which simix calls are made
+#MC_FLAGS+=--log=simix_popping.thres:debug
+
+test: prio_list
+	simgrid-mc ./prio_list platform.xml MAIN $(MC_FLAGS)
+
+debug: prio_list
+	simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS)
+
+all: prio_list prio_list2
+
+clean:
+	rm -f prio_list

+ 18 - 0
tests/model-checking/platform.xml

@@ -0,0 +1,18 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'>
+ <platform version="4">
+ <config id="General">
+   <prop id="network/TCP-gamma" value="-1"></prop>
+   <prop id="network/latency-factor" value="1"></prop>
+   <prop id="network/bandwidth-factor" value="1"></prop>
+ </config>
+ <AS  id="AS0"  routing="Full">
+   <host id="MAIN" speed="1f"/>
+   <host id="CPU0" speed="2000000000f"/>
+   <host id="CPU1" speed="2000000000f"/>
+
+   <host id="RAM" speed="1f"/>
+
+   <link id="Host" bandwidth="0.000000Bps" latency="0.000000s"/>
+ </AS>
+ </platform>

+ 186 - 0
tests/model-checking/prio_list.c

@@ -0,0 +1,186 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0)
+#define STARPU_ATTRIBUTE_UNUSED __attribute((__unused__))
+
+#ifndef NOCONFIG
+#include <config.h>
+#else
+#define _GNU_SOURCE
+// Assuming recent simgrid
+#define STARPU_HAVE_SIMGRID_MSG_H
+#define STARPU_HAVE_XBT_SYNCHRO_H
+#endif
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <common/list.h>
+#include <common/prio_list.h>
+#ifdef STARPU_HAVE_SIMGRID_MSG_H
+#include <simgrid/msg.h>
+#else
+#include <msg/msg.h>
+#endif
+#include <simgrid/modelchecker.h>
+#ifdef STARPU_HAVE_XBT_SYNCHRO_H
+#include <xbt/synchro.h>
+#else
+#include <xbt/synchro_core.h>
+#endif
+
+#ifndef NLISTS
+#define NLISTS 1
+#endif
+#ifndef NITERS
+#define NITERS 1
+#endif
+#ifndef NTHREADS
+#define NTHREADS 2
+#endif
+#ifndef NELEMENTS
+#define NELEMENTS 4
+#endif
+
+// MC_ignore
+
+xbt_mutex_t mutex[NLISTS];
+
+
+LIST_TYPE(foo,
+		unsigned prio;
+		unsigned back;	/* Push at back instead of front? */
+	 );
+PRIO_LIST_TYPE(foo, prio);
+
+struct foo_prio_list mylist[NLISTS];
+
+void check_list_prio(struct foo_prio_list *list)
+{
+	struct foo *cur;
+	unsigned lastprio = UINT_MAX;
+	unsigned back = 0;
+	for (cur  = foo_prio_list_begin(list);
+	     cur != foo_prio_list_end(list);
+	     cur  = foo_prio_list_next(list, cur))
+	{
+		if (cur->prio == lastprio)
+                        /* For same prio, back elements should never get before
+                         * front elements */
+			MC_assert(!(back && !cur->back));
+		else
+			MC_assert(lastprio > cur->prio);
+		lastprio = cur->prio;
+		back = cur->back;
+	}
+}
+
+int worker(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
+{
+	unsigned myrank = atoi(argv[0]);
+	unsigned i, n, l, iter;
+	struct foo *elem;
+	struct drand48_data buffer;
+	long res;
+
+	srand48_r(myrank, &buffer);
+
+	l = myrank%NLISTS;
+
+	for (iter = 0; iter < NITERS; iter++)
+	{
+		for (i = 0; i < NELEMENTS; i++)
+		{
+			elem = malloc(sizeof(*elem));
+			lrand48_r(&buffer, &res);
+			elem->prio = res%10;
+			lrand48_r(&buffer, &res);
+			elem->back = res%2;
+			xbt_mutex_acquire(mutex[l]);
+			if (elem->back)
+				foo_prio_list_push_back(&mylist[l], elem);
+			else
+				foo_prio_list_push_front(&mylist[l], elem);
+			check_list_prio(&mylist[l]);
+			xbt_mutex_release(mutex[l]);
+		}
+
+		for (i = 0; i < NELEMENTS; i++)
+		{
+			lrand48_r(&buffer, &res);
+			n = res%(NELEMENTS-i);
+
+			xbt_mutex_acquire(mutex[l]);
+			for (elem  = foo_prio_list_begin(&mylist[l]);
+			     n--;
+			     elem  = foo_prio_list_next(&mylist[l], elem))
+				;
+			foo_prio_list_erase(&mylist[l], elem);
+			check_list_prio(&mylist[l]);
+			xbt_mutex_release(mutex[l]);
+		}
+
+		/* horrible way to wait for list getting empty */
+		MSG_process_sleep(1000);
+	}
+
+	return 0;
+}
+
+int master(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
+{
+	unsigned i, l;
+
+	for (l = 0; l < NLISTS; l++)
+	{
+		mutex[l] = xbt_mutex_init();
+		foo_prio_list_init(&mylist[l]);
+	}
+
+	for (i = 0; i < NTHREADS; i++)
+	{
+		char *s;
+		asprintf(&s, "%d\n", i);
+		char **args = malloc(sizeof(char*)*2);
+		args[0] = s;
+		args[1] = NULL;
+		MSG_process_create_with_arguments("test", worker, NULL, MSG_host_self(), 1, args);
+	}
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc < 3)
+	{
+		fprintf(stderr,"usage: %s platform.xml host\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+	srand48(0);
+	MSG_init(&argc, argv);
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
+	extern xbt_cfg_t _sg_cfg_set;
+	xbt_cfg_set_int(_sg_cfg_set, "contexts/stack-size", 128);
+#else
+	xbt_cfg_set_int("contexts/stack-size", 128);
+#endif
+	MSG_create_environment(argv[1]);
+	MSG_process_create("master", master, NULL, MSG_get_host_by_name(argv[2]));
+	MSG_main();
+	return 0;
+}

+ 19 - 0
tests/model-checking/prio_list.sh

@@ -0,0 +1,19 @@
+#!/bin/bash -x
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2017  Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+source $(dirname $0)/starpu-mc.sh
+test prio_list

+ 18 - 0
tests/model-checking/prio_list2.c

@@ -0,0 +1,18 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define NLISTS 2
+#include "prio_list.c"

+ 37 - 0
tests/model-checking/starpu-mc.sh.in

@@ -0,0 +1,37 @@
+#!/bin/bash -x
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2017  Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Test a model-checking program with simgrid model checker
+
+SIMGRID_MC=@SIMGRID_MC@
+abs_top_srcdir=@abs_top_srcdir@
+abs_builddir=@abs_builddir@
+
+set -e
+
+[ -x "$SIMGRID_MC" ] || exit 77
+
+MC_FLAGS=--cfg=model-check/reduction:none
+
+# makes it much longer actually
+#MC_FLAGS+=--cfg=contexts/factory:ucontext
+#MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes
+#MC_FLAGS+=--cfg=model-check/visited:1000
+
+test() {
+	$SIMGRID_MC $abs_builddir/$1 $abs_top_srcdir/tests/model-checking/platform.xml MAIN $MC_FLAGS
+}

+ 2 - 0
tools/Makefile.am

@@ -137,6 +137,8 @@ EXTRA_DIST =				\
 	dev/rename.sed			\
 	dev/rename.sh			\
 	perfmodels/README		\
+	perfmodels/cluster.xml		\
+	perfmodels/hostfile		\
 	perfmodels/sampling/codelets/tmp/mlr_init.out	 \
 	valgrind/fxt.suppr		\
 	valgrind/hwloc.suppr		\

+ 3 - 4
tools/cppcheck/suppressions.txt

@@ -26,8 +26,7 @@ ConfigurationNotChecked
 *:sc_hypervisor/*
 
 varFuncNullUB:examples/sched_ctx/two_cpu_contexts.c:76
-negativeIndex:examples/stencil/stencil-tasks.c:200
-negativeIndex:examples/stencil/stencil-tasks.c:203
+negativeIndex:examples/stencil/stencil-tasks.c
 constStatement:examples/stencil/*
 
 unreadVariable:tests/openmp/*
@@ -55,7 +54,7 @@ unusedStructMember:src/core/perfmodel/perfmodel_bus.c:65
 unusedStructMember:src/core/perfmodel/perfmodel_bus.c:66
 unusedStructMember:src/core/simgrid.c:225
 unusedStructMember:src/core/simgrid.c:226
-wrongPrintfScanfArgNum:src/core/simgrid.c:883
+wrongPrintfScanfArgNum:src/core/simgrid.c:946
 duplicateExpression:src/util/starpu_task_insert.c:52
 
 // TODO: this could be an error?
@@ -63,7 +62,7 @@ redundantCopy:src/core/disk_ops/disk_leveldb.cpp:194
 
 nullPointerRedundantCheck:src/common/rbtree.c
 unreadVariable:src/datawizard/interfaces/*
-unreadVariable:src/drivers/driver_common/driver_common.c:492
+unreadVariable:src/drivers/driver_common/driver_common.c:493
 clarifyCondition:src/drivers/opencl/driver_opencl.c:945
 unreadVariable:src/drivers/opencl/driver_opencl.c:767
 clarifyCondition:src/drivers/cuda/driver_cuda.c:498

+ 1 - 1
tools/dev/internal/starpu_check_braces.sh

@@ -21,7 +21,7 @@ SHOW=less
 DIRS="tools src tests examples mpi"
 for d in ${1:-$DIRS}
 do
-    for ext in c h cl cu
+    for ext in c h cl cu doxy
     do
 	grep -rsn "{" $d |grep ".${ext}:" | grep -v "}" | grep -v ".${ext}:[0-9]*:[[:space:]]*{$" > /tmp/braces
 	if test -s /tmp/braces

+ 11 - 0
tools/perfmodels/cluster.xml

@@ -0,0 +1,11 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd'>
+<platform version="4">
+<AS  id="AS0"  routing="Vivaldi">	 
+  <cluster id="plafrim"
+           prefix="mirage0"    suffix=".plafrim.cluster"
+           radical="1-9"    speed="1Gf" 
+	   bw="4GBps"       lat="1us"
+           bb_bw="288GBps"  bb_lat="1us"/>
+</AS>
+</platform>

+ 4 - 0
tools/perfmodels/hostfile

@@ -0,0 +1,4 @@
+mirage01.plafrim.cluster
+mirage02.plafrim.cluster
+mirage03.plafrim.cluster
+mirage04.plafrim.cluster

+ 3 - 2
tools/starpu_fxt_tool.c

@@ -30,8 +30,9 @@ static void usage()
 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
         fprintf(stderr, "\n");
         fprintf(stderr, "Options:\n");
-	fprintf(stderr, "   -i <input file>     specify the input file. This can be specified several\n");
-	fprintf(stderr, "                       times for MPI execution case\n");
+	fprintf(stderr, "   -i <input file[s]>  specify the input file[s]. Several files can be provided,\n");
+	fprintf(stderr, "                       or the option specified several times for MPI execution\n");
+	fprintf(stderr, "                       case\n");
         fprintf(stderr, "   -o <output file>    specify the output file\n");
         fprintf(stderr, "   -c                  use a different colour for every type of task\n");
 	fprintf(stderr, "   -no-events          do not show events\n");