Selaa lähdekoodia

Merge branch 'master' into starpurm

Nathalie Furmento 7 vuotta sitten
vanhempi
commit
493de784c7
100 muutettua tiedostoa jossa 1553 lisäystä ja 499 poistoa
  1. 1 0
      AUTHORS
  2. 32 1
      ChangeLog
  3. 6 3
      configure.ac
  4. 60 0
      doc/doxygen/chapters/301_tasks.doxy
  5. 31 4
      doc/doxygen/chapters/310_data_management.doxy
  6. 1 1
      doc/doxygen/chapters/320_scheduling.doxy
  7. 70 72
      doc/doxygen/chapters/380_offline_performance_tools.doxy
  8. 45 7
      doc/doxygen/chapters/410_mpi_support.doxy
  9. 35 0
      doc/doxygen/chapters/501_environment_variables.doxy
  10. 33 1
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  11. 14 0
      doc/doxygen/chapters/api/data_interfaces.doxy
  12. 13 1
      doc/doxygen/chapters/api/data_partition.doxy
  13. 12 1
      doc/doxygen/chapters/api/explicit_dependencies.doxy
  14. 32 1
      doc/doxygen/chapters/api/initialization.doxy
  15. 12 1
      doc/doxygen/chapters/api/insert_task.doxy
  16. 8 0
      doc/doxygen/chapters/api/modularized_scheduler.doxy
  17. 15 10
      doc/doxygen/chapters/api/mpi.doxy
  18. 33 3
      doc/doxygen/chapters/api/scheduling_policy.doxy
  19. 4 1
      examples/Makefile.am
  20. 89 0
      examples/basic_examples/task_insert_color.c
  21. 19 9
      examples/cpp/add_vectors_interface.cpp
  22. 181 0
      examples/dependency/sequential_consistency.c
  23. 98 0
      examples/dependency/task_end_dep.c
  24. 32 7
      examples/filters/custom_mf/custom_interface.c
  25. 9 1
      examples/filters/fmultiple_manual.c
  26. 13 2
      examples/interface/complex_interface.c
  27. 2 2
      examples/spmv/matrix_market/mm_to_bcsr.h
  28. 4 4
      examples/spmv/matrix_market/mmio.h
  29. 2 1
      examples/stencil/life_opencl.c
  30. 12 1
      include/fstarpu_mod.f90
  31. 7 1
      include/starpu.h
  32. 4 1
      include/starpu_data_filters.h
  33. 4 1
      include/starpu_data_interfaces.h
  34. 1 0
      include/starpu_fxt.h
  35. 4 1
      include/starpu_opencl.h
  36. 3 0
      include/starpu_sched_component.h
  37. 6 0
      include/starpu_scheduler.h
  38. 11 1
      include/starpu_task.h
  39. 3 1
      include/starpu_task_util.h
  40. 2 2
      libstarpu-mic.pc.in
  41. 2 2
      libstarpu.pc.in
  42. 3 6
      mpi/examples/comm/comm.c
  43. 3 6
      mpi/examples/comm/mix_comm.c
  44. 3 7
      mpi/examples/complex/mpi_complex.c
  45. 4 9
      mpi/examples/matrix_decomposition/mpi_cholesky.c
  46. 12 7
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
  47. 4 8
      mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c
  48. 62 30
      mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c
  49. 4 10
      mpi/examples/matrix_mult/mm.c
  50. 4 7
      mpi/examples/mpi_lu/plu_example.c
  51. 4 7
      mpi/examples/mpi_lu/plu_implicit_example.c
  52. 3 7
      mpi/examples/mpi_lu/plu_outofcore_example.c
  53. 4 7
      mpi/examples/stencil/stencil5.c
  54. 4 8
      mpi/examples/stencil/stencil5_lb.c
  55. 4 5
      mpi/examples/user_datatype/my_interface.c
  56. 2 6
      mpi/examples/user_datatype/user_datatype.c
  57. 2 1
      mpi/include/starpu_mpi.h
  58. 2 1
      mpi/src/Makefile.am
  59. 1 1
      mpi/src/load_balancer/policy/data_movements_interface.c
  60. 1 1
      mpi/src/load_balancer/policy/load_data_interface.c
  61. 37 0
      mpi/src/mpi/starpu_mpi_driver.h
  62. 96 4
      mpi/src/mpi/starpu_mpi_mpi.c
  63. 10 5
      mpi/src/nmad/starpu_mpi_nmad.c
  64. 2 2
      mpi/src/starpu_mpi_datatype.c
  65. 40 12
      mpi/src/starpu_mpi_fxt.h
  66. 37 1
      mpi/src/starpu_mpi_init.c
  67. 8 0
      mpi/src/starpu_mpi_task_insert.c
  68. 10 0
      mpi/src/starpu_mpi_task_insert_fortran.c
  69. 5 3
      mpi/tests/Makefile.am
  70. 3 7
      mpi/tests/block_interface.c
  71. 3 7
      mpi/tests/block_interface_pinned.c
  72. 2 5
      mpi/tests/broadcast.c
  73. 3 6
      mpi/tests/cache.c
  74. 3 6
      mpi/tests/cache_disable.c
  75. 3 9
      mpi/tests/callback.c
  76. 3 7
      mpi/tests/datatypes.c
  77. 131 0
      mpi/tests/driver.c
  78. 3 7
      mpi/tests/early_request.c
  79. 3 7
      mpi/tests/gather.c
  80. 3 7
      mpi/tests/gather2.c
  81. 3 6
      mpi/tests/insert_task.c
  82. 3 6
      mpi/tests/insert_task_block.c
  83. 3 6
      mpi/tests/insert_task_compute.c
  84. 3 7
      mpi/tests/insert_task_count.c
  85. 3 7
      mpi/tests/insert_task_dyn_handles.c
  86. 3 6
      mpi/tests/insert_task_node_choice.c
  87. 3 7
      mpi/tests/insert_task_owner.c
  88. 3 7
      mpi/tests/insert_task_owner2.c
  89. 3 7
      mpi/tests/insert_task_owner_data.c
  90. 3 6
      mpi/tests/insert_task_recv_cache.c
  91. 3 6
      mpi/tests/insert_task_sent_cache.c
  92. 3 6
      mpi/tests/insert_task_seq.c
  93. 3 6
      mpi/tests/load_balancer.c
  94. 3 7
      mpi/tests/matrix.c
  95. 3 7
      mpi/tests/matrix2.c
  96. 3 7
      mpi/tests/mpi_detached_tag.c
  97. 3 7
      mpi/tests/mpi_earlyrecv.c
  98. 3 7
      mpi/tests/mpi_earlyrecv2.c
  99. 3 8
      mpi/tests/mpi_earlyrecv2_sync.c
  100. 0 0
      mpi/tests/mpi_irecv.c

+ 1 - 0
AUTHORS

@@ -30,6 +30,7 @@ Benjamin Lorendeau <benjamin.lorendeau@inria.fr>
 Antoine Lucas <antoine.lucas.33@gmail.com>
 Brice Mortier <brice.mortier@etu.u-bordeaux1.fr>
 Stojce Nakov <stojce.nakov@inria.fr>
+Lucas Leandro Nesi <llnesi@inf.ufrgs.br>
 Joris Pablo <joris.pablo@orange.fr>
 Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
 Samuel Pitoiset <samuel.pitoiset@inria.fr>

+ 32 - 1
ChangeLog

@@ -41,6 +41,23 @@ New features:
     more, StarPU will make the appropriate calls as needed.
   * Add starpu_task_notify_ready_soon_register to be notified when it is
     determined when a task will be ready an estimated amount of time from now.
+  * New StarPU-MPI initialization function (starpu_mpi_init_conf)
+    which allows StarPU-MPI to manage reserving a core for the MPI thread, or
+    merging it with CPU driver 0.
+  * Add possibility to delay the termination of a task with the
+    functions starpu_task_end_dep_add() which specifies the number of
+    calls to the function starpu_task_end_dep_release() needed to
+    trigger the task termination.
+  * Add possibility to define the sequential consistency at the task level
+    for each handle used by the task.
+  * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and
+    STARPU_SPECIFIC_NODE_SLOW as generic values for codelet specific memory
+    nodes which can be used instead of exact node numbers.
+  * Add starpu_get_next_bindid and starpu_bind_thread_on to allow binding an
+    application-started thread on a free core. Use it in StarPU-MPI to
+    automatically bind the MPI thread on an available core.
+  * Add STARPU_RESERVE_NCPU environment variable and reserve_ncpus config
+    field to make StarPU use a few cores less.
 
 Small features:
   * Scheduling contexts may now be associated a user data pointer at creation
@@ -63,7 +80,7 @@ Small features:
   * Add SubmitOrder trace field.
   * Add workerids and workerids_len task fields.
   * Add priority management to StarPU-MPI. Can be disabled with
-  the STARPU_MPI_PRIORITIES environment variable.
+    the STARPU_MPI_PRIORITIES environment variable.
   * Add STARPU_MAIN_THREAD_CPUID and STARPU_MPI_THREAD_CPUID environment
     variables.
   * Add disk to disk copy functions and support asynchronous full read/write
@@ -82,11 +99,21 @@ Small features:
     fxt options (to be used with STARPU_GENERATE_TRACE)
   * New function starpu_task_set() similar as starpu_task_build() but
     with a task object given as the first parameter
+  * New functions
+    starpu_data_partition_submit_sequential_consistency() and
+    starpu_data_unpartition_submit_sequential_consistency()
+  * Add a new value STARPU_TASK_SYNCHRONOUS to be used in
+    starpu_task_insert() to define if the task is (or not) synchronous
+  * Add memory states events in the traces.
+  * Add starpu_sched_component_estimated_end_min_add() to fix termination
+    estimations in modular schedulers.
 
 Changes:
   * Vastly improve simgrid simulation time.
   * Switch default scheduler to lws.
   * Add "to" parameter to pull_task and can_push methods of components.
+  * Deprecate handle_to_pointer interface operation in favor of new to_pointer
+    operation.
 
 Small changes:
   * Use asynchronous transfers for task data fetches with were not prefetched.
@@ -100,6 +127,10 @@ StarPU 1.2.5 (git revision xxx)
 Small features:
   * Add a new value STARPU_TASK_COLOR to be used in
     starpu_task_insert() to pick up the color of a task in dag.dot
+  * Add starpu_data_pointer_is_inside().
+
+Changes:
+  * Do not export -lcuda -lcudart -lOpenCL in *starpu*.pc.
 
 StarPU 1.2.4 (git revision 255cf98175ef462749780f30bfed21452b74b594)
 ==============================================

+ 6 - 3
configure.ac

@@ -173,7 +173,7 @@ if test x$enable_simgrid = xyes ; then
 
 	# Latest functions
 	AC_CHECK_FUNCS([MSG_process_attach MSG_zone_get_hosts MSG_process_self_name])
-	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create])
+	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 
@@ -2153,7 +2153,7 @@ if test x$use_fxt = xyes; then
 		AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used])
 		save_LIBS="$LIBS"
 		LIBS="$LIBS $POTI_LIBS"
-		AC_CHECK_FUNCS([poti_init_custom])
+		AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent])
 		LIBS="$save_LIBS"
 		FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS"
 		FXT_LIBS="$FXT_LIBS $POTI_LIBS"
@@ -2593,6 +2593,9 @@ if test "x$STARPU_DEVEL" != x; then
 	IS_SUPPORTED_CFLAG(-Wundef)
 	IS_SUPPORTED_CFLAG(-Wshadow)
 	IS_SUPPORTED_CFLAG(-Werror=pointer-arith)
+	if test x$enable_debug = xyes; then
+		IS_SUPPORTED_CFLAG(-fno-optimize-sibling-calls)
+	fi
 fi
 AM_CONDITIONAL([STARPU_DEVEL],[test "x$STARPU_DEVEL" != x])
 
@@ -3495,7 +3498,7 @@ AM_CONDITIONAL(AVAILABLE_DOC, [test x$available_doc != xno])
 ###############################################################################
 
 # these are the flags needed for linking libstarpu (and thus also for static linking)
-LIBSTARPU_LDFLAGS="$HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS $DGELS_LIBS"
+LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS $DGELS_LIBS"
 AC_SUBST([LIBSTARPU_LDFLAGS])
 
 LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la

+ 60 - 0
doc/doxygen/chapters/301_tasks.doxy

@@ -55,6 +55,66 @@ other tasks and may thus be a bottleneck if not executed early
 enough), the field starpu_task::priority should be set to transmit the
 priority information to StarPU.
 
+\section TaskDependencies Task Dependencies
+
+\subsection SequentialConsistency Sequential Consistency
+
+By default, task dependencies are inferred from data dependency (sequential
+coherency) by StarPU. The application can however disable sequential coherency
+for some data, and dependencies can be specifically expressed.
+
+Setting (or unsetting) sequential consistency can be done at the data
+level by calling starpu_data_set_sequential_consistency_flag() for a
+specific data or starpu_data_set_default_sequential_consistency_flag()
+for all data.
+
+Setting (or unsetting) sequential consistency can also be done at task
+level by setting the field starpu_task::sequential_consistency to 0.
+
+Sequential consistency can also be set (or unset) for each handle of a
+specific task, this is done by using the field
+starpu_task::handles_sequential_consistency. When set, its value
+should be an array with the number of elements being the number of
+handles for the task, each element of the array being the sequential
+consistency for the i-th handle of the task. The field can easily be
+set when calling starpu_task_insert() with the flag
+::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY
+
+\code{.c}
+char *seq_consistency = malloc(cl.nbuffers * sizeof(char));
+seq_consistency[0] = 1;
+seq_consistency[1] = 1;
+seq_consistency[2] = 0;
+ret = starpu_task_insert(&cl,
+	STARPU_RW, handleA, STARPU_RW, handleB, STARPU_RW, handleC,
+	STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, seq_consistency,
+	0);
+free(seq_consistency);
+\endcode
+
+The internal algorithm used by StarPU to set up implicit dependency is
+as follows:
+\code{.c}
+if (sequential_consistency(task) == 1)
+    for(i=0 ; i<STARPU_TASK_GET_NBUFFERS(task) ; i++)
+      if (sequential_consistency(i-th data, task) == 1)
+        if (sequential_consistency(i-th data) == 1)
+           create_implicit_dependency(...)
+\endcode
+
+\subsection TasksAndTagsDependencies Tasks And Tags Dependencies
+
+One can explicitly set dependencies between tasks using
+starpu_task_declare_deps_array(). Dependencies between tasks can be
+expressed through tags associated to a task with the field
+starpu_task::tag_id and using the function starpu_tag_declare_deps()
+or starpu_tag_declare_deps_array().
+
+The termination of a task can be delayed through the function
+starpu_task_end_dep_add() which specifies the number of calls to the
+function starpu_task_end_dep_release() needed to trigger the task
+termination.
+
 \section SettingManyDataHandlesForATask Setting Many Data Handles For a Task
 
 The maximum number of data a task can manage is fixed by the environment variable

+ 31 - 4
doc/doxygen/chapters/310_data_management.doxy

@@ -840,8 +840,16 @@ main memory instead of copied in the GPU, a pivoting vector for instance.
 This can be achieved by setting the starpu_codelet::specific_nodes flag to
 <c>1</c>, and then fill the starpu_codelet::nodes array (or starpu_codelet::dyn_nodes when
 starpu_codelet::nbuffers is greater than \ref STARPU_NMAXBUFS) with the node numbers
-where data should be copied to, or <c>-1</c> to let StarPU copy it to the memory node
-where the task will be executed. For instance, with the following codelet:
+where data should be copied to, or <c>STARPU_SPECIFIC_NODE_LOCAL</c> to let
+StarPU copy it to the memory node where the task will be executed.
+
+<c>STARPU_SPECIFIC_NODE_CPU</c> can also be used to request data to be
+put in CPU-accessible memory (and let StarPU choose the NUMA node).
+<c>STARPU_SPECIFIC_NODE_FAST</c> and <c>STARPU_SPECIFIC_NODE_SLOW</c> can also be
+used.
+
+For instance,
+with the following codelet:
 
 \code{.c}
 struct starpu_codelet cl =
@@ -850,12 +858,31 @@ struct starpu_codelet cl =
 	.nbuffers = 2,
 	.modes = {STARPU_RW, STARPU_RW},
 	.specific_nodes = 1,
-	.nodes = {STARPU_MAIN_RAM, -1},
+	.nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL},
 };
 \endcode
 
-the first data of the task will be kept in the main memory, while the second
+the first data of the task will be kept in the CPU memory, while the second
 data will be copied to the CUDA GPU as usual. A working example is available in
 <c>tests/datawizard/specific_node.c</c>
 
+With the following codelet:
+
+\code{.c}
+struct starpu_codelet cl =
+{
+	.cuda_funcs = { kernel },
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW},
+	.specific_nodes = 1,
+	.nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_SLOW},
+};
+\endcode
+
+The first data will be copied into fast (but probably size-limited) local memory
+while the second data will be left in slow (but large) memory. This makes sense
+when the kernel does not make so many accesses to the second data, and thus data
+being remote e.g. over a PCI bus is not a performance problem, and avoids
+filling the fast local memory with data which does not need the performance.
+
 */

+ 1 - 1
doc/doxygen/chapters/320_scheduling.doxy

@@ -250,7 +250,7 @@ methods of the policy.
 
 Make sure to have a look at the \ref API_Scheduling_Policy section, which
 provides a list of the available functions for writing advanced schedulers, such
-as starpu_task_expected_length(), starpu_task_expected_data_transfer_time(),
+as starpu_task_expected_length(), starpu_task_expected_data_transfer_time_for(),
 starpu_task_expected_energy(), etc. Other
 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
 starpu_transfer_predict(), ...

+ 70 - 72
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012,2015-2017                      Inria
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -108,10 +108,8 @@ StarPU's internal locking.
 The environment variable \ref STARPU_FXT_TRACE can be set to 0 to disable the
 generation of the <c>prof_file_XXX_YYY</c> file.
 
-\subsection CreatingAGanttDiagram Creating a Gantt Diagram
-
-When the FxT trace file <c>prof_file_something</c> has been generated, it is possible to
-generate a trace in the Paje format by calling:
+When the FxT trace file <c>prof_file_something</c> has been generated,
+it is possible to generate different trace formats by calling:
 
 \verbatim
 $ starpu_fxt_tool -i /tmp/prof_file_something
@@ -121,46 +119,67 @@ Or alternatively, setting the environment variable \ref STARPU_GENERATE_TRACE
 to <c>1</c> before application execution will make StarPU do it automatically at
 application shutdown.
 
-This will create a file <c>paje.trace</c> in the current directory that
-can be inspected with the ViTE (http://vite.gforge.inria.fr/) trace
-visualizing open-source tool.  It is possible to open the
-file <c>paje.trace</c> with ViTE by using the following command:
+One can also set the environment variable \ref
+STARPU_GENERATE_TRACE_OPTIONS to specify options, see
+<c>starpu_fxt_tool --help</c>, for example:
 
 \verbatim
-$ vite paje.trace
+$ export STARPU_GENERATE_TRACE=1
+$ export STARPU_GENERATE_TRACE_OPTIONS="-no-acquire"
 \endverbatim
 
-To get names of tasks instead of "unknown", fill the optional
-starpu_codelet::name, or use a performance model for them.
-
-One can also introduce user-defined events in the diagram thanks to the
-starpu_fxt_trace_user_event_string() function.
-
-In the MPI execution case, \ref STARPU_GENERATE_TRACE will not work as expected
-(each node will try to generate paje.trace, thus mixing outputs...), you have to
-collect the trace files from the MPI nodes, and
-specify them all on the command <c>starpu_fxt_tool</c>, for instance:
+When running a MPI application, \ref STARPU_GENERATE_TRACE will not
+work as expected (each node will try to generate trace files, thus
+mixing outputs...), you have to collect the trace files from the MPI
+nodes, and specify them all on the command <c>starpu_fxt_tool</c>, for
+instance:
 
 \verbatim
 $ starpu_fxt_tool -i /tmp/prof_file_something*
 \endverbatim
 
-By default, all tasks are displayed using a green color. To display tasks with
-varying colors, pass option <c>-c</c> to <c>starpu_fxt_tool</c>.
+By default, the generated trace contains all information. To reduce
+the trace size, various <c>-no-foo</c> options can be passed to
+<c>starpu_fxt_tool</c>, see <c>starpu_fxt_tool --help</c> .
+
+\subsubsection CreatingAGanttDiagram Creating a Gantt Diagram
+
+One of the generated files is a trace in the Paje format. The file,
+located in the current directory, is named <c>paje.trace</c>. It can
+be viewed with ViTE (http://vite.gforge.inria.fr/), a trace
+visualizing open-source tool.  To open the file <c>paje.trace</c> with
+ViTE, use the following command:
 
-By default, the trace contains all informations. To reduce the trace size,
-various <c>-no-foo</c> options can be passed to <c>starpu_fxt_tool</c>, see
-<c>starpu_fxt_tool --help</c> .
+\verbatim
+$ vite paje.trace
+\endverbatim
 
-To identify tasks precisely, the application can set the starpu_task::tag_id field of the
-task (or use \ref STARPU_TAG_ONLY when using starpu_task_insert()), and
-the value of the tag will show up in the trace.
+Tasks can be assigned a name (instead of the default 'unknown') by
+filling the optional starpu_codelet::name, or assigning them a
+performance model. The name can also be set with the field
+starpu_task::name or by using \ref STARPU_NAME when calling
+starpu_task_insert().
+
+Tasks are assigned default colors based on the worker which executed
+them (green for CPUs, yellow/orange/red for CUDAs, blue for OpenCLs,
+red for MICs, ...). To use a different color for every type of task,
+one can specify the option <c>-c</c> to <c>starpu_fxt_tool</c> or in
+\ref STARPU_GENERATE_TRACE_OPTIONS. Tasks can also be given a specific
+color by setting the field starpu_codelet::color or the
+starpu_task::color. Colors are expressed with the following format
+0xRRGGBB (e.g 0xFF0000 for red). See
+<c>basic_examples/task_insert_color</c> for examples on how to assign
+colors.
+
+To identify tasks precisely, the application can also set the field
+starpu_task::tag_id or use \ref STARPU_TAG_ONLY when calling
+starpu_task_insert(). The value of the tag will then show up in the
+trace.
 
-It can also set the starpu_task::name field of the task (or use \ref STARPU_NAME)
-when using starpu_task_insert()), to replace in traces the name of the codelet
-with an arbitrarily chosen name.
+One can also introduce user-defined events in the diagram thanks to the
+starpu_fxt_trace_user_event_string() function.
 
-It can also set the iteration number, by just calling starpu_iteration_push()
+One can also set the iteration number, by just calling starpu_iteration_push()
 at the beginning of submission loops and starpu_iteration_pop() at the end of
 submission loops. These iteration numbers will show up in traces for all tasks
 submitted from there.
@@ -177,33 +196,23 @@ $ fxt_print -o -f /tmp/prof_file_something
 
 Timings are in nanoseconds (while timings as seen in ViTE are in milliseconds).
 
-\subsection CreatingADAGWithGraphviz Creating a DAG With Graphviz
+\subsubsection CreatingADAGWithGraphviz Creating a DAG With Graphviz
 
-When the FxT trace file <c>prof_file_something</c> has been generated, it is possible to
-generate a task graph in the DOT format by calling:
+Another generated trace file is a task graph described using the DOT
+language. The file, created in the current directory, is named
+<c>dag.dot</c>.
+It is possible to get a graphical output of the graph by using the
+<c>graphviz</c> library:
 
-\verbatim
-$ starpu_fxt_tool -i /tmp/prof_file_something
-\endverbatim
-
-This will create a <c>dag.dot</c> file in the current directory. This file is a
-task graph described using the DOT language. It is possible to get a
-graphical output of the graph by using the graphviz library:
 
 \verbatim
 $ dot -Tpdf dag.dot -o output.pdf
 \endverbatim
 
-\subsection TraceTaskDetails Getting Task Details
-
-When the FxT trace file <c>prof_file_something</c> has been generated, details on the
-executed tasks can be retrieved by calling:
+\subsubsection TraceTaskDetails Getting Task Details
 
-\verbatim
-$ starpu_fxt_tool -i /tmp/prof_file_something
-\endverbatim
-
-This will create a <c>tasks.rec</c> file in the current directory.  This file
+Another generated trace file gives details on the executed tasks. The
+file, created in the current directory, is named <c>tasks.rec</c>. This file
 is in the recutils format, i.e. <c>Field: value</c> lines, and empty lines to
 separate each task.  This can be used as a convenient input for various ad-hoc
 analysis tools. By default it only contains information about the actual
@@ -226,18 +235,12 @@ Another possibility is to obtain the performance models as an auxiliary <c>perfm
 $ starpu_perfmodel_recdump tasks.rec -o perfmodel.rec
 \endverbatim
 
-\subsection MonitoringActivity Monitoring Activity
+\subsubsection MonitoringActivity Monitoring Activity
 
-When the FxT trace file <c>prof_file_something</c> has been generated, it is possible to
-generate an activity trace by calling:
-
-\verbatim
-$ starpu_fxt_tool -i /tmp/prof_file_something
-\endverbatim
-
-This will create a file <c>activity.data</c> in the current
-directory. A profile of the application showing the activity of StarPU
-during the execution of the program can be generated:
+Another generated trace file is an activity trace. The file, created
+in the current directory, is named <c>activity.data</c>. A profile of
+the application showing the activity of StarPU during the execution of
+the program can be generated:
 
 \verbatim
 $ starpu_workers_activity activity.data
@@ -259,18 +262,13 @@ evolution of the number of tasks available in the system during the execution.
 Ready tasks are shown in black, and tasks that are submitted but not
 schedulable yet are shown in grey.
 
-\subsection Animation Getting Modular Schedular Animation
+\subsubsection Animation Getting Modular Scheduler Animation
 
 When using modular schedulers (i.e. schedulers which use a modular architecture,
-and whose name start with "modular-"), the command
-
-\verbatim
-$ starpu_fxt_tool -i /tmp/prof_file_something
-\endverbatim
-
-will also produce a <c>trace.html</c> file which can be viewed in a
-javascript-enabled web browser. It shows the flow of tasks between the
-components of the modular scheduler.
+and whose name start with "modular-"), the call to
+<c>starpu_fxt_tool</c> will also produce a <c>trace.html</c> file
+which can be viewed in a javascript-enabled web browser. It shows the
+flow of tasks between the components of the modular scheduler.
 
 \subsection LimitingScopeTrace Limiting The Scope Of The Trace
 

+ 45 - 7
doc/doxygen/chapters/410_mpi_support.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2018                                CNRS
- * Copyright (C) 2011-2013,2017                           Inria
+ * Copyright (C) 2011-2013,2016,2017                      Inria
  * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -141,8 +141,7 @@ int main(int argc, char **argv)
 {
     int rank, size;
 
-    starpu_init(NULL);
-    starpu_mpi_init(&argc, &argv, 1);
+    starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
     starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
     starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -187,7 +186,6 @@ int main(int argc, char **argv)
     starpu_task_wait_for_all();
 
     starpu_mpi_shutdown();
-    starpu_shutdown();
 
     if (rank == last_rank)
     {
@@ -205,10 +203,10 @@ the beginning and the end.
 
 \section MPIInitialization How to Initialize StarPU-MPI
 
-As seen in the previous example, one has to call starpu_mpi_init() to
+As seen in the previous example, one has to call starpu_mpi_init_conf() to
 initialize StarPU-MPI. The third parameter of the function indicates
-if MPI should be initialized by StarPU or if the application will do
-it itself. If the application initializes MPI itself, it must call
+if MPI should be initialized by StarPU or if the application did it
+itself. If the application initializes MPI itself, it must call
 <c>MPI_Init_thread()</c> with <c>MPI_THREAD_SERIALIZED</c> or
 <c>MPI_THREAD_MULTIPLE</c>, since StarPU-MPI uses a separate thread to
 perform the communications. <c>MPI_THREAD_MULTIPLE</c> is necessary if
@@ -846,6 +844,46 @@ starpu_mpi_gather_detached(data_handles, nblocks, 0, MPI_COMM_WORLD);
 Other collective operations would be easy to define, just ask starpu-devel for
 them!
 
+\section MPIDriver Make StarPU-MPI progression thread execute tasks
+
+The default behaviour of StarPU-MPI is to spawn an MPI thread to take care only
+of MPI communications in an active fashion (i.e the StarPU-MPI thread sleeps
+only when there is no active request submitted by the application), with the
+goal of being as reactive as possible to communications. Knowing that, users
+usually leave one free core for the MPI thread when starting a distributed
+execution with StarPU-MPI.  However, this could result in a loss of performance
+for applications that do not require an extreme reactivity to MPI
+communications.
+
+The starpu_mpi_init_conf() routine allows the user to give the
+starpu_conf configuration structure of StarPU (usually given to the
+starpu_init() routine) to StarPU-MPI, so that StarPU-MPI reserves for its own
+use one of the CPU drivers of the current computing node, or one of the CPU
+cores, and then calls starpu_init() internally.
+
+This allows the MPI communication thread to call a StarPU CPU driver to run
+tasks when there are no active requests to take care of, and thus recover the
+computational power of the "lost" core. Since there is a trade-off between
+executing tasks and polling MPI requests, which is how much the application
+wants to lose in reactivity to MPI communications to get back the computing
+power of the core dedicated to the StarPU-MPI thread, there are two environment
+variables to pilot the behaviour of the MPI thread so that users can tune
+this trade-off depending on the behaviour of the application.
+
+The \ref STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable sets how many times
+the MPI progression thread goes through the MPI_Test() loop on each active communication request
+(and thus tries to make communications progress by going into the MPI layer)
+before executing tasks. The default value for this environment variable is 0,
+which means that the support for interleaving task execution and communication
+polling is deactivated, thus returning the MPI progression thread to its
+original behaviour.
+
+The \ref STARPU_MPI_DRIVER_TASK_FREQUENCY environment variable sets how many tasks
+are executed by the MPI communication thread before checking all active
+requests again. While this environment variable allows a better use of the core
+dedicated to StarPU-MPI for computations, it also decreases the reactivity of
+the MPI communication thread as much.
+
 \section MPIDebug Debugging MPI
 
 Communication trace will be enabled when the environment variable

+ 35 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -36,6 +36,17 @@ not allocate more CPU workers than there are physical CPUs, and that
 some CPUs are used to control the accelerators.
 </dd>
 
+<dt>STARPU_RESERVE_NCPU</dt>
+<dd>
+\anchor STARPU_RESERVE_NCPU
+\addindex __env__STARPU_RESERVE_NCPU
+Specify the number of CPU cores that should not be used by StarPU, so the
+application can use starpu_get_next_bindid() and starpu_bind_thread_on() to bind
+its own threads.
+
+This option is ignored if \ref STARPU_NCPUS or starpu_conf::ncpus is set.
+</dd>
+
 <dt>STARPU_NCPUS</dt>
 <dd>
 \anchor STARPU_NCPUS
@@ -615,6 +626,30 @@ of one of the nodes of a big cluster without actually running the rest.
 It of course does not provide computation results and timing.
 </dd>
 
+<dt>STARPU_MPI_DRIVER_CALL_FREQUENCY</dt>
+<dd>
+\anchor STARPU_MPI_DRIVER_CALL_FREQUENCY
+\addindex __env__STARPU_MPI_DRIVER_CALL_FREQUENCY
+When set to a positive value, activates the interleaving of the execution of
+tasks with the progression of MPI communications (\ref MPISupport). The
+starpu_mpi_init_conf() function must have been called by the application
+for that environment variable to be used. When set to 0, the MPI progression
+thread does not use the driver given by the user at all, and only focuses on
+making MPI communications progress.
+</dd>
+
+<dt>STARPU_MPI_DRIVER_TASK_FREQUENCY</dt>
+<dd>
+\anchor STARPU_MPI_DRIVER_TASK_FREQUENCY
+\addindex __env__STARPU_MPI_DRIVER_TASK_FREQUENCY
+When set to a positive value, allows the mechanism interleaving the execution of
+tasks with the progression of MPI communications to execute several tasks before
+checking communication requests again (\ref MPISupport). The
+starpu_mpi_init_conf() function must have been called by the application
+for that environment variable to be used, and the
+STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable set to a positive value.
+</dd>
+
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST

+ 33 - 1
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -415,7 +415,7 @@ compiler implicitly do it in e.g. static storage case.
     Optional name of the codelet. This can be useful for debugging
     purposes.
 
-\var const char *starpu_codelet::color
+\var unsigned starpu_codelet::color
     Optional color of the codelet. This can be useful for debugging
     purposes.
 
@@ -520,6 +520,10 @@ the configuration of a task allocated with starpu_task_create().
     codelet, one should either define this field or the field
     starpu_task::modes defined above.
 
+\var char *starpu_task::handles_sequential_consistency
+    Optional pointer to an array of characters which allows to define
+    the sequential consistency for each handle for the current task.
+
 \var void *starpu_task::cl_arg
     Optional pointer which is passed to the codelet through the second
     argument of the codelet implementation (e.g.
@@ -860,6 +864,24 @@ starpu_task::modes or the \p i -th element of the field
 starpu_task::dyn_modes
 (see \ref SettingManyDataHandlesForATask)
 
+\def STARPU_CODELET_GET_NODE(codelet, i)
+\ingroup API_Codelet_And_Tasks
+Return the target node of the \p i -th data handle of \p codelet.
+If \p codelet is defined with a static or dynamic number of
+handles, will either return the \p i -th element of the field
+starpu_codelet::nodes or the \p i -th element of the field
+starpu_codelet::dyn_nodes
+(see \ref SettingManyDataHandlesForATask)
+
+\def STARPU_CODELET_SET_NODE(codelet, node, i)
+\ingroup API_Codelet_And_Tasks
+Set the target node of the \p i -th data handle of \p codelet.
+If \p codelet is defined with a static or dynamic number of
+handles, will either set the \p i -th element of the field
+starpu_codelet::nodes or the \p i -th element of the field
+starpu_codelet::dyn_nodes
+(see \ref SettingManyDataHandlesForATask)
+
 \fn struct starpu_task *starpu_task_create(void)
 \ingroup API_Codelet_And_Tasks
 Allocate a task structure and initialize it with default
@@ -977,6 +999,16 @@ Return the task currently executed by the
 worker, or <c>NULL</c> if it is called either from a thread that is not a
 task or simply because there is no task being executed at the moment.
 
+\fn int starpu_task_get_current_data_node(unsigned i)
+\ingroup API_Codelet_And_Tasks
+Return the memory node number of parameter \p i of the task currently executed,
+or -1 if it is called either from a thread that is not a task or simply because
+there is no task being executed at the moment.
+
+Usually, the returned memory node number is simply the memory node
+for the current worker. That may however be different when using e.g.
+starpu_codelet::specific_nodes .
+
 \fn const char *starpu_task_get_name(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
 Return the name of \p task, i.e. either its starpu_task::name field, or

+ 14 - 0
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -43,8 +43,16 @@ Per-interface data transfer methods.
     This provides a series of methods for performing ram/cuda/opencl synchronous and asynchronous transfers.
 
 \var void *(*starpu_data_interface_ops::handle_to_pointer)(starpu_data_handle_t handle, unsigned node)
+\deprecated
+    Use starpu_data_interface_ops::to_pointer instead.
     Return the current pointer (if any) for the handle on the given node.
 
+\var void *(*starpu_data_interface_ops::to_pointer)(void *data_interface, unsigned node)
+    Return the current pointer (if any) for the given interface on the given node.
+
+\var int (*starpu_data_interface_ops::pointer_is_inside)(void *data_interface, unsigned node, void *pointer)
+    Return whether the given \p pointer is within the data for the given interface on the given node.
+
 \var size_t (*starpu_data_interface_ops::get_size)(starpu_data_handle_t handle)
     Return an estimation of the size of data, for performance models.
 
@@ -524,6 +532,12 @@ Return the pointer associated with \p handle on node \p node or <c>NULL</c>
 if handle’s interface does not support this operation or data for this
 \p handle is not allocated on that \p node.
 
+\fn int starpu_data_pointer_is_inside(starpu_data_handle_t handle, unsigned node, void *pointer)
+\ingroup API_Data_Interfaces
+Return whether the given \p pointer is within the data for \p handle on node \p
+node (1) or not (0). If the handle interface does not support this operation,
+and thus the result is unknown, -1 is returned.
+
 \fn void *starpu_data_get_local_ptr(starpu_data_handle_t handle)
 \ingroup API_Data_Interfaces
 Return the local pointer associated with \p handle or <c>NULL</c> if

+ 13 - 1
doc/doxygen/chapters/api/data_partition.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                      CNRS
  * Copyright (C) 2011-2013                                Inria
  * Copyright (C) 2009-2011,2014-2015,2017-2018            Université de Bordeaux
  *
@@ -190,6 +190,12 @@ in readonly mode through starpu_data_partition_readonly_submit(), and will upgra
 that partitioning into read-write mode for the \p children, by invalidating \p
 initial_handle, and adding the necessary dependencies.
 
+\fn void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency)
+\ingroup API_Data_Partition
+Similar to starpu_data_partition_submit() but also allows to
+specify the coherency to be used for the main data \p initial_handle
+through the parameter \p sequential_consistency.
+
 \fn void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
 \ingroup API_Data_Partition
 This assumes that \p initial_handle is partitioned into \p children, and submits
@@ -215,6 +221,12 @@ state as obtained with starpu_data_partition_readonly_submit().
 \p gathering_node can be set to -1 to let the runtime decide which memory node
 should be used to gather the pieces.
 
+\fn void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency)
+\ingroup API_Data_Partition
+Similar to starpu_data_unpartition_submit() but also allows to
+specify the coherency to be used for the main data \p initial_handle
+through the parameter \p sequential_consistency.
+
 \fn void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
 \ingroup API_Data_Partition
 This should be used to clear the partition planning established between \p

+ 12 - 1
doc/doxygen/chapters/api/explicit_dependencies.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                      CNRS
  * Copyright (C) 2009-2011,2014-2016                      Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  *
@@ -135,4 +135,15 @@ to execute the tasks. When called several times on the same tag,
 notification will be done only on first call, thus implementing "OR"
 dependencies, until the tag is restarted using starpu_tag_restart().
 
+\fn void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
+\ingroup API_Explicit_Dependencies
+Add \p nb_deps end dependencies to the task \p t. This means the task
+will not terminate until the required number of calls to the function
+starpu_task_end_dep_release() has been made.
+
+\fn void starpu_task_end_dep_release(struct starpu_task *t)
+\ingroup API_Explicit_Dependencies
+Unlock 1 end dependency of the task \p t. This function must be called
+after starpu_task_end_dep_add().
+
 */

+ 32 - 1
doc/doxygen/chapters/api/initialization.doxy

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2011-2012,2014,2017                      Inria
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -281,12 +281,43 @@ the argument was <c>NULL</c>.
 \ingroup API_Initialization_and_Termination
 Return 1 if StarPU is already initialized.
 
+\fn int starpu_wait_initialized(void)
+\ingroup API_Initialization_and_Termination
+Wait for starpu_init() call to finish.
+
 \fn void starpu_shutdown(void)
 \ingroup API_Initialization_and_Termination
 This is StarPU termination method. It must be called at the end of the
 application: statistics and other post-mortem debugging information
 are not guaranteed to be available until this method has been called.
 
+\def STARPU_THREAD_ACTIVE
+\ingroup API_Initialization_and_Termination
+This flag should be passed to starpu_get_next_bindid() and
+starpu_bind_thread_on() when binding a thread which will significantly eat CPU
+time, and should thus have its own dedicated CPU.
+
+\fn int starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred)
+\ingroup API_Initialization_and_Termination
+This returns a PU binding ID which can be used to bind threads with
+starpu_bind_thread_on(). \p flags can be set to STARPU_THREAD_ACTIVE or 0.
+When \p npreferred is set to non-zero, \p preferred is an array of size \p
+npreferred in which a preference of PU binding IDs can be set. By default StarPU
+will return the first PU available for binding.
+
+\fn int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name)
+\ingroup API_Initialization_and_Termination
+This binds the calling thread on the given \p cpuid (which should have been
+obtained with starpu_get_next_bindid()).
+
+This returns -1 if a thread was already bound to this PU (but binding will still
+have been done, and a warning will have been printed), so the caller can tell
+the user how to avoid the issue.
+
+\p name should be set to a unique string so that different calls with the same
+name for the same cpuid do not produce a warning.
+
+
 \fn void starpu_pause(void)
 \ingroup API_Initialization_and_Termination
 Suspend the processing of new tasks by

+ 12 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -42,7 +42,7 @@ starpu_task::execute_on_a_specific_worker)
 <li> the specific values ::STARPU_VALUE, ::STARPU_CALLBACK,
 ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, ::STARPU_PRIORITY,
 ::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS, ::STARPU_CL_ARGS_NFREE,
-::STARPU_TASK_DEPS_ARRAY, ::STARPU_TASK_COLOR
+::STARPU_TASK_DEPS_ARRAY, ::STARPU_TASK_COLOR, ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, ::STARPU_TASK_SYNCHRONOUS
 followed by the appropriated objects as defined elsewhere.
 </ul>
 
@@ -161,6 +161,17 @@ given values.
 Used when calling starpu_task_insert(), must be followed by an integer
 representing a color
 
+\def STARPU_TASK_SYNCHRONOUS
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), must be followed by an integer
+stating if the task is synchronous or not
+
+\def STARPU_HANDLES_SEQUENTIAL_CONSISTENCY
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), must be followed by an array
+of characters representing the sequential consistency for each buffer
+of the task.
+
 \fn void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room)
 \ingroup API_Insert_Task
 Assuming that there are already \p current_buffer data handles passed to

+ 8 - 0
doc/doxygen/chapters/api/modularized_scheduler.doxy

@@ -268,6 +268,14 @@ of the children of the component.
 \ingroup API_Modularized_Scheduler
 function that can be used for the estimated_end component method, which just computes the minimum completion time of the children.
 
+\fn double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
+\ingroup API_Modularized_Scheduler
+function that can be used for the estimated_end component method, which computes
+the minimum completion time of the children, and adds to it an estimation of how
+existing queued work, plus the exp_len work, can be completed. This is typically
+used instead of starpu_sched_component_estimated_end_min when the component
+contains a queue of tasks, which thus needs to be added to the estimations.
+
 \fn double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
 \ingroup API_Modularized_Scheduler
 default function for the estimated_end component method, which just computes the average completion time of the children.

+ 15 - 10
doc/doxygen/chapters/api/mpi.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2011-2012,2017                           Inria
+ * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2011-2012,2016,2017                      Inria
  * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -26,13 +26,18 @@
 Defined when StarPU has been installed with MPI support. It should be
 used in your code to detect the availability of MPI.
 
+\fn int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf)
+\ingroup API_MPI_Support
+Initialize the StarPU library with the given \p conf, and initialize the
+StarPU-MPI library with the given MPI communicator \p comm. \p initialize_mpi
+indicates if MPI should be initialized or not by StarPU.
+StarPU-MPI takes the opportunity to modify \p conf to either reserve a core for
+its MPI thread (by default), or execute MPI calls on the CPU driver 0 between tasks.
+
 \fn int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
 \ingroup API_MPI_Support
-Initialize the starpumpi library with the given communicator \p comm.
-\p initialize_mpi indicates if MPI should be initialized or not by StarPU.
-If the value is not 0, MPI will be initialized by calling
-<c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED, ...)</c>.
-starpu_init() must be called before starpu_mpi_init_comm().
+This is the same as starpu_mpi_init_conf(), except that this does not initialize
+the StarPU library. The caller thus has to call starpu_init() before this.
 
 \fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
 \ingroup API_MPI_Support
@@ -55,9 +60,9 @@ calling <c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED,
 
 \fn int starpu_mpi_shutdown(void)
 \ingroup API_MPI_Support
-Clean the starpumpi library. This must be called between calling
-\c starpu_mpi functions and starpu_shutdown(). \c MPI_Finalize() will be
-called if StarPU-MPI has been initialized by starpu_mpi_init().
+Clean the starpumpi library. This must be called after calling any
+\c starpu_mpi functions and before the call to starpu_shutdown(), if any.
+\c MPI_Finalize() will be called if StarPU-MPI has been initialized by starpu_mpi_init().
 
 \fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
 \ingroup API_MPI_Support

+ 33 - 3
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -229,7 +229,13 @@ Return an estimated speedup factor relative to CPU speed
 
 \fn double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task)
 \ingroup API_Scheduling_Policy
-Return expected data transfer time in micro-seconds.
+Return expected data transfer time in micro-seconds for the given \p
+memory_node. Prefer using starpu_task_expected_data_transfer_time_for() which is
+more precise.
+
+\fn double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker)
+\ingroup API_Scheduling_Policy
+Return expected data transfer time in micro-seconds for the given \p worker.
 
 \fn double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode)
 \ingroup API_Scheduling_Policy
@@ -249,11 +255,35 @@ Whether \ref STARPU_PREFETCH was set
 
 \fn int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 \ingroup API_Scheduling_Policy
-Prefetch data for a given task on a given node
+Prefetch data for a given \p task on a given \p node
+
+\fn int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p node with a given priority
 
 \fn int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 \ingroup API_Scheduling_Policy
-Prefetch data for a given task on a given node when the bus is idle
+Prefetch data for a given \p task on a given \p node when the bus is idle
+
+\fn int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p node when the bus is idle with a given priority
+
+\fn int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p worker
+
+\fn int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p worker with a given priority
+
+\fn int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p worker when the bus is idle
+
+\fn int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio)
+\ingroup API_Scheduling_Policy
+Prefetch data for a given \p task on a given \p worker when the bus is idle with a given priority
 
 \fn void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data);
 \ingroup API_Scheduling_Policy

+ 4 - 1
examples/Makefile.am

@@ -216,6 +216,7 @@ STARPU_EXAMPLES +=				\
 	basic_examples/variable			\
 	basic_examples/multiformat              \
 	basic_examples/dynamic_handles          \
+	basic_examples/task_insert_color	\
 	mlr/mlr					\
 	cpp/incrementer_cpp			\
 	cpp/add_vectors				\
@@ -248,7 +249,9 @@ STARPU_EXAMPLES +=				\
 	sched_ctx/dummy_sched_with_ctx		\
 	worker_collections/worker_tree_example  \
 	reductions/dot_product			\
-	reductions/minmax_reduction
+	reductions/minmax_reduction		\
+	dependency/task_end_dep			\
+	dependency/sequential_consistency
 
 endif
 

+ 89 - 0
examples/basic_examples/task_insert_color.c

@@ -0,0 +1,89 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                      CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+/* CPU kernel: double the integer stored in the first (and only) buffer. */
+void func(void *descr[], void *_args)
+{
+	int *value_ptr = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	(void)_args;
+	*value_ptr = *value_ptr * 2;
+}
+
+/* Codelet running func() on CPU over one read-write buffer; no color is set. */
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func},
+	.cpu_funcs_name = {"func"},
+	.nbuffers = 1,
+	.modes = { STARPU_RW }
+};
+
+/* Same codelet as mycodelet, but carrying its own color (0x0000FF). */
+struct starpu_codelet mycodelet_color =
+{
+	.cpu_funcs = {func},
+	.cpu_funcs_name = {"func"},
+	.nbuffers = 1,
+	.modes = { STARPU_RW },
+	.color = 0x0000FF,
+};
+
+/*
+ * Insert the same doubling task three times on one variable so that the
+ * trace shows the default color, a color passed with STARPU_TASK_COLOR, and
+ * a color taken from the codelet itself.
+ * Returns 0 on success and 77 when starpu_init() or the first insertion
+ * reports -ENODEV.
+ */
+int main(int argc, char **argv)
+{
+	int value=42;
+	starpu_data_handle_t handle;
+	int ret;
+
+	// The command line is not used by this example
+	(void)argc;
+	(void)argv;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
+
+	// In the trace file, the following task should be green (executed on CPU)
+	ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask",
+				 0);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		// Nothing was submitted: release the handle before giving up
+		starpu_data_unregister(handle);
+		goto enodev;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	// In the trace file, the following task will be red as specified by STARPU_TASK_COLOR
+	ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask",
+				 STARPU_TASK_COLOR, 0xFF0000,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	// In the trace file, the following task will be blue as specified by the field color of mycodelet_color
+	ret = starpu_task_insert(&mycodelet_color, STARPU_RW, handle, STARPU_NAME, "mytask",
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_task_wait_for_all();
+	starpu_data_unregister(handle);
+
+	starpu_shutdown();
+
+	return 0;
+
+ enodev:
+	return 77;
+}

+ 19 - 9
examples/cpp/add_vectors_interface.cpp

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2014,2016-2017                      CNRS
  * Copyright (C) 2012,2017                                Inria
- * Copyright (C) 2009-2011,2013-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2013-2015,2017-2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -259,7 +259,8 @@ static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s =
 
 static void register_vector_cpp_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
 static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, unsigned dst_node);
-static void *vector_cpp_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
+static void *vector_cpp_to_pointer(void *data_interface, unsigned node);
+static int vector_cpp_pointer_is_inside(void *data_interface, unsigned node, void *ptr);
 static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node);
 static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node);
 static size_t vector_cpp_interface_get_size(starpu_data_handle_t handle);
@@ -277,7 +278,8 @@ static struct starpu_data_interface_ops interface_vector_cpp_ops =
 	.allocate_data_on_node = allocate_vector_cpp_buffer_on_node,
 	.free_data_on_node = free_vector_cpp_buffer_on_node,
 	.copy_methods = &vector_cpp_copy_data_methods_s,
-	.handle_to_pointer = vector_cpp_handle_to_pointer,
+	.to_pointer = vector_cpp_to_pointer,
+	.pointer_is_inside = vector_cpp_pointer_is_inside,
 	.get_size = vector_cpp_interface_get_size,
 	.footprint = footprint_vector_cpp_interface_crc32,
 	.compare = vector_cpp_compare,
@@ -299,7 +301,8 @@ static struct starpu_data_interface_ops interface_vector_cpp_ops =
 	allocate_vector_cpp_buffer_on_node,
 	free_vector_cpp_buffer_on_node,
 	&vector_cpp_copy_data_methods_s,
-	vector_cpp_handle_to_pointer,
+	vector_cpp_to_pointer,
+	vector_cpp_pointer_is_inside,
 	vector_cpp_interface_get_size,
 	footprint_vector_cpp_interface_crc32,
 	vector_cpp_compare,
@@ -316,16 +319,23 @@ static struct starpu_data_interface_ops interface_vector_cpp_ops =
 };
 #endif
 
-static void *vector_cpp_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
+static void *vector_cpp_to_pointer(void *data_interface, unsigned node)
 {
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *)
-		starpu_data_get_interface_on_node(handle, node);
+	(void) node;
+	struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface;
 
 	return (void*) vector_interface->ptr;
 }
 
+/* Tell whether ptr falls inside the nx*elemsize bytes starting at the vector's ptr; node is ignored. */
+static int vector_cpp_pointer_is_inside(void *data_interface, unsigned node, void *ptr)
+{
+	struct vector_cpp_interface *vec = (struct vector_cpp_interface *) data_interface;
+	char *begin = (char*) vec->ptr;
+	char *end = begin + vec->nx*vec->elemsize;
+	char *candidate = (char*) ptr;
+
+	(void) node;
+	return candidate >= begin && candidate < end;
+}
+
 static void register_vector_cpp_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
 {
 	struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface;

+ 181 - 0
examples/dependency/sequential_consistency.c

@@ -0,0 +1,181 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+void cpu_codeletA(void *descr[], void *args);
+void cpu_codeletB(void *descr[], void *args);
+void cpu_codeletC(void *descr[], void *args);
+
+/* Codelet for task A: CPU only, one read-write buffer. */
+struct starpu_codelet clA =
+{
+	.name = "codeletA",
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.cpu_funcs = {cpu_codeletA},
+	.cpu_funcs_name = {"cpu_codeletA"}
+};
+
+/* Codelet for task B: CPU only, one read-write buffer. */
+struct starpu_codelet clB =
+{
+	.name = "codeletB",
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.cpu_funcs = {cpu_codeletB},
+	.cpu_funcs_name = {"cpu_codeletB"}
+};
+
+/* Codelet for task C: CPU only, one read-write buffer. */
+struct starpu_codelet clC =
+{
+	.name = "codeletC",
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.cpu_funcs = {cpu_codeletC},
+	.cpu_funcs_name = {"cpu_codeletC"}
+};
+
+/*
+ * Task A: print the current value, submit task B on the handle received
+ * through the codelet arguments with the sequential consistency of that
+ * handle unset, then double the value. The callback of task B notifies the
+ * tag received as second codelet argument.
+ */
+void cpu_codeletA(void *descr[], void *args)
+{
+	unsigned char handle_sequential_consistency[] = {0};
+	starpu_data_handle_t value_handle;
+	starpu_tag_t tagHoldC;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int ret;
+
+	FPRINTF(stderr, "[Task A] Value = %d\n", *val);
+
+	starpu_codelet_unpack_args(args, &value_handle, &tagHoldC);
+
+	// With several data, one would need to use a dynamically
+	// allocated array for the sequential consistency,
+	// the array could be freed immediately after calling
+	// starpu_task_insert()
+
+	ret = starpu_task_insert(&clB,
+				 STARPU_RW, value_handle,
+				 STARPU_CALLBACK_WITH_ARG, starpu_tag_notify_from_apps, tagHoldC,
+				 STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handle_sequential_consistency,
+				 STARPU_NAME, "taskB",
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	*val = *val * 2;
+}
+
+/* Task B: print the value, check that it is 24, then add 1 to it. */
+void cpu_codeletB(void *descr[], void *args)
+{
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	(void)args;
+
+	FPRINTF(stderr, "[Task B] Value = %d\n", *val);
+	STARPU_ASSERT_MSG(*val == 24, "Incorrect value %d (expected 24)\n", *val);
+	*val = *val + 1;
+}
+
+/* Task C: print the value, check that it is 25, then double it. */
+void cpu_codeletC(void *descr[], void *args)
+{
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	(void)args;
+
+	FPRINTF(stderr, "[Task C] Value = %d\n", *val);
+	STARPU_ASSERT_MSG(*val == 25, "Incorrect value %d (expected 25)\n", *val);
+	*val = *val * 2;
+}
+
+/*
+ * Scenario:
+ *   - submit taskA, held back by a tag
+ *   - submit taskC, held back by another tag
+ *   - release taskA
+ *   - taskA executes       --> it submits taskB
+ *   - taskB executes       --> its callback releases taskC
+ *
+ * The three tasks access the same data in RW mode. taskB is submitted
+ * after taskC, so it would normally have to wait for taskC; since the
+ * sequential consistency of (taskB, data) is unset, taskB can run
+ * straightaway.
+ */
+int main(void)
+{
+	struct starpu_conf conf;
+	starpu_data_handle_t value_handle;
+	starpu_tag_t hold_tag_a = 42;
+	starpu_tag_t hold_tag_c = 84;
+	starpu_tag_t tag_a = 421;
+	starpu_tag_t tag_c = 842;
+	int value = 12;
+	int ret;
+
+	// Can't pass a tag_t through callback arg when it is wider than a pointer
+	if (sizeof(void*) < sizeof(starpu_tag_t))
+		return 77;
+
+	starpu_conf_init(&conf);
+	conf.nmpi_ms = 0;
+	conf.nscc = 0;
+	conf.nmic = 0;
+
+	ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
+
+	// taskA and taskC each wait on their own "hold" tag
+	starpu_tag_declare_deps_array(tag_a, 1, &hold_tag_a);
+	starpu_tag_declare_deps_array(tag_c, 1, &hold_tag_c);
+
+	ret = starpu_task_insert(&clA,
+				 STARPU_TAG, tag_a,
+				 STARPU_RW, value_handle,
+				 STARPU_VALUE, &value_handle, sizeof(starpu_data_handle_t),
+				 STARPU_VALUE, &hold_tag_c, sizeof(starpu_tag_t),
+				 STARPU_NAME, "taskA",
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	ret = starpu_task_insert(&clC,
+				 STARPU_TAG, tag_c,
+				 STARPU_RW, value_handle,
+				 STARPU_NAME, "taskC",
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	// Release taskA (we want to make sure it will execute after taskC has been submitted)
+	starpu_tag_notify_from_apps(hold_tag_a);
+
+	starpu_data_unregister(value_handle);
+
+	STARPU_ASSERT_MSG(value == 50, "Incorrect value %d (expected 50)\n", value);
+
+	starpu_shutdown();
+
+	FPRINTF(stderr, "Value = %d\n", value);
+
+	return ret;
+}

+ 98 - 0
examples/dependency/task_end_dep.c

@@ -0,0 +1,98 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* fprintf wrapper that prints nothing when the STARPU_SSILENT environment variable is set */
+
+void cpu_codelet2(void *descr[], void *args) /* No-op CPU kernel used by cl2 */
+{
+	(void)descr; /* unused on purpose */
+	(void)args; /* unused on purpose */
+}
+
+struct starpu_codelet cl2 = /* Codelet running cpu_codelet2; no nbuffers/modes are set here */
+{
+	.cpu_funcs = {cpu_codelet2},
+	.cpu_funcs_name = {"cpu_codelet2"},
+	.name = "codelet2"
+};
+
+/*
+ * CPU kernel of `cl`: doubles the int variable behind descr[0].
+ * Before doing so it adds one end dependency to the current task and submits
+ * a cl2 task whose callback is starpu_task_end_dep_release() applied to that
+ * same task (presumably this is what eventually lets the current task
+ * terminate -- confirm against the starpu_task_end_dep_* documentation).
+ * `args` is unused.
+ */
+void cpu_codelet(void *descr[], void *args)
+{
+	(void)args;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	struct starpu_task *task;
+	int ret;
+
+	task = starpu_task_get_current();
+	starpu_task_end_dep_add(task, 1);
+
+	ret = starpu_task_insert(&cl2,
+				 STARPU_CALLBACK_WITH_ARG, starpu_task_end_dep_release, task,
+				 0);
+	/* A failed submission would leave the end dependency added above
+	 * without its matching release, so fail loudly like main() does. */
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	*val *= 2;
+}
+
+struct starpu_codelet cl = /* Codelet running cpu_codelet on a single buffer */
+{
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}, /* the one buffer is accessed read-write */
+	.name = "codelet"
+};
+
+int main(void) /* Runs one `cl` task on a registered int (initially 12) and prints the resulting value */
+{
+        int value=12;
+	int ret;
+	starpu_data_handle_t value_handle;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.nmic = 0;
+	conf.nscc = 0;
+	conf.nmpi_ms = 0;
+
+        ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		return 77; /* presumably the test harness "skipped" exit code -- TODO confirm */
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); /* expose the local `value` through value_handle */
+
+	ret = starpu_task_insert(&cl,
+				 STARPU_RW, value_handle,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_data_unregister(value_handle); /* value is only read back after the handle has been unregistered */
+
+        starpu_shutdown();
+
+	FPRINTF(stderr, "Value = %d\n", value);
+
+	return ret; /* ret still holds the (already checked) result of starpu_task_insert */
+}

+ 32 - 7
examples/filters/custom_mf/custom_interface.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013,2015-2017                      CNRS
- * Copyright (C) 2012-2014                                Université de Bordeaux
+ * Copyright (C) 2012-2014, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -73,8 +73,8 @@ static void     register_custom_handle(starpu_data_handle_t handle,
 				       void *data_interface);
 static starpu_ssize_t  allocate_custom_buffer_on_node(void *data_interface_,
 					       unsigned dst_node);
-static void*    custom_handle_to_pointer(starpu_data_handle_t data_handle,
-					 unsigned node);
+static void*    custom_to_pointer(void *data_interface, unsigned node);
+static int      custom_pointer_is_inside(void *data_interface, unsigned node, void *ptr);
 static void     free_custom_buffer_on_node(void *data_interface, unsigned node);
 static size_t   custom_interface_get_size(starpu_data_handle_t handle);
 static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle);
@@ -94,7 +94,8 @@ static struct starpu_data_interface_ops interface_custom_ops =
 {
 	.register_data_handle  = register_custom_handle,
 	.allocate_data_on_node = allocate_custom_buffer_on_node,
-	.handle_to_pointer     = custom_handle_to_pointer,
+	.to_pointer            = custom_to_pointer,
+	.pointer_is_inside     = custom_pointer_is_inside,
 	.free_data_on_node     = free_custom_buffer_on_node,
 	.copy_methods          = &custom_copy_data_methods_s,
 	.get_size              = custom_interface_get_size,
@@ -203,10 +204,9 @@ static void free_custom_buffer_on_node(void *data_interface, unsigned node)
 }
 
 static void*
-custom_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
+custom_to_pointer(void *data, unsigned node)
 {
-	struct custom_data_interface *data_interface =
-		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+	struct custom_data_interface *data_interface = data;
 
 
 	switch(starpu_node_get_kind(node))
@@ -226,6 +226,31 @@ custom_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
 	}
 }
 
+/*
+ * Tell whether ptr falls inside the buffer described by this interface for
+ * the given node, i.e. inside [base, base + nx * elemsize) where base and
+ * elemsize are the cpu/cuda/opencl fields matching the kind of `node`.
+ * Returns non-zero when ptr is inside that range, 0 otherwise.
+ * An unsupported node kind trips assert(0).
+ */
+static int
+custom_pointer_is_inside(void *data, unsigned node, void *ptr)
+{
+	struct custom_data_interface *data_interface = data;
+
+	switch(starpu_node_get_kind(node))
+	{
+		case STARPU_CPU_RAM:
+			return (char*) ptr >= (char*) data_interface->cpu_ptr &&
+				(char*) ptr < (char*) data_interface->cpu_ptr + data_interface->nx * data_interface->ops->cpu_elemsize;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			return (char*) ptr >= (char*) data_interface->cuda_ptr &&
+				(char*) ptr < (char*) data_interface->cuda_ptr + data_interface->nx * data_interface->ops->cuda_elemsize;
+#endif
+#ifdef STARPU_USE_OPENCL
+		case STARPU_OPENCL_RAM:
+			return (char*) ptr >= (char*) data_interface->opencl_ptr &&
+				(char*) ptr < (char*) data_interface->opencl_ptr + data_interface->nx * data_interface->ops->opencl_elemsize;
+#endif
+		default:
+			assert(0);
+	}
+
+	/* Only reached when assert() is compiled out (NDEBUG): report "not
+	 * inside" instead of falling off the end of a non-void function. */
+	return 0;
+}
+
 static size_t custom_interface_get_size(starpu_data_handle_t handle)
 {
 	size_t size;

+ 9 - 1
examples/filters/fmultiple_manual.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2017                                     CNRS
  * Copyright (C) 2017                                     Inria
- * Copyright (C) 2015                                     Université de Bordeaux
+ * Copyright (C) 2015,2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -107,6 +107,14 @@ void empty(void *buffers[], void *cl_arg)
 	 * data, thus getting back all data pieces there.  */
 	(void)buffers;
 	(void)cl_arg;
+
+	/* This check is just for testsuite */
+	int node = starpu_task_get_current_data_node(0);
+	unsigned i;
+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(starpu_task_get_current());
+	STARPU_ASSERT(node >= 0);
+	for (i = 1; i < nbuffers; i++)
+		STARPU_ASSERT(starpu_task_get_current_data_node(i) == node);
 }
 
 struct starpu_codelet cl_switch =

+ 13 - 2
examples/interface/complex_interface.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2015,2017                           CNRS
- * Copyright (C) 2013-2015                                Université de Bordeaux
+ * Copyright (C) 2013-2015, 2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,16 @@
 
 #include "complex_interface.h"
 
+/*
+ * pointer_is_inside hook of the complex interface: returns non-zero when ptr
+ * lies within the storage of the `real` member or of the `imaginary` member
+ * of the interface structure, 0 otherwise. `node` is not consulted.
+ * NOTE(review): the comparison is made against the addresses of the members
+ * themselves; if they are pointers to the actual buffers (the struct is
+ * declared in complex_interface.h, not visible here), confirm this is the
+ * intended range.
+ */
+static int complex_pointer_is_inside(void *data_interface, unsigned node, void *ptr)
+{
+	(void) node;
+	struct starpu_complex_interface *complex_interface = data_interface;
+
+	return ((char*) ptr >= (char*) &complex_interface->real &&
+		(char*) ptr < (char*) (&complex_interface->real + 1))
+	    || ((char*) ptr >= (char*) &complex_interface->imaginary &&
+		(char*) ptr < (char*) (&complex_interface->imaginary + 1));
+}
+
 double *starpu_complex_get_real(starpu_data_handle_t handle)
 {
 	struct starpu_complex_interface *complex_interface =
@@ -195,7 +205,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
 	.footprint = complex_footprint,
 	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
 	.interface_size = sizeof(struct starpu_complex_interface),
-	.handle_to_pointer = NULL,
+	.to_pointer = NULL,
+	.pointer_is_inside = complex_pointer_is_inside,
 	.pack_data = complex_pack_data,
 	.unpack_data = complex_unpack_data,
 	.describe = complex_describe

+ 2 - 2
examples/spmv/matrix_market/mm_to_bcsr.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2008-2009,2011                           Université de Bordeaux
+ * Copyright (C) 2008-2009,2011, 2018                           Université de Bordeaux
  * Copyright (C) 2010-2011,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -51,4 +51,4 @@ typedef struct
 bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r);
 
 /* read the matrix as a set of valuated coordinates */
-bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I, unsigned *J, float *val, unsigned c, unsigned r);
+bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I_, unsigned *J, float *val, unsigned c, unsigned r);

+ 4 - 4
examples/spmv/matrix_market/mmio.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2008-2009,2011,2016                      Université de Bordeaux
+ * Copyright (C) 2008-2009,2011,2016, 2018                      Université de Bordeaux
  * Copyright (C) 2010,2015,2017                           CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -135,11 +135,11 @@ int mm_is_valid(MM_typecode matcode);		/* too complex for a macro */
 
 /*  high level routines */
 
-int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
+int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I_[], int J[],
 		 double val[], MM_typecode matcode);
-int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
+int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I_[], int J[],
 		double val[], MM_typecode matcode);
-int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
+int mm_read_mtx_crd_entry(FILE *f, int *I_, int *J, double *real, double *img,
 			MM_typecode matcode);
 
 int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,

+ 2 - 1
examples/stencil/life_opencl.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2012-2013,2016-2017                      CNRS
- * Copyright (C) 2010-2011,2013-2014                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2014,2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,6 +21,7 @@
 /* #define _externC extern "C" */
 
 #include <stencil.h>
+#define CL_TARGET_OPENCL_VERSION 100
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
 #else

+ 12 - 1
include/fstarpu_mod.f90

@@ -2,7 +2,7 @@
 !
 ! Copyright (C) 2017, 2018                                     CNRS
 ! Copyright (C) 2016-2017                                Inria
-! Copyright (C) 2016-2017                                Université de Bordeaux
+! Copyright (C) 2016-2018                                Université de Bordeaux
 !
 ! StarPU is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
@@ -56,6 +56,8 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY
         type(c_ptr), bind(C) :: FSTARPU_NAME
         type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR
+        type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS
+        type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY
         type(c_ptr), bind(C) :: FSTARPU_NODE_SELECTION_POLICY
 
         type(c_ptr), bind(C) :: FSTARPU_VALUE
@@ -782,6 +784,15 @@ module fstarpu_mod
                         integer(c_int), value, intent(in) :: node
                 end function fstarpu_data_handle_to_pointer
 
+                ! int starpu_data_pointer_is_inside(starpu_data_handle_t handle, unsigned node, void *ptr);
+                function fstarpu_data_pointer_is_inside (dh,node,ptr) bind(C,name="starpu_data_pointer_is_inside")
+                        use iso_c_binding, only: c_ptr, c_int
+                        integer(c_int) :: fstarpu_data_pointer_is_inside
+                        type(c_ptr), value, intent(in) :: dh
+                        integer(c_int), value, intent(in) :: node
+                        type(c_ptr), value, intent(in) :: ptr
+                end function fstarpu_data_pointer_is_inside
+
                 ! void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
                 function fstarpu_data_get_local_ptr (dh) bind(C,name="starpu_data_get_local_ptr")
                         use iso_c_binding, only: c_ptr, c_int

+ 7 - 1
include/starpu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2017                                Inria
- * Copyright (C) 2009-2014,2016-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2014,2016-2018                      Université de Bordeaux
  * Copyright (C) 2010-2015,2017                           CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -94,6 +94,7 @@ struct starpu_conf
 	void (*sched_policy_init)(unsigned);
 
 	int ncpus;
+	int reserve_ncpus;
 	int ncuda;
 	int nopencl;
 	int nmic;
@@ -152,6 +153,11 @@ int starpu_conf_init(struct starpu_conf *conf);
 int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
 int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv);
 int starpu_is_initialized(void);
+void starpu_wait_initialized(void);
+
+#define STARPU_THREAD_ACTIVE (1 << 0)
+unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred);
+int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name);
 
 void starpu_pause(void);
 void starpu_resume(void);

+ 4 - 1
include/starpu_data_filters.h

@@ -3,7 +3,7 @@
  * Copyright (C) 2011                                     Antoine Lucas
  * Copyright (C) 2009-2012,2014-2015,2017                 Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
  * Copyright (C) 2011                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -53,6 +53,9 @@ void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int g
 void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
 void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
 
+void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
+void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
+
 int starpu_data_get_nb_children(starpu_data_handle_t handle);
 starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
 

+ 4 - 1
include/starpu_data_interfaces.h

@@ -124,7 +124,9 @@ struct starpu_data_interface_ops
 	starpu_ssize_t	 (*allocate_data_on_node)	(void *data_interface, unsigned node);
 	void 		 (*free_data_on_node)		(void *data_interface, unsigned node);
 	const struct starpu_data_copy_methods *copy_methods;
-	void * 		 (*handle_to_pointer)		(starpu_data_handle_t handle, unsigned node);
+	void * 		 (*handle_to_pointer)		(starpu_data_handle_t handle, unsigned node); /* deprecated */
+	void * 		 (*to_pointer)			(void *data_interface, unsigned node);
+	int 		 (*pointer_is_inside)		(void *data_interface, unsigned node, void *ptr);
 	size_t 		 (*get_size)			(starpu_data_handle_t handle);
 	uint32_t 	 (*footprint)			(starpu_data_handle_t handle);
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
@@ -150,6 +152,7 @@ void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
 void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
 
 void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
+int starpu_data_pointer_is_inside(starpu_data_handle_t handle, unsigned node, void *ptr);
 void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
 
 void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node);

+ 1 - 0
include/starpu_fxt.h

@@ -49,6 +49,7 @@ struct starpu_fxt_options
 	unsigned ninputfiles;
 	unsigned no_smooth;
 	unsigned no_acquire;
+	unsigned memory_states;
 	unsigned internal;
 	unsigned label_deps;
 	char *filenames[STARPU_FXT_MAX_FILES];

+ 4 - 1
include/starpu_opencl.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012                                Inria
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2014,2018                           Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,6 +21,9 @@
 
 #include <starpu_config.h>
 #ifdef STARPU_USE_OPENCL
+#ifndef CL_TARGET_OPENCL_VERSION
+#define CL_TARGET_OPENCL_VERSION 100
+#endif
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
 #else

+ 3 - 0
include/starpu_sched_component.h

@@ -128,6 +128,7 @@ int starpu_sched_component_can_push(struct starpu_sched_component * component, s
 int starpu_sched_component_can_pull(struct starpu_sched_component * component);
 double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
 double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
+double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
 double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
 
 struct starpu_sched_component_fifo_data
@@ -235,6 +236,8 @@ do \
 } \
 while(0)
 
+#define STARPU_COMPONENT_MUTEX_TRYLOCK(m) STARPU_PTHREAD_MUTEX_TRYLOCK((m))
+
 #define STARPU_COMPONENT_MUTEX_UNLOCK(m) STARPU_PTHREAD_MUTEX_UNLOCK((m))
 
 #ifdef __cplusplus

+ 6 - 0
include/starpu_scheduler.h

@@ -83,11 +83,17 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
 int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio);
 int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
 
+int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio);
+int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker);
+int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio);
+int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker);
+
 uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 uint32_t starpu_task_data_footprint(struct starpu_task *task);
 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
 double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task);
+double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker);
 double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode);
 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);

+ 11 - 1
include/starpu_task.h

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2017                                Inria
  * Copyright (C) 2009-2018                                Université de Bordeaux
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2018                           CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2016                                     Uppsala University
  *
@@ -91,6 +91,9 @@ typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
 
 #define STARPU_VARIABLE_NBUFFERS (-1)
 
+#define STARPU_SPECIFIC_NODE_LOCAL (-1)
+#define STARPU_SPECIFIC_NODE_CPU (-2)
+#define STARPU_SPECIFIC_NODE_SLOW (-3)
 struct starpu_task;
 struct starpu_codelet
 {
@@ -153,6 +156,8 @@ struct starpu_task
 	void *interfaces[STARPU_NMAXBUFS];
 	enum starpu_data_access_mode modes[STARPU_NMAXBUFS];
 
+	unsigned char *handles_sequential_consistency;
+
 	void *cl_arg;
 	size_t cl_arg_size;
 
@@ -225,6 +230,7 @@ struct starpu_task
 #else
 	void *omp_task;
 #endif
+	unsigned nb_termination_call_required;
 	void *sched_data;
 };
 
@@ -299,6 +305,9 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 
 void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 
+void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps);
+void starpu_task_end_dep_release(struct starpu_task *t);
+
 int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 
@@ -348,6 +357,7 @@ void starpu_codelet_init(struct starpu_codelet *cl);
 void starpu_codelet_display_stats(struct starpu_codelet *cl);
 
 struct starpu_task *starpu_task_get_current(void);
+int starpu_task_get_current_data_node(unsigned i);
 
 const char *starpu_task_get_model_name(struct starpu_task *task);
 const char *starpu_task_get_name(struct starpu_task *task);

+ 3 - 1
include/starpu_task_util.h

@@ -64,7 +64,9 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_CL_ARGS_NFREE	(26<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_DEPS_ARRAY	(27<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_COLOR       (28<<STARPU_MODE_SHIFT)
-#define STARPU_SHIFTED_MODE_MAX (29<<STARPU_MODE_SHIFT)
+#define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29<<STARPU_MODE_SHIFT)
+#define STARPU_TASK_SYNCHRONOUS (30<<STARPU_MODE_SHIFT)
+#define STARPU_SHIFTED_MODE_MAX (31<<STARPU_MODE_SHIFT)
 
 int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);

+ 2 - 2
libstarpu-mic.pc.in

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2010-2013,2015,2017                      CNRS
-# Copyright (C) 2009-2011,2013-2015                      Université de Bordeaux
+# Copyright (C) 2009-2011,2013-2015,2018                 Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -23,6 +23,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 2 - 2
libstarpu.pc.in

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2011-2012                                Inria
-# Copyright (C) 2009-2015                                Université de Bordeaux
+# Copyright (C) 2009-2015,2018                           Université de Bordeaux
 # Copyright (C) 2010-2013,2015,2017                      CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -24,6 +24,6 @@ Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ @SIMGRID_CFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
-Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_SC_HYPERVISOR@ @STARPU_EXPORTED_LIBS@
 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
 Requires: @HWLOC_REQUIRES@

+ 3 - 6
mpi/examples/comm/comm.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -90,10 +90,8 @@ int main(int argc, char **argv)
 		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
 	}
 
-        ret = starpu_init(NULL);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-        ret = starpu_mpi_init_comm(NULL, NULL, 0, newcomm);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+        ret = starpu_mpi_init_conf(NULL, NULL, 0, newcomm, NULL);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	if (newrank == 0)
 	{
@@ -148,7 +146,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	MPI_Comm_free(&newcomm);
         MPI_Finalize();
 	return 0;

+ 3 - 6
mpi/examples/comm/mix_comm.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015,2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -87,10 +87,8 @@ int main(int argc, char **argv)
 		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
 	}
 
-        ret = starpu_init(NULL);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-        ret = starpu_mpi_init(NULL, NULL, 0);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+        ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	if (rank == 0)
 	{
@@ -180,7 +178,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(data[2]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	MPI_Comm_free(&newcomm);
         MPI_Finalize();
 	return 0;

+ 3 - 7
mpi/examples/complex/mpi_complex.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013,2015-2017                      CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2013-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2013-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,10 +43,8 @@ int main(int argc, char **argv)
 	int ret;
 	int compare=0;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
 
@@ -60,7 +58,6 @@ int main(int argc, char **argv)
 				fprintf(stderr, "We need at least 1 CPU.\n");
 		}
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return 77;
 	}
 
@@ -108,7 +105,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(handle2);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return (rank == 0) ? !compare : 0;
 }

+ 4 - 9
mpi/examples/matrix_decomposition/mpi_cholesky.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2012,2014-2015                      Université de Bordeaux
+ * Copyright (C) 2009-2012,2014-2015, 2018                      Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010                                     Mehdi Juhoor
  *
@@ -34,11 +34,8 @@ int main(int argc, char **argv)
 	int correctness;
 #endif
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
 	starpu_cublas_init();
@@ -50,7 +47,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
 		}
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -61,6 +57,7 @@ int main(int argc, char **argv)
 
 	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
 
+	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
 
 #ifndef STARPU_SIMGRID
@@ -70,8 +67,6 @@ int main(int argc, char **argv)
 #endif
 
 	matrix_free(&bmat, rank, nodes, 1);
-	starpu_cublas_shutdown();
-	starpu_shutdown();
 
 #ifndef STARPU_SIMGRID
 	assert(correctness);

+ 12 - 7
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2015,2017,2018                           CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2009-2010,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2010,2014-2015,2017-2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,7 +35,8 @@ static struct starpu_codelet cl11 =
 #endif
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
-	.model = &chol_model_11
+	.model = &chol_model_11,
+	.color = 0xffff00,
 };
 
 static struct starpu_codelet cl21 =
@@ -46,9 +47,11 @@ static struct starpu_codelet cl21 =
 #elif defined(STARPU_SIMGRID)
 	.cuda_funcs = {(void*)1},
 #endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_RW},
-	.model = &chol_model_21
+	.model = &chol_model_21,
+	.color = 0x8080ff,
 };
 
 static struct starpu_codelet cl22 =
@@ -59,9 +62,11 @@ static struct starpu_codelet cl22 =
 #elif defined(STARPU_SIMGRID)
 	.cuda_funcs = {(void*)1},
 #endif
+	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 3,
 	.modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
-	.model = &chol_model_22
+	.model = &chol_model_22,
+	.color = 0x00ff00,
 };
 
 /*
@@ -157,6 +162,9 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 
 	starpu_task_wait_for_all();
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+	end = starpu_timing_now();
+
 	for(x = 0; x < nblocks ; x++)
 	{
 		for (y = 0; y < nblocks; y++)
@@ -168,9 +176,6 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 	}
 	free(data_handles);
 
-	starpu_mpi_barrier(MPI_COMM_WORLD);
-	end = starpu_timing_now();
-
 	if (rank == 0)
 	{
 		*timing = end - start;

+ 4 - 8
mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2013,2015,2017                      CNRS
  * Copyright (C) 2010                                     Mehdi Juhoor
  *
@@ -34,11 +34,8 @@ int main(int argc, char **argv)
 	int rank, nodes, ret;
 	double timing, flops;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
 	starpu_cublas_init();
@@ -49,11 +46,10 @@ int main(int argc, char **argv)
 
 	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
 
+	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
 
 	matrix_free(&bmat, rank, nodes, 0);
-	starpu_cublas_shutdown();
-	starpu_shutdown();
 
 	if (rank == 0)
 	{

+ 62 - 30
mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2009-2014                                Université de Bordeaux
+ * Copyright (C) 2009-2014, 2018                                Université de Bordeaux
  * Copyright (C) 2010-2013,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,6 +23,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cublas.h>
+#include <starpu_cublas_v2.h>
 #ifdef STARPU_HAVE_MAGMA
 #include "magma.h"
 #include "magma_lapack.h"
@@ -33,6 +34,11 @@
  * U22
  */
 
+#if defined(STARPU_USE_CUDA)
+static const float p1 =  1.0;
+static const float m1 = -1.0;
+#endif
+
 static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, void *_args)
 {
 	(void)_args;
@@ -49,10 +55,6 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, void
 	unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]);
 	unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]);
 
-#ifdef STARPU_USE_CUDA
-	cublasStatus st;
-#endif
-
 	switch (s)
 	{
 		case 0:
@@ -61,19 +63,16 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, void
 			break;
 #ifdef STARPU_USE_CUDA
 		case 1:
-#ifdef STARPU_HAVE_MAGMA
-			cublasSetKernelStream(starpu_cuda_get_local_stream());
-#endif
-			cublasSgemm('n', 't', dy, dx, dz,
-					-1.0f, left, ld21, right, ld12,
-					 1.0f, center, ld22);
-			st = cublasGetError();
-			if (STARPU_UNLIKELY(st != CUBLAS_STATUS_SUCCESS))
-				STARPU_CUBLAS_REPORT_ERROR(st);
-
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
+		{
+			cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(),
+					CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz,
+					&m1, left, ld21, right, ld12,
+					&p1, center, ld22);
+			if (status != CUBLAS_STATUS_SUCCESS)
+				STARPU_CUBLAS_REPORT_ERROR(status);
 
 			break;
+		}
 #endif
 		default:
 			STARPU_ABORT();
@@ -113,6 +112,10 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, void *_a
 	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
 	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
 
+#ifdef STARPU_USE_CUDA
+	cublasStatus status;
+#endif
+
 	switch (s)
 	{
 		case 0:
@@ -120,11 +123,11 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, void *_a
 			break;
 #ifdef STARPU_USE_CUDA
 		case 1:
-#ifdef STARPU_HAVE_MAGMA
-			cublasSetKernelStream(starpu_cuda_get_local_stream());
-#endif
-			cublasStrsm('R', 'L', 'T', 'N', nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
+			status = cublasStrsm(starpu_cublas_get_local_handle(),
+					CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT,
+					nx21, ny21, &p1, sub11, ld11, sub21, ld21);
+			if (status != CUBLAS_STATUS_SUCCESS)
+				STARPU_CUBLAS_REPORT_ERROR(status);
 			break;
 #endif
 		default:
@@ -197,36 +200,65 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, void *_a
 			{
 				int ret;
 				int info;
+			cudaStream_t stream = starpu_cuda_get_local_stream();
+#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4)
+			cublasSetKernelStream(stream);
+			magmablasSetKernelStream(stream);
+#else
+			starpu_cublas_set_stream();
+#endif
 				ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info);
 				if (ret != MAGMA_SUCCESS)
 				{
 					fprintf(stderr, "Error in Magma: %d\n", ret);
 					STARPU_ABORT();
 				}
+#if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4)
+			cudaError_t cures = cudaStreamSynchronize(stream);
+#else
 				cudaError_t cures = cudaThreadSynchronize();
+#endif
 				STARPU_ASSERT(!cures);
 			}
 #else
+			{
+
+			float *lambda11;
+			cublasStatus_t status;
+			cudaStream_t stream = starpu_cuda_get_local_stream();
+			cublasHandle_t handle = starpu_cublas_get_local_handle();
+			cudaHostAlloc((void **)&lambda11, sizeof(float), 0);
+
 			for (z = 0; z < nx; z++)
 			{
-				float lambda11;
-				cudaMemcpyAsync(&lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
-				cudaStreamSynchronize(starpu_cuda_get_local_stream());
+				cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream);
+				cudaStreamSynchronize(stream);
 
-				STARPU_ASSERT(lambda11 != 0.0f);
+				STARPU_ASSERT(*lambda11 != 0.0f);
 
-				lambda11 = sqrt(lambda11);
+				*lambda11 = sqrt(*lambda11);
 
-				cublasSetVector(1, sizeof(float), &lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float));
+/*				cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */
+				cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream);
+				float scal = 1.0f/(*lambda11);
 
-				cublasSscal(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
+				status = cublasSscal(handle,
+						     nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 
-				cublasSsyr('U', nx - z - 1, -1.0f,
+				status = cublasSsyr(handle,
+						    CUBLAS_FILL_MODE_UPPER,
+						    nx - z - 1, &m1,
 							&sub11[(z+1)+z*ld], 1,
 							&sub11[(z+1)+(z+1)*ld], ld);
+				if (status != CUBLAS_STATUS_SUCCESS)
+					STARPU_CUBLAS_REPORT_ERROR(status);
 			}
 
-			cudaStreamSynchronize(starpu_cuda_get_local_stream());
+			cudaStreamSynchronize(stream);
+			cudaFreeHost(lambda11);
+			}
 #endif
 			break;
 #endif

+ 4 - 10
mpi/examples/matrix_mult/mm.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2016-2017                                CNRS
  * Copyright (C) 2016                                     Inria
- * Copyright (C) 2016-2017                                Université de Bordeaux
+ * Copyright (C) 2016-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -296,19 +296,14 @@ static struct starpu_codelet gemm_cl =
 
 int main(int argc, char *argv[])
 {
-	/* Initializes the StarPU core */
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	/* Initializes the StarPU-MPI layer */
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	/* Initializes StarPU and the StarPU-MPI layer */
+	int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft");
 
 	if (starpu_cpu_worker_get_count() == 0)
 	{
 		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -387,7 +382,6 @@ int main(int argc, char *argv[])
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	return 0;
 }
 

+ 4 - 7
mpi/examples/mpi_lu/plu_example.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2013                                Inria
- * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2018                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -425,6 +425,7 @@ int main(int argc, char **argv)
 {
 	int rank;
 	int world_size;
+	int ret;
 
 	/*
 	 *	Initialization
@@ -447,15 +448,12 @@ int main(int argc, char **argv)
 
 	parse_args(rank, argc, argv);
 
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	/* We disable sequential consistency in this example */
 	starpu_data_set_default_sequential_consistency_flag(0);
 
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	STARPU_ASSERT(p*q == world_size);
 
 	starpu_cublas_init();
@@ -594,7 +592,6 @@ int main(int argc, char **argv)
 
 	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 #if 0
 	MPI_Finalize();

+ 4 - 7
mpi/examples/mpi_lu/plu_implicit_example.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2013                                Inria
- * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -240,16 +240,14 @@ int main(int argc, char **argv)
 {
 	int rank;
 	int world_size;
+	int ret;
 
 	starpu_srand48((long int)time(NULL));
 
 	parse_args(argc, argv);
 
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
@@ -366,7 +364,6 @@ int main(int argc, char **argv)
 
 	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return 0;
 }

+ 3 - 7
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012-2014                                Inria
- * Copyright (C) 2010-2011,2013-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2010-2011,2013-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -272,11 +272,8 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
@@ -401,7 +398,6 @@ int main(int argc, char **argv)
 
 	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return 0;
 }

+ 4 - 7
mpi/examples/stencil/stencil5.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2011-2018                                CNRS
- * Copyright (C) 2011-2017                                Université de Bordeaux
+ * Copyright (C) 2011-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -100,11 +100,10 @@ int main(int argc, char **argv)
 	float mean=0;
 	float matrix[X][Y];
 	starpu_data_handle_t data_handles[X][Y];
+	int ret;
 
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -112,7 +111,6 @@ int main(int argc, char **argv)
 	{
 		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return 77;
 	}
 
@@ -254,7 +252,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (display)
 	{

+ 4 - 8
mpi/examples/stencil/stencil5_lb.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2018                                CNRS
- * Copyright (C) 2011,2013,2015-2017                      Université de Bordeaux
+ * Copyright (C) 2011,2013,2015-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -147,14 +147,13 @@ int main(int argc, char **argv)
 	float mean=0;
 	float matrix[X][Y];
 	struct starpu_mpi_lb_conf itf;
+	int ret;
 
 	itf.get_neighbors = get_neighbors;
 	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
 
-	int ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -162,14 +161,12 @@ int main(int argc, char **argv)
 	{
 		FPRINTF(stderr, "Only works with 2 nodes\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return 77;
 	}
 	if (starpu_cpu_worker_get_count() == 0)
 	{
 		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return 77;
 	}
 
@@ -276,7 +273,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (display)
 	{

+ 4 - 5
mpi/examples/user_datatype/my_interface.c

@@ -167,11 +167,10 @@ static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size
 	return snprintf(buf, size, "Data%d-%c", my_interface->d, my_interface->c);
 }
 
-static void *data_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
+static void *data_to_pointer(void *data_interface, unsigned node)
 {
-	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-
-	struct starpu_my_interface *my_interface = (struct starpu_my_interface *) starpu_data_get_interface_on_node(handle, node);
+	(void) node;
+	struct starpu_my_interface *my_interface = data_interface;
 
 	return (void*) &my_interface->d;
 }
@@ -211,7 +210,7 @@ static struct starpu_data_interface_ops interface_data_ops =
 	.footprint = data_footprint,
 	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
 	.interface_size = sizeof(struct starpu_my_interface),
-	.handle_to_pointer = data_handle_to_pointer,
+	.to_pointer = data_to_pointer,
 	.pack_data = data_pack_data,
 	.unpack_data = data_unpack_data,
 	.describe = data_describe

+ 2 - 6
mpi/examples/user_datatype/user_datatype.c

@@ -31,10 +31,8 @@ int main(int argc, char **argv)
 	starpu_data_handle_t handle0;
 	starpu_data_handle_t handle1;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
 
@@ -48,7 +46,6 @@ int main(int argc, char **argv)
 				fprintf(stderr, "We need at least 1 CPU.\n");
 		}
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return 77;
 	}
 
@@ -107,7 +104,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(handle1);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (rank == 0)
 	{

+ 2 - 1
mpi/include/starpu_mpi.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2012,2014-2018                      Université de Bordeaux
  * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2016                                     Inria
  *
@@ -54,6 +54,7 @@ int starpu_mpi_barrier(MPI_Comm comm);
 
 int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
 
+int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf);
 int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
 int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
 int starpu_mpi_initialize(void) STARPU_DEPRECATED;

+ 2 - 1
mpi/src/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2012                                     Inria
-# Copyright (C) 2010-2017                                CNRS
+# Copyright (C) 2010-2018                                CNRS
 # Copyright (C) 2009-2014, 2018                                Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -72,6 +72,7 @@ noinst_HEADERS =					\
 	mpi/starpu_mpi_sync_data.h			\
 	mpi/starpu_mpi_comm.h				\
 	mpi/starpu_mpi_tag.h				\
+	mpi/starpu_mpi_driver.h				\
 	load_balancer/policy/data_movements_interface.h	\
 	load_balancer/policy/load_data_interface.h	\
 	load_balancer/policy/load_balancer_policy.h

+ 1 - 1
mpi/src/load_balancer/policy/data_movements_interface.c

@@ -260,7 +260,7 @@ static struct starpu_data_interface_ops interface_data_movements_ops =
 	.footprint = data_movements_footprint,
 	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
 	.interface_size = sizeof(struct data_movements_interface),
-	.handle_to_pointer = NULL,
+	.to_pointer = NULL,
 	.pack_data = data_movements_pack_data,
 	.unpack_data = data_movements_unpack_data,
 	.describe = NULL

+ 1 - 1
mpi/src/load_balancer/policy/load_data_interface.c

@@ -243,7 +243,7 @@ static struct starpu_data_interface_ops interface_load_data_ops =
 	.footprint = load_data_footprint,
 	.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
 	.interface_size = sizeof(struct load_data_interface),
-	.handle_to_pointer = NULL,
+	.to_pointer = NULL,
 	.pack_data = load_data_pack_data,
 	.unpack_data = load_data_unpack_data,
 	.describe = NULL

+ 37 - 0
mpi/src/mpi/starpu_mpi_driver.h

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015-2018                                CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_DRIVER_H__
+#define __STARPU_MPI_DRIVER_H__
+
+#include <starpu.h>
+
+#ifdef STARPU_USE_MPI_MPI
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+void _starpu_mpi_driver_init(struct starpu_conf *conf);
+void _starpu_mpi_driver_shutdown();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // STARPU_USE_MPI_MPI
+#endif // __STARPU_MPI_DRIVER_H__

+ 96 - 4
mpi/src/mpi/starpu_mpi_mpi.c

@@ -3,7 +3,7 @@
  * Copyright (C) 2012-2013,2016-2017                      Inria
  * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2017                                     Guillaume Beauchamp
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -78,6 +78,12 @@ static starpu_pthread_t progress_thread;
 #endif
 static int running = 0;
 
+/* Driver taken by StarPU-MPI to process tasks when there are no requests to
+ * handle, instead of polling endlessly */
+static struct starpu_driver *mpi_driver = NULL;
+static int mpi_driver_call_freq = 0;
+static int mpi_driver_task_freq = 0;
+
 #ifdef STARPU_SIMGRID
 static int wait_counter;
 static starpu_pthread_cond_t wait_counter_cond;
@@ -962,6 +968,7 @@ static void _starpu_mpi_test_detached_requests(void)
 		}
 		else
 		{
+			_STARPU_MPI_TRACE_POLLING_END();
 		     	struct _starpu_mpi_req *next_req;
 			next_req = _starpu_mpi_req_list_next(req);
 
@@ -993,6 +1000,7 @@ static void _starpu_mpi_test_detached_requests(void)
 			}
 
 			req = next_req;
+			_STARPU_MPI_TRACE_POLLING_BEGIN();
 		}
 
 		STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
@@ -1110,12 +1118,19 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	starpu_pthread_setname("MPI");
 
 #ifndef STARPU_SIMGRID
-	if (_starpu_mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid < 0)
+	{
+		_starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);
+	}
+
+	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0)
+	{
+		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
+	}
 	_starpu_mpi_do_initialize(argc_argv);
 	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
+		starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI");
 #endif
 
 	_starpu_mpi_env_init();
@@ -1136,6 +1151,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
 	}
 	smpi_process_set_user_data(tsd);
+        /* And wait for StarPU to get initialized, to come back to the same
+         * situation as native execution where that's always the case. */
+	starpu_wait_initialized();
 #endif
 
 	_starpu_mpi_comm_amounts_init(argc_argv->comm);
@@ -1149,6 +1167,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_starpu_mpi_sync_data_init();
 	_starpu_mpi_datatype_init();
 
+	if (mpi_driver)
+		starpu_driver_init(mpi_driver);
+
 #ifdef STARPU_SIMGRID
 	starpu_pthread_wait_init(&wait);
 	starpu_pthread_queue_init(&dontsleep);
@@ -1172,6 +1193,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
  	int envelope_request_submitted = 0;
+	int mpi_driver_loop_counter = 0;
+	int mpi_driver_task_counter = 0;
+	_STARPU_MPI_TRACE_POLLING_BEGIN();
 
 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(&ready_recv_requests)) || !(_starpu_mpi_req_prio_list_empty(&ready_send_requests)) || !(_starpu_mpi_req_list_empty(&detached_requests)))// || !(_starpu_mpi_early_request_count()) || !(_starpu_mpi_sync_data_count()))
 	{
@@ -1198,6 +1222,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		unsigned n = 0;
 		while (!_starpu_mpi_req_list_empty(&ready_recv_requests))
 		{
+			_STARPU_MPI_TRACE_POLLING_END();
 			struct _starpu_mpi_req *req;
 
 			if (n++ == nready_process)
@@ -1238,6 +1263,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 			STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 		}
 
+		_STARPU_MPI_TRACE_POLLING_BEGIN();
+
 		/* If there is no currently submitted envelope_request submitted to
                  * catch envelopes from senders, and there is some pending
                  * receive requests on our side, we resubmit a header request. */
@@ -1264,6 +1291,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 			if (flag)
 			{
+				_STARPU_MPI_TRACE_POLLING_END();
 				_STARPU_MPI_COMM_FROM_DEBUG(envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, envelope_status.MPI_SOURCE, _STARPU_MPI_TAG_ENVELOPE, envelope->data_tag, envelope_comm);
 				_STARPU_MPI_DEBUG(4, "Envelope received with mode %d\n", envelope->mode);
 				if (envelope->mode == _STARPU_MPI_ENVELOPE_SYNC_READY)
@@ -1355,9 +1383,34 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					}
 				}
 				envelope_request_submitted = 0;
+				_STARPU_MPI_TRACE_POLLING_BEGIN();
 			}
 			else
 			{
+				/* A call is made to starpu_driver_run_once only
+				 * when the progression thread has gone through
+				 * the communication progression loop
+				 * mpi_driver_call_freq times. It is worth
+				 * tuning the
+				 * STARPU_MPI_DRIVER_CALL_FREQUENCY environment
+				 * variable depending on whether the user wants
+				 * reactivity or computing power from the MPI
+				 * progression thread. */
+				if ( mpi_driver && ( ++mpi_driver_loop_counter == mpi_driver_call_freq ))
+				{
+					mpi_driver_loop_counter = 0;
+					mpi_driver_task_counter = 0;
+					while (mpi_driver_task_counter++ < mpi_driver_task_freq)
+					{
+						_STARPU_MPI_TRACE_DRIVER_RUN_BEGIN();
+						STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
+						_STARPU_MPI_DEBUG(4, "running once mpi driver\n");
+						starpu_driver_run_once(mpi_driver);
+						STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
+						_STARPU_MPI_TRACE_DRIVER_RUN_END();
+					}
+				}
+
 				//_STARPU_MPI_DEBUG(4, "Nothing received, continue ..\n");
 			}
 		}
@@ -1368,6 +1421,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 #endif
 	}
 
+	_STARPU_MPI_TRACE_POLLING_END();
 	if (envelope_request_submitted)
 	{
 		_starpu_mpi_comm_cancel_recv();
@@ -1539,4 +1593,42 @@ int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int
 	return 0;
 }
 
+void _starpu_mpi_driver_init(struct starpu_conf *conf)
+{
+	/* We only initialize the driver if the environment variable
+	 * STARPU_MPI_DRIVER_CALL_FREQUENCY is set to a positive value by the user.
+	 * If this variable is not defined, or is set to a value lower than or equal
+	 * to zero, StarPU-MPI will not use a driver. */
+	int driver_env = starpu_get_env_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0);
+	if (driver_env > 0)
+	{
+#ifdef STARPU_SIMGRID
+		_STARPU_DISP("Warning: MPI driver is not supported with simgrid, this will be disabled");
+		return;
+#endif
+		mpi_driver_call_freq = driver_env;
+
+		_STARPU_MALLOC(mpi_driver, sizeof(struct starpu_driver));
+		mpi_driver->type = STARPU_CPU_WORKER;
+		mpi_driver->id.cpu_id = 0;
+
+		conf->not_launched_drivers = mpi_driver;
+		conf->n_not_launched_drivers = 1;
+
+		int tasks_freq_env = starpu_get_env_number_default("STARPU_MPI_DRIVER_TASK_FREQUENCY", 0);
+		if (tasks_freq_env > 0)
+			mpi_driver_task_freq = tasks_freq_env;
+	}
+}
+
+void _starpu_mpi_driver_shutdown()
+{
+	if (mpi_driver)
+	{
+		starpu_driver_deinit(mpi_driver);
+		free(mpi_driver);
+		mpi_driver = NULL;
+	}
+}
+
 #endif /* STARPU_USE_MPI_MPI */

+ 10 - 5
mpi/src/nmad/starpu_mpi_nmad.c

@@ -477,15 +477,20 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 {
 	struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg;
 
-	starpu_pthread_setname("MPI");
-
 #ifndef STARPU_SIMGRID
-	if (_starpu_mpi_thread_cpuid >= 0)
-		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
+	if (_starpu_mpi_thread_cpuid < 0)
+	{
+		_starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);
+	}
+
+	if (starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0)
+	{
+		_STARPU_DISP("No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n");
+	}
 	_starpu_mpi_do_initialize(argc_argv);
 	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
-		_starpu_bind_thread_on_cpu(_starpu_mpi_thread_cpuid, STARPU_NOWORKERID);
+		starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI");
 #endif
 
 	_starpu_mpi_env_init();

+ 2 - 2
mpi/src/starpu_mpi_datatype.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012,2015                           Inria
- * Copyright (C) 2009-2011,2014-2015                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015, 2018                      Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -301,7 +301,7 @@ int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatyp
 		table->free_datatype_func = free_datatype_func;
 		HASH_ADD_INT(_starpu_mpi_datatype_funcs_table, id, table);
 	}
-	STARPU_ASSERT_MSG(handle->ops->handle_to_pointer, "The data interface must define the operation 'handle_to_pointer'\n");
+	STARPU_ASSERT_MSG(handle->ops->handle_to_pointer || handle->ops->to_pointer, "The data interface must define the operation 'to_pointer'\n");
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
 	return 0;
 }

+ 40 - 12
mpi/src/starpu_mpi_fxt.h

@@ -1,8 +1,8 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012-2013                                Inria
+ * Copyright (C) 2012-2013,2016                           Inria
  * Copyright (C) 2010-2011,2014,2017                      Université de Bordeaux
- * Copyright (C) 2010,2012,2014-2017                      CNRS
+ * Copyright (C) 2010,2012,2014-2018                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,16 +37,6 @@ extern "C"
 #define _STARPU_MPI_FUT_IRECV_SUBMIT_END		0x5207
 #define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN		0x5208
 #define _STARPU_MPI_FUT_ISEND_COMPLETE_END		0x5209
-#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN		0x5210
-#define _STARPU_MPI_FUT_IRECV_COMPLETE_END		0x5211
-#define _STARPU_MPI_FUT_SLEEP_BEGIN			0x5212
-#define _STARPU_MPI_FUT_SLEEP_END			0x5213
-#define _STARPU_MPI_FUT_DTESTING_BEGIN			0x5214
-#define _STARPU_MPI_FUT_DTESTING_END			0x5215
-#define _STARPU_MPI_FUT_UTESTING_BEGIN			0x5216
-#define _STARPU_MPI_FUT_UTESTING_END			0x5217
-#define _STARPU_MPI_FUT_UWAIT_BEGIN			0x5218
-#define _STARPU_MPI_FUT_UWAIT_END			0x5219
 #define _STARPU_MPI_FUT_DATA_SET_RANK			0x521a
 #define _STARPU_MPI_FUT_IRECV_TERMINATED		0x521b
 #define _STARPU_MPI_FUT_ISEND_TERMINATED		0x521c
@@ -54,8 +44,24 @@ extern "C"
 #define _STARPU_MPI_FUT_TESTING_DETACHED_END		0x521e
 #define _STARPU_MPI_FUT_TEST_BEGIN			0x521f
 #define _STARPU_MPI_FUT_TEST_END			0x5220
+#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN		0x520a
+#define _STARPU_MPI_FUT_IRECV_COMPLETE_END		0x520b
+#define _STARPU_MPI_FUT_SLEEP_BEGIN			0x520c
+#define _STARPU_MPI_FUT_SLEEP_END			0x520d
+#define _STARPU_MPI_FUT_DTESTING_BEGIN			0x520e
+#define _STARPU_MPI_FUT_DTESTING_END			0x520f
+#define _STARPU_MPI_FUT_UTESTING_BEGIN			0x5210
+#define _STARPU_MPI_FUT_UTESTING_END			0x5211
+#define _STARPU_MPI_FUT_UWAIT_BEGIN			0x5212
+#define _STARPU_MPI_FUT_UWAIT_END			0x5213
+#define _STARPU_MPI_FUT_POLLING_BEGIN			0x5214
+#define _STARPU_MPI_FUT_POLLING_END			0x5215
+#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN		0x5216
+#define _STARPU_MPI_FUT_DRIVER_RUN_END			0x5217
 
 #ifdef STARPU_USE_FXT
+static int trace_loop = 0;
+
 #define _STARPU_MPI_TRACE_START(rank, worldsize)	\
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid());
 #define _STARPU_MPI_TRACE_STOP(rank, worldsize)	\
@@ -119,6 +125,24 @@ extern "C"
 #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)
 #define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)		do {} while(0)
 #endif
+/* POLLING_BEGIN/END are edge-triggered through trace_loop: a probe is only
+ * recorded when the polling state actually changes, so repeated calls made
+ * while already in the same state emit nothing. */
+#define _STARPU_MPI_TRACE_POLLING_BEGIN()					\
+	if(!trace_loop) {						\
+		trace_loop = 1;							\
+		FUT_DO_PROBE1(_STARPU_MPI_FUT_POLLING_BEGIN, _starpu_gettid()); \
+	}
+#define _STARPU_MPI_TRACE_POLLING_END()	\
+	if(trace_loop) {							\
+		trace_loop = 0;							\
+		FUT_DO_PROBE1(_STARPU_MPI_FUT_POLLING_END, _starpu_gettid());	\
+	}
+/* Mark the beginning and the end of a driver run; each probe passes
+ * _starpu_gettid() as its single argument. */
+#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN()	\
+	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_BEGIN,  _starpu_gettid());
+#define _STARPU_MPI_TRACE_DRIVER_RUN_END()	\
+	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid());
 #define TRACE
 #else
 #define _STARPU_MPI_TRACE_START(a, b)				do {} while(0);
@@ -148,6 +172,10 @@ extern "C"
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
 #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)
 #define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)		do {} while(0)
+#define _STARPU_MPI_TRACE_POLLING_BEGIN()			do {} while(0);
+#define _STARPU_MPI_TRACE_POLLING_END()				do {} while(0);
+#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN()			do {} while(0);
+#define _STARPU_MPI_TRACE_DRIVER_RUN_END()			do {} while(0);
 #endif
 
 #ifdef __cplusplus

+ 37 - 1
mpi/src/starpu_mpi_init.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2016                                     Inria
  *
@@ -35,12 +35,14 @@
 #if defined(STARPU_USE_MPI_MPI)
 #include <mpi/starpu_mpi_comm.h>
 #include <mpi/starpu_mpi_tag.h>
+#include <mpi/starpu_mpi_driver.h>
 #endif
 
 #ifdef STARPU_SIMGRID
 static int _mpi_world_size;
 static int _mpi_world_rank;
 #endif
+static int _mpi_initialized_starpu;
 
 static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
 {
@@ -71,6 +73,7 @@ void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv)
 {
 	if (argc_argv->initialize_mpi)
 	{
+		STARPU_ASSERT_MSG(argc_argv->comm == MPI_COMM_WORLD, "It does not make sense to ask StarPU-MPI to initialize MPI while a non-world communicator was given");
 		int thread_support;
 #ifdef STARPU_USE_MPI_NMAD
 		/* strat_prio is preferred for StarPU instead of default strat_aggreg */
@@ -177,6 +180,36 @@ int starpu_mpi_initialize_extended(int *rank, int *world_size)
 #endif
 }
 
+/* Initialize StarPU and then StarPU-MPI in a single call.
+ *
+ * argc, argv, initialize_mpi and comm are forwarded unchanged to
+ * starpu_mpi_init_comm(). conf is the StarPU configuration to use; when it is
+ * NULL, a local configuration filled in by starpu_conf_init() is used
+ * instead. Note that a non-NULL conf is modified in place (reserve_ncpus).
+ *
+ * Returns the negative value reported by starpu_init() if it fails, otherwise
+ * the return value of starpu_mpi_init_comm().
+ */
+int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf)
+{
+	struct starpu_conf localconf;
+	if (!conf)
+	{
+		starpu_conf_init(&localconf);
+		conf = &localconf;
+	}
+
+#if defined(STARPU_USE_MPI_MPI)
+	/* NOTE(review): presumably sets up the MPI driver from conf -- confirm in starpu_mpi_driver.h */
+	_starpu_mpi_driver_init(conf);
+
+	/* With this backend the block below only runs when
+	 * STARPU_MPI_DRIVER_CALL_FREQUENCY is unset or <= 0 (default 0);
+	 * without STARPU_USE_MPI_MPI the block runs unconditionally. */
+	if (starpu_get_env_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0) <= 0)
+#endif
+	{
+		/* Reserve a core for our progression thread */
+		/* -1 is treated as "no reservation yet" and becomes 1; any
+		 * other value is incremented by one. */
+		if (conf->reserve_ncpus == -1)
+			conf->reserve_ncpus = 1;
+		else
+			conf->reserve_ncpus++;
+	}
+
+	int ret = starpu_init(conf);
+	if (ret < 0)
+		return ret;
+	/* Remember that StarPU was initialized from here: starpu_mpi_shutdown()
+	 * tests this flag before calling starpu_shutdown(). */
+	_mpi_initialized_starpu = 1;
+
+	return starpu_mpi_init_comm(argc, argv, initialize_mpi, comm);
+}
+
 int starpu_mpi_shutdown(void)
 {
 	void *value;
@@ -197,7 +230,10 @@ int starpu_mpi_shutdown(void)
 #if defined(STARPU_USE_MPI_MPI)
 	_starpu_mpi_tag_shutdown();
 	_starpu_mpi_comm_shutdown();
+	_starpu_mpi_driver_shutdown();
 #endif
+	if (_mpi_initialized_starpu)
+		starpu_shutdown();
 
 	return 0;
 }

+ 8 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -452,6 +452,14 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
                 {
                         (void)va_arg(varg_list_copy, int);
                 }
+		else if (arg_type==STARPU_TASK_SYNCHRONOUS)
+                {
+                        (void)va_arg(varg_list_copy, int);
+                }
+		else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY)
+                {
+                        (void)va_arg(varg_list_copy, char *);
+                }
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type);

+ 10 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -298,6 +298,16 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			/* int* */
 		}
+		else if (arg_type==STARPU_TASK_SYNCHRONOUS)
+		{
+			arg_i++;
+			/* int* */
+		}
+		else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY)
+		{
+			arg_i++;
+			/* char* */
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type);

+ 5 - 3
mpi/tests/Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010-2017                                CNRS
+# Copyright (C) 2010-2018                                CNRS
 # Copyright (C) 2009-2018                                Université de Bordeaux
 # Copyright (C) 2013                                     Thibaut Lambert
 #
@@ -158,7 +158,8 @@ starpu_mpi_TESTS +=				\
 	tags_checking				\
 	sync					\
 	gather					\
-	gather2
+	gather2					\
+	driver
 
 if STARPU_USE_MPI_MPI
 starpu_mpi_TESTS +=				\
@@ -228,7 +229,8 @@ noinst_PROGRAMS =				\
 	policy_selection2			\
 	early_request				\
 	starpu_redefine				\
-	load_balancer
+	load_balancer				\
+	driver
 
 XFAIL_TESTS=					\
 	policy_register_toomany			\

+ 3 - 7
mpi/tests/block_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2012,2014-2015,2017                 CNRS
  * Copyright (C) 2013                                     Inria
  *
@@ -32,10 +32,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -46,7 +44,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
@@ -143,7 +140,6 @@ int main(int argc, char **argv)
 		free(block);
 	}
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 3 - 7
mpi/tests/block_interface_pinned.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  * Copyright (C) 2013                                     Inria
  *
@@ -32,10 +32,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -46,7 +44,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -148,7 +145,6 @@ int main(int argc, char **argv)
 	fflush(stdout);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 2 - 5
mpi/tests/broadcast.c

@@ -47,10 +47,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -96,7 +94,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(handle);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();
 

+ 3 - 6
mpi/tests/cache.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -80,10 +80,8 @@ int main(int argc, char **argv)
 	unsigned val = 42;
 	starpu_data_handle_t data;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
 	if (starpu_mpi_cache_is_enabled() == 0)
@@ -111,7 +109,6 @@ int main(int argc, char **argv)
 
 skip:
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return starpu_mpi_cache_is_enabled() == 0 ? STARPU_TEST_SKIPPED : 0;
 }

+ 3 - 6
mpi/tests/cache_disable.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,10 +43,8 @@ int main(int argc, char **argv)
 	int in_cache;
 	int cache;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
 	cache = starpu_mpi_cache_is_enabled();
@@ -98,7 +96,6 @@ int main(int argc, char **argv)
 
 skip:
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return cache == 0 ? STARPU_TEST_SKIPPED : 0;
 }

+ 3 - 9
mpi/tests/callback.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013-2015,2017                           CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -63,13 +63,8 @@ int main(int argc, char **argv)
 	int y=12;
 	int rank, size;
 
-	ret = starpu_initialize(NULL, &argc, &argv);
-	if (ret == -ENODEV)
-		return STARPU_TEST_SKIPPED;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -118,7 +113,6 @@ int main(int argc, char **argv)
 	STARPU_ASSERT_MSG(y == expected_y, "y should be equal to %d and not %d\n", expected_y, y);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return EXIT_SUCCESS;
 }

+ 3 - 7
mpi/tests/datatypes.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013-2017                                CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -582,10 +582,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -596,7 +594,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -611,7 +608,6 @@ int main(int argc, char **argv)
 	exchange_csr(rank, &error);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 131 - 0
mpi/tests/driver.c

@@ -0,0 +1,131 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017, 2018 CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <math.h>
+#include "helper.h"
+
+/* Test of the StarPU-MPI driver mode.
+ *
+ * Ranks are paired (even rank r with r+1). The odd rank sends handles 0 and 3
+ * with non-blocking sends and exchanges handles 1 and 2 with blocking calls;
+ * the even rank posts its receives for handles 0 and 3 only after the
+ * blocking exchange, and registers handle 3 just before receiving it. Both
+ * sides then poll their two requests with starpu_mpi_test() until they are
+ * reset to NULL, and the even rank checks the received values.
+ *
+ * Returns STARPU_TEST_SKIPPED when the number of processes is odd, 0 on
+ * success, and a non-zero value when an even rank received unexpected values.
+ */
+int main(int argc, char **argv)
+{
+	int ret, rank, size, i;
+	starpu_data_handle_t tab_handle[4];
+	int values[4];
+	starpu_mpi_req request[2] = {NULL, NULL};
+	int mpi_init;
+
+	/* Force the driver mode; the values are read at initialization time */
+	setenv("STARPU_MPI_DRIVER_CALL_FREQUENCY", "1", 1);
+	setenv("STARPU_MPI_DRIVER_TASK_FREQUENCY", "10", 1);
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size%2 != 0)
+	{
+		FPRINTF_MPI(stderr, "We need a even number of processes.\n");
+		starpu_mpi_shutdown();
+		if (!mpi_init)
+			MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	for(i=0 ; i<4 ; i++)
+	{
+		if (i<3 || rank%2)
+		{
+			// the first three data are registered on every node; the 4th one is registered here
+			// only on the sending (odd) node, the receiving node registers it later
+			values[i] = (rank+1) * (i+1);
+			starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i]));
+			starpu_mpi_data_register(tab_handle[i], i, rank);
+		}
+	}
+
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank);
+
+	if (rank%2)
+	{
+		FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank);
+		// this data will be received as an early registered data
+		starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
+		// this data will be received as an early UNregistered data
+		starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
+
+		starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD);
+		starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	}
+	else
+	{
+		starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD);
+
+		// only now do we register the 4th data, without any local buffer
+		starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int));
+		starpu_mpi_data_register(tab_handle[3], 3, rank);
+		starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
+		starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
+	}
+
+	// poll both requests until they have been reset to NULL
+	// (starpu_mpi_test is expected to do so once a request has completed)
+	int finished=0;
+	while (!finished)
+	{
+		for(i=0 ; i<2 ; i++)
+		{
+			if (request[i])
+			{
+				int flag;
+				MPI_Status status;
+				starpu_mpi_test(&request[i], &flag, &status);
+				if (flag)
+					FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]);
+			}
+		}
+		finished = request[0] == NULL && request[1] == NULL;
+	}
+
+	if (rank%2 == 0)
+	{
+		void *ptr0;
+		void *ptr3;
+
+		starpu_data_acquire(tab_handle[0], STARPU_RW);
+		ptr0 = starpu_data_get_local_ptr(tab_handle[0]);
+		starpu_data_release(tab_handle[0]);
+
+		starpu_data_acquire(tab_handle[3], STARPU_RW);
+		ptr3 = starpu_data_get_local_ptr(tab_handle[3]);
+		starpu_data_release(tab_handle[3]);
+
+		// the sender initialised values[i] to (its rank+1)*(i+1); ret is 0 when both values match
+		ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4);
+		ret = !ret;
+		FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank);
+	}
+
+	for(i=0 ; i<4 ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+
+	// propagate the verification result instead of always reporting success
+	return ret;
+}

+ 3 - 7
mpi/tests/early_request.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015-2017                                Université de Bordeaux
+ * Copyright (C) 2015-2018                                Université de Bordeaux
  * Copyright (C) 2015                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -189,10 +189,8 @@ int main(int argc, char * argv[])
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size);
@@ -202,7 +200,6 @@ int main(int argc, char * argv[])
 		if (mpi_rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -241,7 +238,6 @@ int main(int argc, char * argv[])
 	free(el_right);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 3 - 7
mpi/tests/gather.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013,2015,2017                           CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,10 +27,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -39,7 +37,6 @@ int main(int argc, char **argv)
 	{
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -72,7 +69,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();
 

+ 3 - 7
mpi/tests/gather2.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013,2015,2017                           CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,10 +25,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -37,7 +35,6 @@ int main(int argc, char **argv)
 	{
 		FPRINTF(stderr, "We need more than 2 processes.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -94,7 +91,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();
 

+ 3 - 6
mpi/tests/insert_task.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2015,2017,2018                           CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2011,2013-2015,2017                      Université de Bordeaux
+ * Copyright (C) 2011,2013-2015,2017-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,10 +55,8 @@ int main(int argc, char **argv)
 	unsigned matrix[X][Y];
 	starpu_data_handle_t data_handles[X][Y];
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -126,7 +124,6 @@ int main(int argc, char **argv)
 		}
 	}
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 #if 0
 	for(x = 0; x < X; x++)

+ 3 - 6
mpi/tests/insert_task_block.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2015,2017,2018                           CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2013-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2013-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -74,10 +74,8 @@ int main(int argc, char **argv)
 	unsigned matrix[SIZE*SIZE];
 	starpu_data_handle_t data_handles[SIZE][SIZE];
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -150,7 +148,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 #if 1
 	for(x = 0; x < SIZE; x++)

+ 3 - 6
mpi/tests/insert_task_compute.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013-2017                                CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -45,10 +45,8 @@ int test(int rank, int node, int *before, int *after, int task_insert, int data_
 	struct starpu_data_descr descrs[2];
 	int barrier_ret;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	if (starpu_cpu_worker_get_count() <= 0)
 	{
@@ -206,7 +204,6 @@ nodata:
 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return ret == -ENODEV ? ret : !ok;
 }

+ 3 - 7
mpi/tests/insert_task_count.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2010,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2010,2014-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -57,10 +57,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -75,7 +73,6 @@ int main(int argc, char **argv)
 				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
 		}
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -109,7 +106,6 @@ int main(int argc, char **argv)
 	starpu_task_wait_for_all();
 	starpu_data_unregister(token_handle);
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
 

+ 3 - 7
mpi/tests/insert_task_dyn_handles.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,10 +66,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
@@ -78,7 +76,6 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -168,7 +165,6 @@ enodev:
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();
 	return ret;

+ 3 - 6
mpi/tests/insert_task_node_choice.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2015,2017                           CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -49,10 +49,8 @@ int main(int argc, char **argv)
 	starpu_data_handle_t data_handlesx0;
 	starpu_data_handle_t data_handlesx1;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -107,7 +105,6 @@ int main(int argc, char **argv)
 
 end:
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return 0;
 }

+ 3 - 7
mpi/tests/insert_task_owner.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2017                                CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2013-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2013-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -81,10 +81,8 @@ int main(int argc, char **argv)
 	starpu_data_handle_t data_handlesx0 = NULL;
 	starpu_data_handle_t data_handlesx1 = NULL;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -93,7 +91,6 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -183,7 +180,6 @@ int main(int argc, char **argv)
 
 end:
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return 0;
 }

+ 3 - 7
mpi/tests/insert_task_owner2.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2017                                CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2012-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2012-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -51,10 +51,8 @@ int main(int argc, char **argv)
 	int i, ret=0;
 	starpu_data_handle_t data_handles[4];
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -63,7 +61,6 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -134,7 +131,6 @@ int main(int argc, char **argv)
 
 	free(values);
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return (rank == 0) ? ret : 0;
 }

+ 3 - 7
mpi/tests/insert_task_owner_data.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2017                                CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2012-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2012-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -46,10 +46,8 @@ int main(int argc, char **argv)
 	starpu_data_handle_t data_handles[2];
 	int values[2];
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
@@ -58,7 +56,6 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -114,7 +111,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(data_handles[1]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return ret;
 }

+ 3 - 6
mpi/tests/insert_task_recv_cache.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2018                                CNRS
- * Copyright (C) 2014-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2014-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -60,10 +60,8 @@ void test_cache(int rank, char *enabled, size_t *comm_amount)
 	FPRINTF_MPI(stderr, "Testing with STARPU_MPI_CACHE=%s\n", enabled);
 	setenv("STARPU_MPI_CACHE", enabled, 1);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	for(i = 0; i < 2; i++)
 	{
@@ -118,7 +116,6 @@ void test_cache(int rank, char *enabled, size_t *comm_amount)
 
 	starpu_mpi_comm_amounts_retrieve(comm_amount);
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 }
 
 int main(int argc, char **argv)

+ 3 - 6
mpi/tests/insert_task_sent_cache.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011-2018                                CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2013-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2013-2015,2017-2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -60,10 +60,8 @@ void test_cache(int rank, char *enabled, size_t *comm_amount)
 
 	setenv("STARPU_MPI_CACHE", enabled, 1);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	for(i = 0; i < 2; i++)
 	{
@@ -125,7 +123,6 @@ void test_cache(int rank, char *enabled, size_t *comm_amount)
 
 	starpu_mpi_comm_amounts_retrieve(comm_amount);
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 }
 
 int main(int argc, char **argv)

+ 3 - 6
mpi/tests/insert_task_seq.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2014,2017,2018                           CNRS
- * Copyright (C) 2017                                     Université de Bordeaux
+ * Copyright (C) 2017-2018                                     Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -68,10 +68,8 @@ void dotest(int rank, int size, char *enabled)
 
 	FPRINTF(stderr, "Testing with cache '%s'\n", enabled);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	for(x = 0; x < X; x++)
 	{
@@ -118,7 +116,6 @@ void dotest(int rank, int size, char *enabled)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 }
 
 int main(int argc, char **argv)

+ 3 - 6
mpi/tests/load_balancer.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     CNRS
- * Copyright (C) 2017                                     Université de Bordeaux
+ * Copyright (C) 2017-2018                                     Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,10 +55,8 @@ int main(int argc, char **argv)
 	itf.get_data_unit_to_migrate = get_data_unit_to_migrate;
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	unsetenv("STARPU_MPI_LB");
 	starpu_mpi_lb_init(NULL, NULL);
@@ -68,7 +66,6 @@ int main(int argc, char **argv)
 	starpu_mpi_lb_shutdown();
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();
 

+ 3 - 7
mpi/tests/matrix.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -51,10 +51,8 @@ int main(int argc, char **argv)
 	starpu_data_handle_t data_X[N];
 	starpu_data_handle_t data_Y;
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, 1);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
 	if (starpu_cpu_worker_get_count() == 0)
@@ -62,7 +60,6 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
@@ -131,7 +128,6 @@ int main(int argc, char **argv)
 	starpu_data_unregister(data_Y);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	FPRINTF(stdout, "[%d] Y=%u\n", rank, Y);
 

+ 3 - 7
mpi/tests/matrix2.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2015-2017                                CNRS
- * Copyright (C) 2015,2017                                Université de Bordeaux
+ * Copyright (C) 2015,2017-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -53,10 +53,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -71,7 +69,6 @@ int main(int argc, char **argv)
 				FPRINTF(stderr, "We need at least 1 CPU worker.\n");
 		}
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -137,7 +134,6 @@ int main(int argc, char **argv)
 	}
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	FPRINTF(stdout, "[%d] X[%d]=%u\n", rank, N-1, X[N-1]);
 

+ 3 - 7
mpi/tests/mpi_detached_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2010-2011,2014-2018                      Université de Bordeaux
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2013                                     Inria
  *
@@ -38,10 +38,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -52,7 +50,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -86,7 +83,6 @@ int main(int argc, char **argv)
 	free(tab);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 3 - 7
mpi/tests/mpi_earlyrecv.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2009-2010,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2010,2014-2015,2017-2018                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,10 +30,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -42,7 +40,6 @@ int main(int argc, char **argv)
 	{
 		FPRINTF_MPI(stderr, "We need a even number of processes.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -125,7 +122,6 @@ int main(int argc, char **argv)
 		starpu_data_unregister(tab_handle[i]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 3 - 7
mpi/tests/mpi_earlyrecv2.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2009-2010,2014-2015,2017                 Université de Bordeaux
+ * Copyright (C) 2009-2010,2014-2015,2017-2018                 Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -217,10 +217,8 @@ int main(int argc, char **argv)
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(&argc, &argv, mpi_init);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
@@ -229,7 +227,6 @@ int main(int argc, char **argv)
 	{
 		FPRINTF(stderr, "We need a even number of processes.\n");
 		starpu_mpi_shutdown();
-		starpu_shutdown();
 		if (!mpi_init)
 			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
@@ -260,7 +257,6 @@ int main(int argc, char **argv)
 		global_ret = ret;
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	if (!mpi_init)
 		MPI_Finalize();

+ 3 - 8
mpi/tests/mpi_earlyrecv2_sync.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2010,2015                           Université de Bordeaux
+ * Copyright (C) 2009-2010,2015, 2018                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -104,10 +104,8 @@ int exchange_variable(int rank)
 	starpu_data_handle_t tab_handle[NB];
 	int value[NB];
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
 
 	FPRINTF_MPI(stderr, "Exchanging variable data\n");
 
@@ -122,7 +120,6 @@ int exchange_variable(int rank)
 		starpu_data_unregister(tab_handle[i]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return ret;
 }
@@ -152,7 +149,6 @@ int exchange_void(int rank)
 		starpu_data_unregister(tab_handle[i]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return ret;
 }
@@ -201,7 +197,6 @@ int exchange_complex(int rank)
 		starpu_data_unregister(handle[i]);
 
 	starpu_mpi_shutdown();
-	starpu_shutdown();
 
 	return ret;
 }

+ 0 - 0
mpi/tests/mpi_irecv.c


Kaikkia tiedostoja ei voida näyttää, sillä liian monta tiedostoa muuttui tässä diffissä