浏览代码

Merge branch 'master' into fpga2

Nathalie Furmento 5 年之前
父节点
当前提交
3adbf06fc7
共有 100 个文件被更改,包括 10008 次插入和 727 次删除
  1. 1 2
      .dir-locals.el
  2. 2 0
      .gitignore
  3. 169 52
      ChangeLog
  4. 12 5
      Makefile.am
  5. 1 3
      README
  6. 1 3
      README.dev
  7. 1 3
      STARPU-VERSION
  8. 1 2
      TODO
  9. 1 3
      autogen.sh
  10. 142 54
      configure.ac
  11. 2 4
      contrib/ci.inria.fr/Jenkinsfile-basic
  12. 2 4
      contrib/ci.inria.fr/Jenkinsfile-bsd
  13. 3 5
      contrib/ci.inria.fr/Jenkinsfile-windows
  14. 1 1
      contrib/ci.inria.fr/job-0-tarball.sh
  15. 1 1
      contrib/ci.inria.fr/job-1-build-windows.sh
  16. 1 2
      contrib/ci.inria.fr/job-1-check-windows.bat
  17. 2 1
      contrib/ci.inria.fr/job-1-check.sh
  18. 4 9
      doc/Makefile.am
  19. 10 8
      doc/doxygen/Makefile.am
  20. 6 3
      doc/doxygen/chapters/000_introduction.doxy
  21. 18 17
      doc/doxygen/chapters/101_building.doxy
  22. 1 3
      doc/doxygen/chapters/110_basic_examples.doxy
  23. 1 3
      doc/doxygen/chapters/201_advanced_examples.doxy
  24. 76 69
      doc/doxygen/chapters/210_check_list_performance.doxy
  25. 16 14
      doc/doxygen/chapters/301_tasks.doxy
  26. 176 10
      doc/doxygen/chapters/310_data_management.doxy
  27. 19 10
      doc/doxygen/chapters/320_scheduling.doxy
  28. 4 6
      doc/doxygen/chapters/330_scheduling_contexts.doxy
  29. 1 3
      doc/doxygen/chapters/340_scheduling_context_hypervisor.doxy
  30. 3 5
      doc/doxygen/chapters/350_scheduling_policy_definition.doxy
  31. 1 3
      doc/doxygen/chapters/360_debugging_tools.doxy
  32. 457 15
      doc/doxygen/chapters/370_online_performance_tools.doxy
  33. 183 34
      doc/doxygen/chapters/380_offline_performance_tools.doxy
  34. 1 3
      doc/doxygen/chapters/390_faq.doxy
  35. 9 9
      doc/doxygen/chapters/401_out_of_core.doxy
  36. 3 4
      doc/doxygen/chapters/410_mpi_support.doxy
  37. 2 2
      doc/doxygen/chapters/415_fault_tolerance.doxy
  38. 2 4
      doc/doxygen/chapters/420_fft_support.doxy
  39. 2 4
      doc/doxygen/chapters/430_mic_support.doxy
  40. 13 14
      doc/doxygen/chapters/450_native_fortran_support.doxy
  41. 1 3
      doc/doxygen/chapters/460_socl_opencl_extensions.doxy
  42. 7 8
      doc/doxygen/chapters/470_simgrid.doxy
  43. 16 17
      doc/doxygen/chapters/480_openmp_runtime_support.doxy
  44. 2 4
      doc/doxygen/chapters/490_clustering_a_machine.doxy
  45. 1 1
      doc/doxygen/chapters/495_interoperability.doxy
  46. 79 25
      doc/doxygen/chapters/501_environment_variables.doxy
  47. 9 18
      doc/doxygen/chapters/510_configure_options.doxy
  48. 8 3
      doc/doxygen/chapters/520_files.doxy
  49. 1 3
      doc/doxygen/chapters/601_scaling_vector_example.doxy
  50. 1 3
      doc/doxygen/chapters/610_fdl_1_3.doxy
  51. 1 3
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  52. 1 3
      doc/doxygen/chapters/api/fft_support.doxy
  53. 1 3
      doc/doxygen/chapters/api/mic_extensions.doxy
  54. 1 3
      doc/doxygen/chapters/api/mpi.doxy
  55. 1 3
      doc/doxygen/chapters/api/opencl_extensions.doxy
  56. 1 2
      doc/doxygen/chapters/api/openmp_runtime_support.doxy
  57. 2 4
      doc/doxygen/chapters/api/scheduling_contexts.doxy
  58. 1 3
      doc/doxygen/chapters/api/scheduling_policy.doxy
  59. 25 6
      doc/doxygen/chapters/api/threads.doxy
  60. 1 3
      doc/doxygen/chapters/api/versioning.doxy
  61. 1 3
      doc/doxygen/chapters/api/workers.doxy
  62. 1 2
      doc/doxygen/chapters/code/complex.c
  63. 2 4
      doc/doxygen/chapters/code/disk_compute.c
  64. 2 4
      doc/doxygen/chapters/code/disk_copy.c
  65. 1 2
      doc/doxygen/chapters/code/forkmode.c
  66. 1 3
      doc/doxygen/chapters/code/multiformat.c
  67. 1 2
      doc/doxygen/chapters/code/nf_initexit.f90
  68. 1 2
      doc/doxygen/chapters/code/simgrid.c
  69. 1 3
      doc/doxygen/chapters/code/vector_scal_c.c
  70. 1 2
      doc/doxygen/chapters/code/vector_scal_cpu.c
  71. 1 2
      doc/doxygen/chapters/code/vector_scal_cuda.c
  72. 1 3
      doc/doxygen/chapters/code/vector_scal_opencl.c
  73. 1 1
      doc/doxygen/chapters/code/vector_scal_opencl_codelet.cl
  74. 二进制
      doc/doxygen/chapters/images/starvz_visu.eps
  75. 二进制
      doc/doxygen/chapters/images/starvz_visu.png
  76. 4274 0
      doc/doxygen/chapters/images/trace_recv_use.eps
  77. 二进制
      doc/doxygen/chapters/images/trace_recv_use.pdf
  78. 二进制
      doc/doxygen/chapters/images/trace_recv_use.png
  79. 4019 0
      doc/doxygen/chapters/images/trace_send_use.eps
  80. 二进制
      doc/doxygen/chapters/images/trace_send_use.pdf
  81. 二进制
      doc/doxygen/chapters/images/trace_send_use.png
  82. 48 14
      doc/doxygen/dev/checkDoc.sh
  83. 0 28
      doc/doxygen/dev/sc_funcs.cocci
  84. 1 1
      doc/doxygen/dev/starpu_check_documented.py
  85. 23 28
      doc/doxygen/dev/starpu_check_refs.sh
  86. 63 36
      doc/doxygen/dev/starpu_check_undocumented.sh
  87. 0 28
      doc/doxygen/dev/starpu_funcs.cocci
  88. 14 6
      doc/doxygen/doxygen-config.cfg.in
  89. 3 4
      doc/doxygen/doxygen.cfg
  90. 1 1
      doc/doxygen/doxygen_filter.sh.in
  91. 18 8
      doc/doxygen/refman.tex
  92. 5 7
      doc/doxygen_dev/Makefile.am
  93. 1 1
      doc/doxygen_dev/chapters/000_introduction.doxy
  94. 1 1
      doc/doxygen_dev/chapters/010_core.doxy
  95. 2 4
      doc/doxygen_dev/doxygen-config.cfg.in
  96. 2 4
      doc/doxygen_dev/doxygen.cfg
  97. 1 1
      doc/doxygen_dev/doxygen_filter.sh.in
  98. 4 4
      doc/doxygen_dev/refman.tex
  99. 2 3
      doc/tutorial/Makefile
  100. 0 0
      doc/tutorial/README

+ 1 - 2
.dir-locals.el

@@ -1,7 +1,6 @@
 ;; StarPU --- Runtime system for heterogeneous multicore architectures.
 ;;
-;; Copyright (C) 2011                                     Inria
-;; Copyright (C) 2011,2012,2017                           CNRS
+;; Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 ;;
 ;; StarPU is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU Lesser General Public License as published by

+ 2 - 0
.gitignore

@@ -11,6 +11,7 @@
 /GRTAGS
 /GTAGS
 /tags
+/TAGS
 /config.cache
 /doc/starpu.info
 *~
@@ -24,6 +25,7 @@
 .dirstamp
 .tramp_history
 *.pc
+*.vim
 stamp-h[0-9]*
 starpu.log
 /tests/datawizard/handle_to_pointer

+ 169 - 52
ChangeLog

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2012-2014,2016-2018                      Inria
-# Copyright (C) 2009-2019                                Université de Bordeaux
-# Copyright (C) 2010-2019                                CNRS
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,14 +14,93 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 
-StarPU 1.4.0 (svn revision xxxx)
+StarPU 1.4.0 (git revision xxxx)
 ==============================================
 New features:
   * Fault tolerance support with starpu_task_ft_failed().
+  * Add get_max_size method to data interfaces for applications using data with
+    variable size to express their maximal potential size.
+  * New offline tool to draw graph showing elapsed time between sent
+    or received data and their use by tasks
+  * Add 4D tensor data interface.
+  * New sched_tasks.rec trace file which monitors task scheduling push/pop actions
+  * New STARPU_MPI_MEM_THROTTLE environment variable to throttle mpi
+    submission according to memory use.
+  * New number_events.data trace file which monitors number of events in trace
+    files. This file can be parsed by the new script
+    starpu_fxt_number_events_to_names.py to convert event keys to event names.
 
-StarPU 1.3.2 (git revision xxx)
+Small changes:
+  * Use the S4U interface of Simgrid instead of xbt and MSG.
+
+StarPU 1.3.4 (git revision xxx)
 ==============================================
 
+Small features:
+  * New environment variables STARPU_BUS_STATS_FILE and
+    STARPU_WORKER_STATS_FILE to specify files in which to display
+    statistics about data transfers and workers.
+  * Add starpu_bcsr_filter_vertical_block filtering function.
+  * Add starpu_interface_copy2d, 3d, and 4d to easily request data copies from
+    data interfaces.
+  * Move optimized cuda 2d copy from interfaces to new
+    starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use
+    them from starpu_interface_copy2d and 3d.
+  * New function starpu_task_watchdog_set_hook to specify a function
+    to be called when the watchdog is raised
+
+StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
+====================================================================
+
+New features:
+  * New semantic for starpu_task_insert() and alike parameters
+    STARPU_CALLBACK_ARG, STARPU_PROLOGUE_CALLBACK_ARG, and
+    STARPU_PROLOGUE_CALLBACK_POP_ARG which set respectively
+    starpu_task::callback_arg_free,
+    starpu_task::prologue_callback_arg_free and
+    starpu_task::prologue_callback_pop_arg_free to 1 when used.
+    New parameters STARPU_CALLBACK_ARG_NFREE,
+    STARPU_CALLBACK_WITH_ARG_NFREE, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, and
+    STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE which set the corresponding
+    fields of starpu_task to 0.
+  * starpufft: Support 3D.
+  * New modular-eager-prio scheduler.
+  * Add 'ready' heuristic to modular schedulers.
+  * New modular-heteroprio scheduler.
+  * Add STARPU_TASK_SCHED_DATA
+  * Add support for staging schedulers.
+  * New modular-heteroprio-heft scheduler.
+  * New dmdap "data-aware performance model (priority)" scheduler
+
+Changes:
+  * Modification in the Native Fortran interface of the functions
+    fstarpu_mpi_task_insert, fstarpu_mpi_task_build and
+    fstarpu_mpi_task_post_build to only take 1 parameter being the MPI
+    communicator, the codelet and the various parameters for the task.
+
+Small features:
+  * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS
+    allowing to set the fields starpu_task::workerids_len and
+    starpu_task::workerids
+  * New starpu_task_insert() and alike parameters
+    STARPU_SEQUENTIAL_CONSISTENCY, STARPU_TASK_NO_SUBMITORDER and
+    STARPU_TASK_PROFILING_INFO
+  * New function starpu_create_callback_task() which creates and
+    submits an empty task with the specified callback
+
+
+Small changes:
+   * Default modular worker queues to 2 tasks unless it's a heft
+     scheduler
+   * Separate out STATUS_SLEEPING_SCHEDULING state from
+     STATUS_SLEEPING state
+     When running the scheduler while being idle, workers do not go in
+     the STATUS_SCHEDULING state, so that that time is considered as
+     idle time instead of overhead.
+
+StarPU 1.3.2 (git revision af22a20fc00a37addf3cc6506305f89feed940b0)
+====================================================================
+
 Small changes:
   * Improve OpenMP support to detect the environment is valid before
     launching OpenMP
@@ -35,7 +112,7 @@ Small changes:
     communications progress.
 
 StarPU 1.3.1 (git revision 01949488b4f8e6fe26d2c200293b8aae5876b038)
-==============================================
+====================================================================
 
 Small features:
   * Add starpu_filter_nparts_compute_chunk_size_and_offset helper.
@@ -46,7 +123,7 @@ Small changes:
     library is available, also check the compiled code can be run.
 
 StarPU 1.3.0 (git revision 24ca83c6dbb102e1cfc41db3bb21c49662067062)
-==============================================
+====================================================================
 
 New features:
   * New scheduler 'heteroprio' with heterogeneous priorities
@@ -194,33 +271,41 @@ Small changes:
   * STARPU_COMM_STATS also displays the bandwidth
   * Update data interfaces implementations to only use public API
 
-StarPU 1.2.9 (git revision xxx)
-==============================================
+StarPU 1.2.9 (git revision 3aca8da3138a99e93d7f93905d2543bd6f1ea1df)
+====================================================================
+
+Small changes:
   * Add STARPU_SIMGRID_TRANSFER_COST environment variable to easily disable
     data transfer costs.
+  * New dmdap "data-aware performance model (priority)" scheduler
+  * Modification in the Native Fortran interface of the functions
+    fstarpu_mpi_task_insert, fstarpu_mpi_task_build and
+    fstarpu_mpi_task_post_build to only take 1 parameter being the MPI
+    communicator, the codelet and the various parameters for the task.
 
 StarPU 1.2.8 (git revision f66374c9ad39aefb7cf5dfc31f9ab3d756bcdc3c)
-==============================================
+====================================================================
 
 Small features:
   * Minor fixes
 
 StarPU 1.2.7 (git revision 07cb7533c22958a76351bec002955f0e2818c530)
-==============================================
+====================================================================
 
 Small features:
   * Add STARPU_HWLOC_INPUT environment variable to save initialization time.
   * Add starpu_data_set/get_ooc_flag.
+  * Use starpu_mpi_tag_t (int64_t) for MPI communication tag
 
 StarPU 1.2.6 (git revision 23049adea01837479f309a75c002dacd16eb34ad)
-==============================================
+====================================================================
 
 Small changes:
   * Fix crash for lws scheduler
   * Avoid making hwloc load PCI topology when CUDA is not enabled
 
 StarPU 1.2.5 (git revision 22f32916916d158e3420033aa160854d1dd341bd)
-==============================================
+====================================================================
 
 Small features:
   * Add a new value STARPU_TASK_COLOR to be used in
@@ -231,7 +316,7 @@ Changes:
   * Do not export -lcuda -lcudart -lOpenCL in *starpu*.pc.
 
 StarPU 1.2.4 (git revision 255cf98175ef462749780f30bfed21452b74b594)
-==============================================
+====================================================================
 
 Small features:
    * Catch of signals SIGINT and SIGSEGV to dump fxt trace files.
@@ -248,8 +333,8 @@ Small features:
    * Add a way to choose the dag.dot colors.
 
 
-StarPU 1.2.3 (svn revision 22444)
-==============================================
+StarPU 1.2.3 (git revision 586ba6452a8eef99f275c891ce08933ae542c6c2)
+====================================================================
 
 New features:
   * Add per-node MPI data.
@@ -281,8 +366,8 @@ Small changes:
     reduction methods are provided, and make sure a handle is
     initialized before trying to read it.
 
-StarPU 1.2.2 (svn revision 21308)
-==============================================
+StarPU 1.2.2 (git revision a0b01437b7b91f33fb3ca36bdea35271cad34464)
+===================================================================
 
 New features:
   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
@@ -323,8 +408,8 @@ Small changes:
   * Fix odd ordering of CPU workers on CPUs due to GPUs stealing some
     cores
 
-StarPU 1.2.1 (svn revision 20299)
-==============================================
+StarPU 1.2.1 (git revision 473acaec8a1fb4f4c73d8b868e4f044b736b41ea)
+====================================================================
 
 New features:
   * Add starpu_fxt_trace_user_event_string.
@@ -360,8 +445,8 @@ Small changes:
     temporary data
   * Fix compatibility with simgrid 3.14
 
-StarPU 1.2.0 (svn revision 18521)
-==============================================
+StarPU 1.2.0 (git revision 5a86e9b61cd01b7797e18956283cc6ea22adfe11)
+====================================================================
 
 New features:
   * MIC Xeon Phi support
@@ -562,19 +647,51 @@ Small changes:
     STARPU_NMIC will be the number of devices, and STARPU_NMICCORES
     will be the number of cores per device.
 
-StarPU 1.1.5 (svn revision xxx)
-==============================================
+StarPU 1.1.8 (git revision f7b7abe9f86361cbc96f2b51c6ad7336b7d1d628)
+====================================================================
+The scheduling context release
+
+Small changes:
+  * Fix compatibility with simgrid 3.14
+  * Fix lock ordering for memory reclaiming
+
+StarPU 1.1.7 (git revision 341044b67809892cf4a388e482766beb50256907)
+====================================================================
 The scheduling context release
 
+Small changes:
+  * Fix type of data home node to allow users to pass -1 to define
+    temporary data
+
+StarPU 1.1.6 (git revision cdffbd5f5447e4d076d659232b3deb14f3c20da6)
+====================================================================
+The scheduling context release
+
+Small features:
+  * Add starpu_task_get_task_succs to get the list of children of a given
+    task.
+  * Ranges can be provided in STARPU_WORKERS_CPUID
+
+Small changes:
+  * Various fixes for MacOS and windows systems
+
+StarPU 1.1.5 (git revision 20469c6f3e7ecd6c0568c8e4e4b5b652598308d8)
+====================================================================
+The scheduling context release
+
+New features:
   * Add starpu_memory_pin and starpu_memory_unpin to pin memory allocated
     another way than starpu_malloc.
   * Add starpu_task_wait_for_n_submitted() and
     STARPU_LIMIT_MAX_NSUBMITTED_TASKS/STARPU_LIMIT_MIN_NSUBMITTED_TASKS to
     easily control the number of submitted tasks by making task submission
     block.
+  * Add STARPU_NOWHERE to create synchronization tasks with data.
+  * Document how to switch between different views of the same data.
+  * Add Fortran 90 module and example using it
 
-StarPU 1.1.4 (svn revision 14856)
-==============================================
+StarPU 1.1.4 (git revision 2a3d30b28d6d099d271134a786335acdbb3931a3)
+====================================================================
 The scheduling context release
 
 New features:
@@ -608,8 +725,8 @@ Small features:
 Changes:
   * Fix complexity of implicit task/data dependency, from quadratic to linear.
 
-StarPU 1.1.3 (svn revision 13450)
-==============================================
+StarPU 1.1.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
+====================================================================
 The scheduling context release
 
 New features:
@@ -624,8 +741,8 @@ Small changes:
     issues on parallel launches, MPI runs notably.
   * Lots of build fixes for icc on Windows.
 
-StarPU 1.1.2 (svn revision 13011)
-==============================================
+StarPU 1.1.2 (git revision d14c550798630bbc4f3da2b07d793c47e3018f02)
+====================================================================
 The scheduling context release
 
 New features:
@@ -638,8 +755,8 @@ New features:
   * Add STARPU_TRACE_BUFFER_SIZE environment variable to specify the size of
     the trace buffer.
 
-StarPU 1.1.1 (svn revision 12638)
-==============================================
+StarPU 1.1.1 (git revision dab2e51117fac5bef767f3a6b7677abb2147d2f2)
+====================================================================
 The scheduling context release
 
 New features:
@@ -683,8 +800,8 @@ Small changes:
     configure option --enable-starpufft-examples needs to be specified
     to change this behaviour.
 
-StarPU 1.1.0 (svn revision 11960)
-==============================================
+StarPU 1.1.0 (git revision 3c4bc72ccef30e767680cad3d749c4e9010d4476)
+====================================================================
 The scheduling context release
 
 New features:
@@ -921,8 +1038,8 @@ Small changes:
   * Fix performance regression: dmda queues were inadvertently made
     LIFOs in r9611.
 
-StarPU 1.0.3 (svn revision 7379)
-==============================================
+StarPU 1.0.3 (git revision 25f8b3a7b13050e99bf1725ca6f52cfd62e7a861)
+====================================================================
 
 Changes:
   * Several bug fixes in the build system
@@ -930,8 +1047,8 @@ Changes:
   * Fix generating FXT traces bigger than 64MiB.
   * Improve ENODEV error detections in StarPU FFT
 
-StarPU 1.0.2 (svn revision 7210)
-==============================================
+StarPU 1.0.2 (git revision 6f95de279d6d796a39debe8d6c5493b3bdbe0c37)
+====================================================================
 
 Changes:
   * Add starpu_block_shadow_filter_func_vector and an example.
@@ -940,8 +1057,8 @@ Changes:
   * Fix parallel tasks CPU binding and combined worker generation.
   * Fix generating FXT traces bigger than 64MiB.
 
-StarPU 1.0.1 (svn revision 6659)
-==============================================
+StarPU 1.0.1 (git revision 97ea6e15a273e23e4ddabf491b0f9481373ca01a)
+====================================================================
 
 Changes:
   * hwloc support. Warn users when hwloc is not found on the system and
@@ -955,8 +1072,8 @@ Changes:
   * Update SOCL to use new API
   * Documentation improvement.
 
-StarPU 1.0.0 (svn revision 6306)
-==============================================
+StarPU 1.0.0 (git revision d3ad9ca318ec9acfeaf8eb7d8a018b09e4722292)
+====================================================================
 The extensions-again release
 
 New features:
@@ -1042,8 +1159,8 @@ Small changes:
   * Documentation improvement.
 
 
-StarPU 0.9 (svn revision 3721)
-==============================================
+StarPU 0.9 (git revision 12bba8528fc0d85367d885cddc383ba54efca464)
+==================================================================
 The extensions release
 
   * Provide the STARPU_REDUX data access mode
@@ -1065,8 +1182,8 @@ The extensions release
   * Add stencil MPI example
   * Initial support for CUDA4
 
-StarPU 0.4 (svn revision 2535)
-==============================================
+StarPU 0.4 (git revision ad8d8be3619f211f228c141282d7d504646fc2a6)
+==================================================================
 The API strengthening release
 
   * Major API improvements
@@ -1091,8 +1208,8 @@ The API strengthening release
     - Add OpenCL support
     - Add support for Windows
 
-StarPU 0.2.901 aka 0.3-rc1 (svn revision 1236)
-==============================================
+StarPU 0.2.901 aka 0.3-rc1 (git revision 991f2abb772c17c3d45bbcf27f46197652e6a3ef)
+==================================================================================
 The asynchronous heterogeneous multi-accelerator release
 
   * Many API changes and code cleanups
@@ -1120,8 +1237,8 @@ The asynchronous heterogeneous multi-accelerator release
     specify where to bind the workers
   * Use the hwloc library to detect the actual number of cores
 
-StarPU 0.2.0 (svn revision 1013)
-==============================================
+StarPU 0.2.0 (git revision 73e989f0783e10815aff394f80242760c4ed098c)
+====================================================================
 The Stabilizing-the-Basics release
 
   * Various API cleanups
@@ -1135,8 +1252,8 @@ The Stabilizing-the-Basics release
   * More examples are supplied
 
 
-StarPU 0.1.0 (svn revision 794)
-==============================================
+StarPU 0.1.0 (git revision 911869a96b40c74eb92b30a43d3e08bf445d8078)
+====================================================================
 First release.
 
 Status:

+ 12 - 5
Makefile.am

@@ -1,9 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011-2018                                Inria
-# Copyright (C) 2009-2017                                Université de Bordeaux
-# Copyright (C) 2017                                     Guillaume Beauchamp
-# Copyright (C) 2010-2019                                CNRS
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2017       Guillaume Beauchamp
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -109,7 +107,9 @@ versinclude_HEADERS = 				\
 	include/starpu_simgrid_wrap.h		\
 	include/starpu_mod.f90			\
 	include/fstarpu_mod.f90			\
-	include/starpu_clusters.h
+	include/starpu_clusters.h		\
+	include/starpu_perf_monitoring.h	\
+	include/starpu_perf_steering.h
 
 nodist_versinclude_HEADERS = 			\
 	include/starpu_config.h
@@ -164,6 +164,13 @@ recheck:
 	done ; \
 	exit $$RET
 
+showfailed:
+	@RET=0 ; \
+	for i in $(SUBDIRS) ; do \
+		make -s -C $$i showfailed || RET=1 ; \
+	done ; \
+	exit $$RET
+
 showcheck:
 	RET=0 ; \
 	for i in $(SUBDIRS) ; do \

+ 1 - 3
README

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010                                     Inria
-# Copyright (C) 2009-2012,2014,2016                      Université de Bordeaux
-# Copyright (C) 2010-2015                                CNRS
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
README.dev

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011,2012                                Inria
-# Copyright (C) 2010-2012,2015-2017,2019                 CNRS
-# Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
STARPU-VERSION

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2012,2018                                Inria
-# Copyright (C) 2012,2015,2017                           CNRS
-# Copyright (C) 2012-2014                                Université de Bordeaux
+# Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
TODO

@@ -1,7 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2012                                     Inria
-# Copyright (C) 2012-2013                                CNRS
+# Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
autogen.sh

@@ -1,9 +1,7 @@
 #!/bin/sh
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010,2011,2014,2016                      Université de Bordeaux
-# Copyright (C) 2010,2015,2017                           CNRS
-# Copyright (C) 2017                                     Inria
+# Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 142 - 54
configure.ac

@@ -1,13 +1,11 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011-2018                                Inria
-# Copyright (C) 2009-2019                                Université de Bordeaux
-# Copyright (C) 2017                                     Guillaume Beauchamp
-# Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
-# Copyright (C) 2018                                     Umeà University
-# Copyright (C) 2010-2019                                CNRS
-# Copyright (C) 2013                                     Thibaut Lambert
-# Copyright (C) 2011                                     Télécom-SudParis
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2011       Télécom-SudParis
+# Copyright (C) 2013       Thibaut Lambert
+# Copyright (C) 2017       Guillaume Beauchamp
+# Copyright (C) 2018,2020  Federal University of Rio Grande do Sul (UFRGS)
+# Copyright (C) 2018       Umeå University
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -20,7 +18,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://runtime.bordeaux.inria.fr/StarPU/])
+AC_INIT([StarPU], [1.3.99], [starpu-devel@lists.gforge.inria.fr], [starpu], [http://starpu.gforge.inria.fr/])
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
 
@@ -64,12 +62,7 @@ AC_SUBST([LIBSOCL_INTERFACE_AGE])
 
 AC_CANONICAL_SYSTEM
 
-dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
-dnl when they're available.
-dnl do not use option subdir-objects, it causes fortran compilation to fail
-m4_ifdef([AM_SILENT_RULES],
-  [AM_INIT_AUTOMAKE([1.11 -Wall foreign silent-rules color-tests parallel-tests])],
-  [AM_INIT_AUTOMAKE([1.10 -Wall foreign])])
+AM_INIT_AUTOMAKE([1.11 -Wall -Wno-portability foreign silent-rules color-tests parallel-tests subdir-objects])
 
 m4_ifdef([AM_SILENT_RULES],
   [AM_SILENT_RULES(yes)])
@@ -273,34 +266,38 @@ if test x$enable_simgrid = xyes ; then
 		]
 	)
 	AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])])
+	AC_CHECK_HEADERS([msg/msg.h], [AC_DEFINE([STARPU_HAVE_MSG_MSG_H], [1], [Define to 1 if you have msg.h in msg/.])])
 	AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])])
+	AC_CHECK_HEADERS([xbt/base.h], [AC_DEFINE([STARPU_HAVE_XBT_BASE_H], [1], [Define to 1 if you have base.h in xbt/.])])
+	AC_CHECK_HEADERS([simgrid/version.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_VERSION_H], [1], [Define to 1 if you have version.h in simgrid/.])], [], [[
+			  #ifdef STARPU_HAVE_XBT_BASE_H
+			  #include <xbt/base.h>
+			  #endif
+			  ]])
 	AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])])
 	AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])])
+	AC_CHECK_HEADERS([xbt/config.h], [AC_DEFINE([STARPU_HAVE_XBT_CONFIG_H], [1], [Define to 1 if you have config.h in xbt/.])])
+	AC_CHECK_HEADERS([simgrid/actor.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ACTOR_H], [1], [Define to 1 if you have actor.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/engine.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ENGINE_H], [1], [Define to 1 if you have engine.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/semaphore.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SEMAPHORE_H], [1], [Define to 1 if you have semaphore.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/mutex.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MUTEX_H], [1], [Define to 1 if you have mutex.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/cond.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_COND_H], [1], [Define to 1 if you have cond.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/barrier.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_BARRIER_H], [1], [Define to 1 if you have barrier.h in simgrid/.])])
+	AC_CHECK_HEADERS([simgrid/engine.h])
+	AC_CHECK_HEADERS([simgrid/zone.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ZONE_H], [1], [Define to 1 if you have zone.h in simgrid/.])])
 	AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include <simgrid/simix.h>]])
 
 	# Latest functions
-	AC_CHECK_FUNCS([MSG_process_attach MSG_zone_get_hosts MSG_process_self_name MSG_process_userdata_init])
-	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
+	AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_init MSG_zone_get_hosts sg_zone_get_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_data])
+	AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_name sg_host_route sg_host_self sg_host_speed simcall_process_create sg_config_continue_after_help])
+	AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])])
 	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
+	AC_CHECK_FUNCS([sg_actor_sleep_for sg_actor_self sg_actor_ref sg_host_get_properties sg_host_send_to sg_host_sendto sg_cfg_set_int sg_actor_self_execute sg_actor_execute simgrid_get_clock])
 	AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include <smpi/smpi.h>]])
 
 	# Oldies for compatibility with older simgrid
 	AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed])
 
-	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
-		    		[[
-#ifdef STARPU_HAVE_SIMGRID_MSG_H
-#include <simgrid/msg.h>
-#else
-#include <msg/msg.h>
-#endif
-				 ]],
-				[[msg_host_t foo; ]]
-			    )],
-	                 [],
-	                 [
-			   AC_MSG_ERROR(StarPU needs a version of Simgrid which defines the type msg_host_t (should be any version >= 3.8.1))
-		         ])
 	AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution])
 	# We won't bind or detect anything
 	with_hwloc=no
@@ -314,19 +311,23 @@ if test x$enable_simgrid = xyes ; then
 	if test x$enable_shared = xno ; then
 		# When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp
 		SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++"
-		LDFLAGS="$LDFLAGS -lstdc++"
+		LIBS="$LIBS -lstdc++"
 	fi
 
-	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-			  #ifdef HAVE_SIMGRID_MSG_H
-			  #include <simgrid/msg.h>
-			  #include <simgrid/host.h>
-			  #else
-			  #include <msg/msg.h>
-			  #endif
-			  ]])],,
-			  CXXFLAGS="-std=c++11 $CXXFLAGS"
-			  NVCCFLAGS="-std=c++11 $NVCCFLAGS")
+	case \ $CXXFLAGS\  in 
+	*\ -std=*\ *) ;;
+	*) 
+		AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+				  #ifdef STARPU_HAVE_SIMGRID_MSG_H
+				  #include <simgrid/msg.h>
+				  #include <simgrid/host.h>
+				  #else
+				  #include <msg/msg.h>
+				  #endif
+				  ]])],,
+				  CXXFLAGS="-std=c++11 $CXXFLAGS")
+		;;
+	esac
 	AC_LANG_POP([C++])
 	AC_ARG_ENABLE(simgrid-mc, [AS_HELP_STRING([--enable-simgrid-mc],
 				[Enable using Model Checker of simgrid])],
@@ -334,6 +335,7 @@ if test x$enable_simgrid = xyes ; then
 	if test x$enable_simgrid_mc = xyes ; then
 		AC_DEFINE(STARPU_SIMGRID_MC, [1], [Define this to enable Model Checker in simgrid execution])
 		AC_PATH_PROG([SIMGRID_MC], [simgrid-mc], [no], [$simgrid_dir/bin:$PATH])
+		LDFLAGS="$LDFLAGS -Wl,-znorelro -Wl,-znoseparate-code"
 	fi
 fi
 AM_CONDITIONAL(STARPU_SIMGRID_MC, test x$enable_simgrid_mc = xyes)
@@ -499,7 +501,7 @@ fi
 AC_SUBST(CC_OR_MPICC, $cc_or_mpicc)
 
 AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend],
-				   [Enable StarPU MPI pedantic isend])],
+				   [Prevent StarPU MPI from reading buffers while being sent over MPI])],
 				   enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no)
 if test x$enable_mpi_pedantic_isend = xyes; then
 	AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend])
@@ -697,9 +699,6 @@ if test x$enable_simgrid = xno ; then
 fi
 
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
-if test x$running_mpi_check = xyes -a x$enable_simgrid = xyes -a x$enable_shared = xyes ; then
-    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this])
-fi
 if test x$use_mpi = xyes ; then
     AC_MSG_CHECKING(whether MPI tests should be run)
     AC_MSG_RESULT($running_mpi_check)
@@ -727,6 +726,19 @@ else
 	running_mpi_check=no
 fi
 
+if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then
+    if test x$enable_simgrid = xyes ; then
+        if test x$enable_shared = xyes ; then
+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, use --disable-shared to fix this, or disable MPI with --disable-mpi])
+        else
+	    CFLAGS="$CFLAGS -fPIC"
+	    CXXFLAGS="$CXXFLAGS -fPIC"
+	    NVCCFLAGS="$NVCCFLAGS --compiler-options -fPIC"
+	    FFLAGS="$FFLAGS -fPIC"
+        fi
+    fi
+fi
+
 AM_CONDITIONAL(STARPU_USE_MPI_MPI, test x$build_mpi_lib = xyes)
 AM_CONDITIONAL(STARPU_USE_MPI_NMAD, test x$build_nmad_lib = xyes)
 AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes)
@@ -910,6 +922,12 @@ if test x$have_pthread_setname_np = xyes; then
 	AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available])
 fi
 
+if test "x$cross_compiling" = "xno"; then
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER)
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_cond_t, PTHREAD_COND_INITIALIZER)
+	STARPU_INIT_ZERO([[#include <pthread.h>]], pthread_rwlock_t, PTHREAD_RWLOCK_INITIALIZER)
+fi
+
 # There is no posix_memalign on Mac OS X, only memalign
 AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])])
 AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])])
@@ -1013,15 +1031,15 @@ if test "x$enable_hdf5" != xno ; then
 	enable_include_hdf5=no
 	for f in $hdf5_inc_dir; do
 		if test -n "$f" ; then
-			SAVED_CFLAGS="${CFLAGS}"
-			CFLAGS=-I${f}
+			SAVED_CPPFLAGS="${CPPFLAGS}"
+			CPPFLAGS="$CPPFLAGS -I$f"
 			AC_CHECK_HEADERS([hdf5.h])
 			if test "$ac_cv_header_hdf5_h" = "yes" ; then
-				CFLAGS="-I${f} ${SAVED_CFLAGS}"
+				CPPFLAGS="-I${f} ${SAVED_CPPFLAGS}"
 				enable_include_hdf5=yes
 				break
 			else
-				CFLAGS=${SAVED_CFLAGS}
+				CPPFLAGS=${SAVED_CPPFLAGS}
 			fi
 			unset ac_cv_header_hdf5_h
 		fi
@@ -1076,6 +1094,21 @@ STARPU_CHECK_SYNC_FETCH_AND_OR
 # This defines HAVE_SYNC_LOCK_TEST_AND_SET
 STARPU_CHECK_SYNC_LOCK_TEST_AND_SET
 
+# This defines HAVE_ATOMIC_COMPARE_EXCHANGE_N
+STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N
+
+# This defines HAVE_ATOMIC_EXCHANGE_N
+STARPU_CHECK_ATOMIC_EXCHANGE_N
+
+# This defines HAVE_ATOMIC_FETCH_ADD
+STARPU_CHECK_ATOMIC_FETCH_ADD
+
+# This defines HAVE_ATOMIC_FETCH_OR
+STARPU_CHECK_ATOMIC_FETCH_OR
+
+# This defines HAVE_ATOMIC_TEST_AND_SET
+STARPU_CHECK_ATOMIC_TEST_AND_SET
+
 # This defines HAVE_SYNC_SYNCHRONIZE
 STARPU_CHECK_SYNC_SYNCHRONIZE
 
@@ -1416,6 +1449,25 @@ if test x$enable_cuda = xyes; then
 	STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart"
 	STARPU_CUFFT_LDFLAGS="-lcufft"
 
+	AC_LANG_PUSH([C++])
+	case \ $NVCCFLAGS\  in 
+	*\ -std=*\ *) ;;
+	*) 
+		SAVED_CXX="$CXX"
+		CXX="$NVCC"
+		AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+				  #ifdef STARPU_HAVE_SIMGRID_MSG_H
+				  #include <simgrid/msg.h>
+				  #include <simgrid/host.h>
+				  #else
+				  #include <msg/msg.h>
+				  #endif
+				  ]])],,
+				  NVCCFLAGS="-std=c++11 $NVCCFLAGS")
+		CXX="$SAVED_CXX"
+	esac
+	AC_LANG_POP([C++])
+
         if test "$F77" = "gfortran" -o "$FC" = "gfortran" ; then
             STARPU_CUDA_FORTRAN_LDFLAGS="-lgfortran"
             AC_SUBST(STARPU_CUDA_FORTRAN_LDFLAGS)
@@ -1458,7 +1510,7 @@ if test x$enable_cuda = xyes; then
 		])
 	    ],
 	    [
-	    AC_MSG_ERROR([NVML found, but nvml.h could not be compiled])
+	    AC_MSG_WARN([NVML found, but nvml.h could not be compiled])
 	    have_valid_nvml="no"
 	    ]
 	)
@@ -2176,12 +2228,14 @@ if test x$use_fxt = xyes; then
 	LDFLAGS="$LDFLAGS $FXT_LDFLAGS"
    	AC_CHECK_FUNCS([enable_fut_flush])
    	AC_CHECK_FUNCS([fut_set_filename])
+	AC_CHECK_FUNCS([fut_setup_flush_callback])
 	LDFLAGS="$save_LDFLAGS"
 	LIBS="$save_LIBS"
 	save_CFLAGS="$CFLAGS"
 	CFLAGS="$CFLAGS $FXT_CFLAGS"
 	AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
 	AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
+	AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fut.h>]])
 	CFLAGS="$save_CFLAGS"
 
 	if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
@@ -2190,6 +2244,10 @@ if test x$use_fxt = xyes; then
 		FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed"
 	fi
 
+	AC_CHECK_LIB([papi], [PAPI_library_init],
+		     [AC_DEFINE([STARPU_PAPI], [1], [Define to 1 if you have the libpapi library])
+		      PAPI_LIBS=-lpapi])
+
 	##########################################
 	# Poti is a library to generate paje trace files
 	##########################################
@@ -2299,6 +2357,10 @@ fi
 AM_CONDITIONAL([STARPU_USE_AYUDAME1], [test "x$enable_ayudame1" = "xyes"])
 AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 
+
+STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`"
+AC_SUBST([STARPU_FXT_EVENT_DEFINES])
+
 ###############################################################################
 #                                                                             #
 #                  Miscellaneous options for StarPU                           #
@@ -3056,6 +3118,14 @@ if test x$blas_lib = xmaybe; then
     fi
 fi
 
+if test x$blas_lib = xsystem; then
+    AC_CHECK_HEADER([cblas.h], [have_cblas_h=yes], [have_cblas_h=no])
+fi
+AM_CONDITIONAL(STARPU_HAVE_CBLAS_H, test x$have_cblas_h = xyes)
+if test x$have_cblas_h = xyes; then
+    AC_DEFINE(STARPU_HAVE_CBLAS_H, [1], [The blas library has blas.h])
+fi
+
 AM_CONDITIONAL(ATLAS_BLAS_LIB, test x$blas_lib = xatlas)
 AM_CONDITIONAL(GOTO_BLAS_LIB, test x$blas_lib = xgoto)
 AM_CONDITIONAL(MKL_BLAS_LIB, test x$blas_lib = xmkl)
@@ -3072,9 +3142,15 @@ AC_SUBST(BLAS_LIB,$blas_lib)
 #			 Multiple linear regression			      #
 #                                                                             #
 ###############################################################################
+if test x$enable_simgrid = xyes ; then
+	# There is no need for building mlr models in simgrid mode
+	default_enable_mlr=no
+else
+	default_enable_mlr=yes
+fi
 AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--disable-mlr],
 			[Disable multiple linear regression models])],
-			enable_mlr=$enableval, enable_mlr=yes)
+			enable_mlr=$enableval, enable_mlr=$default_enable_mlr)
 
 AC_MSG_CHECKING(whether multiple linear regression models are disabled)
 if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
@@ -3082,6 +3158,9 @@ if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
 	install_min_dgels=no
 	support_mlr=yes
    	STARPU_SEARCH_LIBS(LAPACK,[dgels_],[lapack],use_system_lapack=yes,,)
+	if test x$blas_lib = xnone ; then
+	   use_system_lapack=no
+	fi
 	if test x$use_system_lapack = xyes; then
 	   	AC_DEFINE(STARPU_MLR_MODEL, [1], [use reflapack library])
 		LDFLAGS="-llapack $LDFLAGS"
@@ -3205,7 +3284,7 @@ AS_IF([test "$use_hwloc" != "no"],
 AM_CONDITIONAL(STARPU_HAVE_HWLOC, test "x$have_valid_hwloc" = "xyes")
 # in case hwloc was explicitely required, but is not available, this is an error
 AS_IF([test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no"],
-      [AC_MSG_ERROR([cannot find hwloc])]
+      [AC_MSG_ERROR([cannot find hwloc or pkg-config])]
      )
 # in case hwloc is not available but was not explicitely disabled, this is an error
 AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"],
@@ -3498,7 +3577,7 @@ AM_CONDITIONAL(AVAILABLE_DOC, [test x$available_doc != xno])
 ###############################################################################
 
 # these are the flags needed for linking libstarpu (and thus also for static linking)
-LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS $DGELS_LIBS"
+LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $HWLOC_LIBS $FXT_LIBS $PAPI_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS $DGELS_LIBS"
 AC_SUBST([LIBSTARPU_LDFLAGS])
 
 LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la
@@ -3522,6 +3601,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x tools/starpu_codelet_profile
   chmod +x tools/starpu_codelet_histo_profile
   chmod +x tools/starpu_mpi_comm_matrix.py
+  chmod +x tools/starpu_fxt_number_events_to_names.py
   chmod +x tools/starpu_workers_activity
   chmod +x tools/starpu_paje_draw_histogram
   chmod +x tools/starpu_paje_state_stats
@@ -3532,6 +3612,9 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x doc/doxygen/doxygen_filter.sh
   chmod +x doc/doxygen_dev/doxygen_filter.sh
   mkdir -p tests/microbenchs
+  test -e tests/microbenchs/tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_data_overhead.sh tests/microbenchs/
+  test -e tests/microbenchs/sync_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/sync_tasks_data_overhead.sh tests/microbenchs/
+  test -e tests/microbenchs/async_tasks_data_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/async_tasks_data_overhead.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_sched.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_sched.sh tests/microbenchs/
   test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/
@@ -3542,6 +3625,8 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   test -e tests/microbenchs/parallel_independent_heterogeneous_tasks.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_independent_heterogeneous_tasks.sh tests/microbenchs/
   test -e tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh tests/microbenchs/
   test -e tests/microbenchs/parallel_independent_homogeneous_tasks.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_independent_homogeneous_tasks.sh tests/microbenchs/
+  test -e tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh tests/microbenchs/
+  test -e tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh tests/microbenchs/
   mkdir -p tests/datawizard
   test -e tests/datawizard/locality.sh || ln -sf $ac_abs_top_srcdir/tests/datawizard/locality.sh tests/datawizard/
   mkdir -p tests/overlap
@@ -3555,6 +3640,8 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   test -e examples/lu/lu.sh || ln -sf $ac_abs_top_srcdir/examples/lu/lu.sh examples/lu/
   mkdir -p examples/cholesky
   test -e examples/cholesky/cholesky.sh || ln -sf $ac_abs_top_srcdir/examples/cholesky/cholesky.sh examples/cholesky/
+  mkdir -p examples/mult
+  test -e examples/mult/sgemm.sh || ln -sf $ac_abs_top_srcdir/examples/mult/sgemm.sh examples/mult/
   test -e tools/starpu_paje_draw_histogram.R || ln -sf $ac_abs_top_srcdir/tools/starpu_paje_draw_histogram.R tools/starpu_paje_draw_histogram.R
   test -e tools/starpu_paje_state_stats.R || ln -sf $ac_abs_top_srcdir/tools/starpu_paje_state_stats.R tools/starpu_paje_state_stats.R
   test -e tools/starpu_trace_state_stats.py || ln -sf $ac_abs_top_srcdir/tools/starpu_trace_state_stats.py tools/starpu_trace_state_stats.py
@@ -3588,6 +3675,7 @@ AC_OUTPUT([
 	tools/starpu_codelet_profile
 	tools/starpu_codelet_histo_profile
 	tools/starpu_mpi_comm_matrix.py
+	tools/starpu_fxt_number_events_to_names.py
 	tools/starpu_workers_activity
 	tools/starpu_paje_draw_histogram
 	tools/starpu_paje_state_stats

+ 2 - 4
contrib/ci.inria.fr/Jenkinsfile-basic

@@ -114,8 +114,7 @@ pipeline
 					emailext(body: '${DEFAULT_CONTENT}',
 						 subject: '${DEFAULT_SUBJECT}',
 						 replyTo: '$DEFAULT_REPLYTO',
-						 to: '$DEFAULT_RECIPIENTS',
-						 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+						 to: '$DEFAULT_RECIPIENTS')
 				}
 			}
 		}
@@ -126,8 +125,7 @@ pipeline
 			emailext(body: '${DEFAULT_CONTENT}',
 				 subject: '${DEFAULT_SUBJECT}',
 				 replyTo: '$DEFAULT_REPLYTO',
-				 to: '$DEFAULT_RECIPIENTS',
-				 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+				 to: '$DEFAULT_RECIPIENTS')
 		}
 	}
 }

+ 2 - 4
contrib/ci.inria.fr/Jenkinsfile-bsd

@@ -114,8 +114,7 @@ pipeline
 					emailext(body: '${DEFAULT_CONTENT}',
 						 subject: '${DEFAULT_SUBJECT}',
 						 replyTo: '$DEFAULT_REPLYTO',
-						 to: '$DEFAULT_RECIPIENTS',
-						 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+						 to: '$DEFAULT_RECIPIENTS')
 				}
 			}
 		}
@@ -126,8 +125,7 @@ pipeline
 			emailext(body: '${DEFAULT_CONTENT}',
 				 subject: '${DEFAULT_SUBJECT}',
 				 replyTo: '$DEFAULT_REPLYTO',
-				 to: '$DEFAULT_RECIPIENTS',
-				 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+				 to: '$DEFAULT_RECIPIENTS')
 		}
 	}
 }

+ 3 - 5
contrib/ci.inria.fr/Jenkinsfile-windows

@@ -1,7 +1,7 @@
 #!groovy
 // StarPU --- Runtime system for heterogeneous multicore architectures.
 //
-// Copyright (C) 2018                                CNRS
+// Copyright (C) 2018, 2019                          CNRS
 //
 // StarPU is free software; you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
@@ -118,8 +118,7 @@ pipeline
 					emailext(body: '${DEFAULT_CONTENT}',
 						 subject: '${DEFAULT_SUBJECT}',
 						 replyTo: '$DEFAULT_REPLYTO',
-						 to: '$DEFAULT_RECIPIENTS',
-						 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+						 to: '$DEFAULT_RECIPIENTS')
 				}
 			}
 		}
@@ -130,8 +129,7 @@ pipeline
 			emailext(body: '${DEFAULT_CONTENT}',
 				 subject: '${DEFAULT_SUBJECT}',
 				 replyTo: '$DEFAULT_REPLYTO',
-				 to: '$DEFAULT_RECIPIENTS',
-				 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+				 to: '$DEFAULT_RECIPIENTS')
 		}
 	}
 }

+ 1 - 1
contrib/ci.inria.fr/job-0-tarball.sh

@@ -1,7 +1,7 @@
 #!/bin/sh
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2018                                     CNRS
+# Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 1
contrib/ci.inria.fr/job-1-build-windows.sh

@@ -1,7 +1,7 @@
 #!/bin/sh
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2015,2017-2018                      CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
contrib/ci.inria.fr/job-1-check-windows.bat

@@ -1,7 +1,6 @@
-REM
 REM StarPU --- Runtime system for heterogeneous multicore architectures.
 REM
-REM Copyright (C) 2013-2018                                CNRS
+REM Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 REM
 REM StarPU is free software; you can redistribute it and/or modify
 REM it under the terms of the GNU Lesser General Public License as published by

+ 2 - 1
contrib/ci.inria.fr/job-1-check.sh

@@ -1,7 +1,7 @@
 #!/bin/sh
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013-2018                                CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -73,6 +73,7 @@ fi
 ../configure $CONFIGURE_OPTIONS $CONFIGURE_CHECK  $STARPU_CONFIGURE_OPTIONS
 
 export STARPU_TIMEOUT_ENV=1800
+export MPIEXEC_TIMEOUT=1800
 make
 #make check
 (make -k check || true) 2>&1 | tee  ../check_$$

+ 4 - 9
doc/Makefile.am

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011,2012,2015                           Inria
-# Copyright (C) 2009,2011,2012,2015                      Université de Bordeaux
-# Copyright (C) 2010-2019                                CNRS
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -15,6 +13,9 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
+
+include $(top_srcdir)/starpu-notests.mk
+
 SUBDIRS = doxygen
 SUBDIRS += doxygen_dev
 DIST_SUBDIRS = doxygen
@@ -32,9 +33,3 @@ EXTRA_DIST =    tutorial/hello_world.c \
 
 txtdir = ${docdir}/tutorial
 txt_DATA = $(EXTRA_DIST)
-
-showcheck:
-	-cat /dev/null
-
-showsuite:
-	-cat /dev/null

+ 10 - 8
doc/doxygen/Makefile.am

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013-2018                                Inria
-# Copyright (C) 2010-2019                                CNRS
-# Copyright (C) 2009,2011,2013,2014,2017,2019            Université de Bordeaux
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -155,7 +153,7 @@ chapters/version.sty: $(chapters)
                 if test -f $(top_srcdir)/doc/doxygen/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/doxygen/$$f ; fi \
         done | sort -r | head -1 > timestamp_sty
 	@if test -s timestamp_sty ; then \
-		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%d %B %Y" > timestamp_sty_updated ;\
+		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\
 		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\
 	fi
 	@if test -s timestamp_sty_updated ; then \
@@ -174,7 +172,7 @@ chapters/version.html: $(chapters)
                 if test -f $(top_srcdir)/doc/doxygen/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/doxygen/$$f ; fi \
         done | sort -r | head -1 > timestamp_html
 	@if test -s timestamp_html ; then \
-		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%d %B %Y" > timestamp_html_updated ;\
+		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\
 		LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\
 	fi
 	@echo "This manual documents the usage of StarPU version $(VERSION)." > $(top_srcdir)/doc/doxygen/chapters/version.html
@@ -198,6 +196,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu_bound.h		\
 	$(top_srcdir)/include/starpu_clusters.h		\
 	$(top_srcdir)/include/starpu_cublas.h		\
+	$(top_srcdir)/include/starpu_cublas_v2.h	\
 	$(top_srcdir)/include/starpu_cusparse.h		\
 	$(top_srcdir)/include/starpu_cuda.h		\
 	$(top_srcdir)/include/starpu_data_filters.h	\
@@ -213,6 +212,8 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu_mod.f90		\
 	$(top_srcdir)/include/starpu_opencl.h		\
 	$(top_srcdir)/include/starpu_openmp.h		\
+	$(top_srcdir)/include/starpu_perf_monitoring.h	\
+	$(top_srcdir)/include/starpu_perf_steering.h	\
 	$(top_srcdir)/include/starpu_perfmodel.h	\
 	$(top_srcdir)/include/starpu_profiling.h	\
 	$(top_srcdir)/include/starpu_rand.h		\
@@ -270,13 +271,14 @@ $(DOX_PDF): $(DOX_TAG) refman.tex
 	$(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ;\
 	$(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' index.tex ;\
 	$(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' index.tex ;\
-	$(PDFLATEX) refman.tex ;\
+	max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\
+	! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\
 	$(MAKEINDEX) refman.idx ;\
-	$(PDFLATEX) refman.tex ;\
+	max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\
 	done=0; repeat=5 ;\
 	while test $$done = 0 -a $$repeat -gt 0; do \
            if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \
-	       $(PDFLATEX) refman.tex; \
+	       max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \
 	       repeat=`expr $$repeat - 1`; \
 	   else \
 	       done=1; \

+ 6 - 3
doc/doxygen/chapters/000_introduction.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2011-2013,2016                           Inria
- * Copyright (C) 2009-2011,2014,2016,2019                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -78,6 +76,11 @@ policies in a portable fashion (\ref HowToDefineANewSchedulingPolicy).
 
 The remainder of this section describes the main concepts used in StarPU.
 
+A video is available on the StarPU website
+http://starpu.gforge.inria.fr/ that presents these concepts in 26 minutes.
+
+Some tutorials are also available on http://starpu.gforge.inria.fr/tutorials/
+
 // explain the notion of codelet and task (i.e. g(A, B)
 
 \subsection CodeletAndTasks Codelet and Tasks

+ 18 - 17
doc/doxygen/chapters/101_building.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2011,2012,2018                           Inria
- * Copyright (C) 2009-2011,2013-2016,2019                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,7 +53,7 @@ location.
 
 If <c>libhwloc</c> is not available on your system, the option
 \ref without-hwloc "--without-hwloc" should be explicitely given when calling the
-<c>configure</c> script.
+script <c>configure</c>.
 
 
 \subsection GettingSources Getting Sources
@@ -88,8 +86,8 @@ $ git clone https://scm.gforge.inria.fr/anonscm/git/starpu/starpu.git
 
 Running <c>autogen.sh</c> is not necessary when using the tarball
 releases of StarPU.  However when using the source code from the git
-repository, you first need to generate the configure scripts and the
-Makefiles. This requires the availability of <c>autoconf</c> and
+repository, you first need to generate the script <c>configure</c> and the
+different Makefiles. This requires the availability of <c>autoconf</c> and
 <c>automake</c> >= 2.60.
 
 \verbatim
@@ -113,7 +111,7 @@ is advised to put them all in a separate directory. It is then
 easier to cleanup, and this allows to compile several configurations
 out of the same source tree. To do so, simply enter the directory
 where you want the compilation to produce its files, and invoke the
-<c>configure</c> script located in the StarPU source directory.
+script <c>configure</c> located in the StarPU source directory.
 
 \verbatim
 $ mkdir build
@@ -139,7 +137,7 @@ $ make
 Once everything is built, you may want to test the result. An
 extensive set of regression tests is provided with StarPU. Running the
 tests is done by calling <c>make check</c>. These tests are run every night
-and the result from the main profile is publicly available (http://starpu.gforge.inria.fr/testing/).
+and the result from the main profile is publicly available (http://starpu.gforge.inria.fr/testing/master/).
 
 \verbatim
 $ make check
@@ -246,7 +244,7 @@ int main(void)
     {
         return 1;
     }
-    printf("%d CPU coress\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
+    printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
     printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER));
     printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER));
     starpu_shutdown();
@@ -273,7 +271,7 @@ int main(void)
     {
         return 1;
     }
-    printf("%d CPU coress\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
+    printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
     printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER));
     printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER));
     starpu_shutdown();
@@ -428,12 +426,13 @@ While StarPU tasks are executing, the application is not supposed to do
 computations in the threads it starts itself, tasks should be used instead.
 
 If the application needs to reserve some cores for its own computations, it
-can do so with the starpu_conf::reserve_ncpus field, get the core IDs with
+can do so with the field starpu_conf::reserve_ncpus, get the core IDs with
 starpu_get_next_bindid(), and bind to them with starpu_bind_thread_on().
 
-Another option is for the application to put the whole StarPU on pause with
-starpu_pause() before performing its own computations, and let StarPU resume
-executing tasks with starpu_resume().
+Another option is for the application to pause StarPU by calling
+starpu_pause(), then to perform its own computations, and then to
+resume StarPU by calling starpu_resume() so that StarPU can execute
+tasks.
 
 \subsection EnablingOpenCL Enabling OpenCL
 
@@ -499,12 +498,12 @@ multiplication using BLAS and cuBLAS. They output the obtained GFlops.
 
 <c>lu_*</c> perform an LU factorization. They use different dependency primitives.
 
-\subsection SimulatedBenchmarks Simulated benchmarks
+\subsection SimulatedBenchmarks Simulated Benchmarks
 
 It can also be convenient to try simulated benchmarks, if you want to give a try
 at CPU-GPU scheduling without actually having a GPU at hand. This can be done by
-using the simgrid version of StarPU: first install the simgrid simulator from
-http://simgrid.gforge.inria.fr/ (we tested with simgrid from 3.11 to 3.16, and
+using the SimGrid version of StarPU: first install the SimGrid simulator from
+http://simgrid.gforge.inria.fr/ (we tested with SimGrid from 3.11 to 3.16, and
 3.18 to 3.22, other versions may have compatibility issues, 3.17 notably does
 not build at all. MPI simulation does not work with version 3.22),
 then configure StarPU with \ref enable-simgrid
@@ -527,4 +526,6 @@ Performance models are available for <c>cholesky_*</c>, <c>lu_*</c>, <c>*gemm</c
 320, 640, or 960 (plus 1440 for sirocco), and for <c>stencil</c> with block size 128x128x128, 192x192x192, and
 256x256x256.
 
+Read the chapter \ref SimGridSupport for more information on the SimGrid support.
+
 */

+ 1 - 3
doc/doxygen/chapters/110_basic_examples.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015-2019                      CNRS
- * Copyright (C) 2011-2013                                Inria
- * Copyright (C) 2009-2011,2014,2015,2019                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/201_advanced_examples.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2011-2013                                Inria
- * Copyright (C) 2009-2011,2013,2014                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 76 - 69
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2015,2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2013-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,8 +26,9 @@ will show roughly where time is spent, and focus correspondingly.
 
 \section CheckTaskSize Check Task Size
 
-Make sure that your tasks are not too small, because the StarPU runtime overhead
-is not completely zero. You can run the tasks_size_overhead.sh script to get an
+Make sure that your tasks are not too small, as the StarPU runtime overhead
+is not completely zero. As explained in \ref TaskSizeOverhead, you can
+run the script \c tasks_size_overhead.sh to get an
 idea of the scalability of tasks depending on their duration (in µs), on your
 own system.
 
@@ -40,19 +39,18 @@ much bigger than this.
 of cores, so it's better to try to get 10ms-ish tasks.
 
 Tasks durations can easily be observed when performance models are defined (see
-\ref PerformanceModelExample) by using the <c>starpu_perfmodel_plot</c> or
-<c>starpu_perfmodel_display</c> tool (see \ref PerformanceOfCodelets)
+\ref PerformanceModelExample) by using the tools <c>starpu_perfmodel_plot</c> or
+<c>starpu_perfmodel_display</c> (see \ref PerformanceOfCodelets)
 
 When using parallel tasks, the problem is even worse since StarPU has to
-synchronize the execution of tasks.
+synchronize the tasks execution.
 
 \section ConfigurationImprovePerformance Configuration Which May Improve Performance
 
-The \ref enable-fast "--enable-fast" \c configure option disables all
+The \c configure option \ref enable-fast "--enable-fast" disables all
 assertions. This makes StarPU more performant for really small tasks by
 disabling all sanity checks. Only use this for measurements and production, not for development, since this will drop all basic checks.
 
-
 \section DataRelatedFeaturesToImprovePerformance Data Related Features Which May Improve Performance
 
 link to \ref DataManagement
@@ -81,14 +79,14 @@ link to \ref StaticScheduling
 
 For proper overlapping of asynchronous GPU data transfers, data has to be pinned
 by CUDA. Data allocated with starpu_malloc() is always properly pinned. If the
-application is registering to StarPU some data which has not been allocated with
-starpu_malloc(), it should use starpu_memory_pin() to pin it.
+application registers to StarPU some data which has not been allocated with
+starpu_malloc(), starpu_memory_pin() should be called to pin the data memory.
 
 Due to CUDA limitations, StarPU will have a hard time overlapping its own
 communications and the codelet computations if the application does not use a
 dedicated CUDA stream for its computations instead of the default stream,
-which synchronizes all operations of the GPU. StarPU provides one by the use
-of starpu_cuda_get_local_stream() which can be used by all CUDA codelet
+which synchronizes all operations of the GPU. The function
+starpu_cuda_get_local_stream() returns a stream which can be used by all CUDA codelet
 operations to avoid this issue. For instance:
 
 \code{.c}
@@ -102,14 +100,14 @@ to use a version that takes the a stream parameter.
 Unfortunately, some CUDA libraries do not have stream variants of
 kernels. This will seriously lower the potential for overlapping.
 If some CUDA calls are made without specifying this local stream,
-synchronization needs to be explicited with cudaThreadSynchronize() around these
+synchronization needs to be explicited with cudaDeviceSynchronize() around these
 calls, to make sure that they get properly synchronized with the calls using
 the local stream. Notably, \c cudaMemcpy() and \c cudaMemset() are actually
-asynchronous and need such explicit synchronization! Use cudaMemcpyAsync() and
-cudaMemsetAsync() instead.
+asynchronous and need such explicit synchronization! Use \c cudaMemcpyAsync() and
+\c cudaMemsetAsync() instead.
 
-Calling starpu_cublas_init() makes StarPU already do appropriate calls for the
-CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
+Calling starpu_cublas_init() will ensure StarPU to properly call the
+CUBLAS library functions. Some libraries like Magma may however change the current stream of CUBLAS v1,
 one then has to call <c>cublasSetKernelStream(</c>starpu_cuda_get_local_stream()<c>)</c> at
 the beginning of the codelet to make sure that CUBLAS is really using the proper
 stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS
@@ -147,14 +145,14 @@ triggered by the completion of the kernel.
 Using the flag ::STARPU_CUDA_ASYNC also permits to enable concurrent kernel
 execution, on cards which support it (Kepler and later, notably). This is
 enabled by setting the environment variable \ref STARPU_NWORKER_PER_CUDA to the
-number of kernels to execute concurrently.  This is useful when kernels are
+number of kernels to be executed concurrently.  This is useful when kernels are
 small and do not feed the whole GPU with threads to run.
 
-Concerning memory allocation, you should really not use \c cudaMalloc/ \c cudaFree
-within the kernel, since \c cudaFree introduces a awfully lot of synchronizations
+Concerning memory allocation, you should really not use \c cudaMalloc()/ \c cudaFree()
+within the kernel, since \c cudaFree() introduces an awful lot of synchronizations
 within CUDA itself. You should instead add a parameter to the codelet with the
 ::STARPU_SCRATCH mode access. You can then pass to the task a handle registered
-with the desired size but with the \c NULL pointer, that handle can even be the
+with the desired size but with the \c NULL pointer, the handle can even be
 shared between tasks, StarPU will allocate per-task data on the fly before task
 execution, and reuse the allocated data between tasks.
 
@@ -177,8 +175,8 @@ kernel startup and completion.
 
 It may happen that for some reason, StarPU does not make progress for a long
 period of time.  Reason are sometimes due to contention inside StarPU, but
-sometimes this is due to external reasons, such as stuck MPI driver, or CUDA
-driver, etc.
+sometimes this is due to external reasons, such as a stuck MPI or CUDA
+driver.
 
 <c>export STARPU_WATCHDOG_TIMEOUT=10000</c> (\ref STARPU_WATCHDOG_TIMEOUT)
 
@@ -187,30 +185,34 @@ any task for 10ms, but lets the application continue normally. In addition to th
 
 <c>export STARPU_WATCHDOG_CRASH=1</c> (\ref STARPU_WATCHDOG_CRASH)
 
-raises <c>SIGABRT</c> in this condition, thus allowing to catch the situation in gdb.
+raises <c>SIGABRT</c> in this condition, thus allowing to catch the
+situation in \c gdb.
+
 It can also be useful to type <c>handle SIGABRT nopass</c> in <c>gdb</c> to be able to let
 the process continue, after inspecting the state of the process.
 
 \section HowToLimitMemoryPerNode How to Limit Memory Used By StarPU And Cache Buffer Allocations
 
 By default, StarPU makes sure to use at most 90% of the memory of GPU devices,
-moving data in and out of the device as appropriate and with prefetch and
-writeback optimizations. Concerning the main memory, by default it will not
-limit its consumption, since by default it has nowhere to push the data to when
-memory gets tight. This also means that by default StarPU will not cache buffer
-allocations in main memory, since it does not know how much of the system memory
-it can afford.
-
-In the case of GPUs, the \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM,
-\ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM environment variables
-can be used to control how
-much (in MiB) of the GPU device memory should be used at most by StarPU (their
-default values are 90% of the available memory).
-
-In the case of the main memory, the \ref STARPU_LIMIT_CPU_MEM environment
-variable can be used to specify how much (in MiB) of the main memory should be
-used at most by StarPU for buffer allocations. This way, StarPU will be able to
-cache buffer allocations (which can be a real benefit if a lot of bufferes are
+moving data in and out of the device as appropriate, as well as using
+prefetch and writeback optimizations.
+
+The environment variables \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM,
+\ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM
+can be used to control how much (in MiB) of the GPU device memory
+should be used at most by StarPU (the default value is to use 90% of the
+available memory).
+
+By default, the usage of the main memory is not limited, as the
+default mechanisms do not provide means to evict main memory when it
+gets too tight. This also means that by default StarPU will not cache buffer
+allocations in main memory, since it does not know how much of the
+system memory it can afford.
+
+The environment variable \ref STARPU_LIMIT_CPU_MEM can be used to
+specify how much (in MiB) of the main memory should be used at most by
+StarPU for buffer allocations. This way, StarPU will be able to
+cache buffer allocations (which can be a real benefit if a lot of buffers are
 involved, or if allocation fragmentation can become a problem), and when using
 \ref OutOfCore, StarPU will know when it should evict data out to the disk.
 
@@ -233,8 +235,8 @@ caches or data out to the disk, starpu_memory_allocate() can be used to
 specify an amount of memory to be accounted for. starpu_memory_deallocate()
 can be used to account freed memory back. Those can for instance be used by data
 interfaces with dynamic data buffers: instead of using starpu_malloc_on_node(),
-they would dynamically allocate data with malloc/realloc, and notify starpu of
-the delta thanks to starpu_memory_allocate() and starpu_memory_deallocate() calls.
+they would dynamically allocate data with \c malloc()/\c realloc(), and notify StarPU of
+the delta by calling starpu_memory_allocate() and starpu_memory_deallocate().
 
 starpu_memory_get_total() and starpu_memory_get_available()
 can be used to get an estimation of how much memory is available.
@@ -251,7 +253,7 @@ to reserve this amount immediately.
 
 It is possible to reduce the memory footprint of the task and data internal
 structures of StarPU by describing the shape of your machine and/or your
-application at the \c configure step.
+application when calling \c configure.
 
 To reduce the memory footprint of the data internal structures of StarPU, one
 can set the
@@ -271,28 +273,27 @@ execution. For example, in the Cholesky factorization (dense linear algebra
 application), the GEMM task uses up to 3 buffers, so it is possible to set the
 maximum number of task buffers to 3 to run a Cholesky factorization on StarPU.
 
-The size of the various structures of StarPU can be printed by 
+The size of the various structures of StarPU can be printed by
 <c>tests/microbenchs/display_structures_size</c>.
 
-It is also often useless to submit *all* the tasks at the same time. One can
-make the starpu_task_submit() function block when a reasonable given number of
-tasks have been submitted, by setting the \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS and
-\ref STARPU_LIMIT_MAX_SUBMITTED_TASKS environment variables, for instance:
+It is also often useless to submit *all* the tasks at the same time.
+Task submission can be blocked when a reasonable given number of
+tasks have been submitted, by setting the environment variables \ref
+STARPU_LIMIT_MIN_SUBMITTED_TASKS and \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS.
 
 <c>
 export STARPU_LIMIT_MAX_SUBMITTED_TASKS=10000
-
 export STARPU_LIMIT_MIN_SUBMITTED_TASKS=9000
 </c>
 
-To make StarPU block submission when 10000 tasks are submitted, and unblock
+will make StarPU block submission when 10000 tasks are submitted, and unblock
 submission when only 9000 tasks are still submitted, i.e. 1000 tasks have
 completed among the 10000 which were submitted when submission was blocked. Of
 course this may reduce parallelism if the threshold is set too low. The precise
 balance depends on the application task graph.
 
 An idea of how much memory is used for tasks and data handles can be obtained by
-setting the \ref STARPU_MAX_MEMORY_USE environment variable to <c>1</c>.
+setting the environment variable \ref STARPU_MAX_MEMORY_USE to <c>1</c>.
 
 \section HowtoReuseMemory How To Reuse Memory
 
@@ -303,7 +304,7 @@ tasks. For this system to work with MPI tasks, you need to submit tasks progress
 of as soon as possible, because in the case of MPI receives, the allocation cache check for reusing data
 buffers will be done at submission time, not at execution time.
 
-You have two options to control the task submission flow. The first one is by
+There are two options to control the task submission flow. The first one is by
 controlling the number of submitted tasks during the whole execution. This can
 be done whether by setting the environment variables
 \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS and \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS to
@@ -348,11 +349,12 @@ To force continuing calibration,
 use <c>export STARPU_CALIBRATE=1</c> (\ref STARPU_CALIBRATE). This may be necessary if your application
 has not-so-stable performance. StarPU will force calibration (and thus ignore
 the current result) until 10 (<c>_STARPU_CALIBRATION_MINIMUM</c>) measurements have been
-made on each architecture, to avoid badly scheduling tasks just because the
+made on each architecture, to avoid bad scheduling decisions just because the
 first measurements were not so good. Details on the current performance model status
-can be obtained from the tool <c>starpu_perfmodel_display</c>: the <c>-l</c>
-option lists the available performance models, and the <c>-s</c> option permits
-to choose the performance model to be displayed. The result looks like:
+can be obtained with the tool <c>starpu_perfmodel_display</c>: the
+option <c>-l</c> lists the available performance models, and the
+option <c>-s</c> allows to choose the performance model to be
+displayed. The result looks like:
 
 \verbatim
 $ starpu_perfmodel_display -s starpu_slu_lu_model_11
@@ -364,7 +366,7 @@ e5a07e31  4096     0.000000e+00  1.717457e+01  5.190038e+00  14
 ...
 \endverbatim
 
-Which shows that for the LU 11 kernel with a 1MiB matrix, the average
+which shows that for the LU 11 kernel with a 1MiB matrix, the average
 execution time on CPUs was about 25ms, with a 0.2ms standard deviation, over
 8 samples. It is a good idea to check this before doing actual performance
 measurements.
@@ -373,7 +375,7 @@ A graph can be drawn by using the tool <c>starpu_perfmodel_plot</c>:
 
 \verbatim
 $ starpu_perfmodel_plot -s starpu_slu_lu_model_11
-4096 16384 65536 262144 1048576 4194304 
+4096 16384 65536 262144 1048576 4194304
 $ gnuplot starpu_starpu_slu_lu_model_11.gp
 $ gv starpu_starpu_slu_lu_model_11.eps
 \endverbatim
@@ -436,10 +438,14 @@ and in Joules for the energy consumption models.
 A quick view of how many tasks each worker has executed can be obtained by setting
 <c>export STARPU_WORKER_STATS=1</c> (\ref STARPU_WORKER_STATS). This is a convenient way to check that
 execution did happen on accelerators, without penalizing performance with
-the profiling overhead.
+the profiling overhead. \ref STARPU_WORKER_STATS_FILE can be defined
+to specify a filename in which to display statistics, by default
+statistics are printed on the standard error stream.
 
 A quick view of how much data transfers have been issued can be obtained by setting
-<c>export STARPU_BUS_STATS=1</c> (\ref STARPU_BUS_STATS).
+<c>export STARPU_BUS_STATS=1</c> (\ref STARPU_BUS_STATS). \ref
+STARPU_BUS_STATS_FILE can be defined to specify a filename in which to
+display statistics, by default statistics are printed on the standard error stream.
 
 More detailed profiling information can be enabled by using <c>export STARPU_PROFILING=1</c> (\ref STARPU_PROFILING)
 or by
@@ -451,28 +457,29 @@ STARPU_BUS_STATS=1</c> and <c>export STARPU_WORKER_STATS=1</c> .
 \section OverheadProfiling Overhead Profiling
 
 \ref OfflinePerformanceTools can already provide an idea of to what extent and
-which part of StarPU bring overhead on the execution time. To get a more precise
-analysis of the parts of StarPU which bring most overhead, <c>gprof</c> can be used.
+which part of StarPU brings an overhead on the execution time. To get a more precise
+analysis of which parts of StarPU bring the most overhead, <c>gprof</c> can be used.
 
 First, recompile and reinstall StarPU with <c>gprof</c> support:
 
 \code
-./configure --enable-perf-debug --disable-shared --disable-build-tests --disable-build-examples
+../configure --enable-perf-debug --disable-shared --disable-build-tests --disable-build-examples
 \endcode
 
 Make sure not to leave a dynamic version of StarPU in the target path: remove
 any remaining <c>libstarpu-*.so</c>
 
 Then relink your application with the static StarPU library, make sure that
-running <c>ldd</c> on your application does not mention any libstarpu
+running <c>ldd</c> on your application does not mention any \c libstarpu
 (i.e. it's really statically-linked).
 
 \code
 gcc test.c -o test $(pkg-config --cflags starpu-1.3) $(pkg-config --libs starpu-1.3)
 \endcode
 
-Now you can run your application, and a <c>gmon.out</c> file should appear in the
-current directory, you can process it by running <c>gprof</c> on your application:
+Now you can run your application, this will create a file
+<c>gmon.out</c> in the current directory, it can be processed by
+running <c>gprof</c> on your application:
 
 \code
 gprof ./test

+ 16 - 14
doc/doxygen/chapters/301_tasks.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2011,2012,2018                           Inria
- * Copyright (C) 2009-2011,2014-2016,2018                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -31,6 +29,8 @@ checked if bad performance are observed. To get a grasp at the scalability
 possibility according to task size, one can run
 <c>tests/microbenchs/tasks_size_overhead.sh</c> which draws curves of the
 speedup of independent tasks of very small sizes.
+To determine what task size your application is actually using, one can use
+<c>starpu_fxt_data_trace</c>, see \ref DataTrace .
 
 The choice of scheduler also has impact over the overhead: for instance, the
  scheduler <c>dmda</c> takes time to make a decision, while <c>eager</c> does
@@ -40,7 +40,7 @@ impact that has on the target machine.
 \section TaskSubmission Task Submission
 
 To let StarPU make online optimizations, tasks should be submitted
-asynchronously as much as possible. Ideally, all the tasks should be
+asynchronously as much as possible. Ideally, all tasks should be
 submitted, and mere calls to starpu_task_wait_for_all() or
 starpu_data_unregister() be done to wait for
 termination. StarPU will then be able to rework the whole schedule, overlap
@@ -52,7 +52,7 @@ By default, StarPU will consider the tasks in the order they are submitted by
 the application. If the application programmer knows that some tasks should
 be performed in priority (for instance because their output is needed by many
 other tasks and may thus be a bottleneck if not executed early
-enough), the field starpu_task::priority should be set to transmit the
+enough), the field starpu_task::priority should be set to provide the
 priority information to StarPU.
 
 \section TaskDependencies Task Dependencies
@@ -165,14 +165,14 @@ starpu_task_insert(&dummy_big_cl,
 \endcode
 
 The whole code for this complex data interface is available in the
-directory <c>examples/basic_examples/dynamic_handles.c</c>.
+file <c>examples/basic_examples/dynamic_handles.c</c>.
 
 \section SettingVariableDataHandlesForATask Setting a Variable Number Of Data Handles For a Task
 
-Normally, the number of data handles given to a task is fixed in the
-starpu_codelet::nbuffers codelet field. This field can however be set to
-\ref STARPU_VARIABLE_NBUFFERS, in which case the starpu_task::nbuffers task field
-must be set, and the starpu_task::modes field (or starpu_task::dyn_modes field,
+Normally, the number of data handles given to a task is set with
+starpu_codelet::nbuffers. This field can however be set to
+\ref STARPU_VARIABLE_NBUFFERS, in which case starpu_task::nbuffers
+must be set, and starpu_task::modes (or starpu_task::dyn_modes,
 see \ref SettingManyDataHandlesForATask) should be used to specify the modes for
 the handles.
 
@@ -215,7 +215,7 @@ struct starpu_codelet cl =
 
 Schedulers which are multi-implementation aware (only <c>dmda</c> and
 <c>pheft</c> for now) will use the performance models of all the
-implementations it was given, and pick the one which seems to be the fastest.
+provided implementations, and pick the one which seems to be the fastest.
 
 \section EnablingImplementationAccordingToCapabilities Enabling Implementation According To Capabilities
 
@@ -333,7 +333,7 @@ struct starpu_codelet cl =
 };
 \endcode
 
-Note: the most generic variant should be provided first, as some schedulers are
+Note that the most generic variant should be provided first, as some schedulers are
 not able to try the different variants.
 
 \section InsertTaskUtility Insert Task Utility
@@ -341,7 +341,7 @@ not able to try the different variants.
 StarPU provides the wrapper function starpu_task_insert() to ease
 the creation and submission of tasks.
 
-Here the implementation of the codelet:
+Here the implementation of a codelet:
 
 \code{.c}
 void func_cpu(void *descr[], void *_args)
@@ -477,7 +477,7 @@ ret = starpu_task_get_task_succs(task, sizeof(tasks)/sizeof(*tasks), tasks);
 \section ParallelTasks Parallel Tasks
 
 StarPU can leverage existing parallel computation libraries by the means of
-parallel tasks. A parallel task is a task which gets worked on by a set of CPUs
+parallel tasks. A parallel task is a task which is run by a set of CPUs
 (called a parallel or combined worker) at the same time, by using an existing
 parallel CPU implementation of the computation to be achieved. This can also be
 useful to improve the load balance between slow CPUs and fast GPUs: since CPUs
@@ -564,6 +564,8 @@ worker sizes (making several measurements for each worker size) and
 thus be able to avoid choosing a large combined worker if the codelet
 does not actually scale so much.
 
+This is however for now only proof of concept, and has not really been optimized yet.
+
 \subsection CombinedWorkers Combined Workers
 
 By default, StarPU creates combined workers according to the architecture

+ 176 - 10
doc/doxygen/chapters/310_data_management.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,11 +20,16 @@ TODO: intro which mentions consistency among other things
 
 \section DataInterface Data Interface
 
-StarPU provides several data interfaces for programmers to describe the data layout of their application. There are predefined interfaces already available in StarPU. Users can define new data interfaces as explained in \ref DefiningANewDataInterface. All functions provided by StarPU are documented in \ref API_Data_Interfaces. You will find a short list below.
+StarPU provides several data interfaces for programmers to describe
+the data layout of their application. There are predefined interfaces
+already available in StarPU. Users can define new data interfaces as
+explained in \ref DefiningANewDataInterface. All functions provided by
+StarPU are documented in \ref API_Data_Interfaces. You will find a
+short list below.
 
 \subsection VariableDataInterface Variable Data Interface
 
-A variable is a given size byte element, typically a scalar. Here an
+A variable is a given-size byte element, typically a scalar. Here an
 example of how to register a variable data to StarPU by using
 starpu_variable_data_register().
 
@@ -49,6 +52,13 @@ starpu_data_handle_t vector_handle;
 starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 \endcode
 
+Vectors can be partitioned into pieces by using
+starpu_vector_filter_block(). They can also be partitioned with some overlapping
+by using starpu_vector_filter_block_shadow(). By default StarPU
+uses the same size for each piece. If different sizes are desired,
+starpu_vector_filter_list() or starpu_vector_filter_list_long() can be used
+instead. To just divide in two pieces, starpu_vector_filter_divide_in_2() can be used.
+
 \subsection MatrixDataInterface Matrix Data Interface
 
 To register 2-D matrices with a potential padding, one can use the
@@ -62,9 +72,15 @@ matrix = (float*)malloc(width * height * sizeof(float));
 starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
 \endcode
 
+2D matrices can be partitioned into 2D matrices along the x dimension by
+using starpu_matrix_filter_block(), and along the y dimension by using
+starpu_matrix_filter_vertical_block(). They can also be partitioned
+with some overlapping by using starpu_matrix_filter_block_shadow() and
+starpu_matrix_filter_vertical_block_shadow().
+
 \subsection BlockDataInterface Block Data Interface
 
-To register 3-D blocks with potential paddings on Y and Z dimensions,
+To register 3-D matrices with potential paddings on Y and Z dimensions,
 one can use the block data interface. Here an example of how to
 register a block data to StarPU by using starpu_block_data_register().
 
@@ -75,6 +91,29 @@ block = (float*)malloc(nx*ny*nz*sizeof(float));
 starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
 \endcode
 
+3D matrices can be partitioned along the x dimension by
+using starpu_block_filter_block(), or along the y dimension
+by using starpu_block_filter_vertical_block(), or along the
+z dimension by using starpu_block_filter_depth_block(). They
+can also be partitioned with some overlapping by using
+starpu_block_filter_block_shadow(), starpu_block_filter_vertical_block_shadow(),
+or starpu_block_filter_depth_block_shadow().
+
+\subsection TensorDataInterface Tensor Data Interface
+
+To register 4-D matrices with potential paddings on Y, Z, and T dimensions,
+one can use the tensor data interface. Here an example of how to
+register a tensor data to StarPU by using starpu_tensor_data_register().
+
+\code{.c}
+float *block;
+starpu_data_handle_t block_handle;
+block = (float*)malloc(nx*ny*nz*nt*sizeof(float));
+starpu_tensor_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float));
+\endcode
+
+Partitioning filters are not implemented yet.
+
 \subsection BCSRDataInterface BCSR Data Interface
 
 BCSR (Blocked Compressed Sparse Row Representation) sparse matrix data
@@ -142,10 +181,18 @@ starpu_bcsr_data_register(&bcsr_handle,
 StarPU provides an example on how to deal with such matrices in
 <c>examples/spmv</c>.
 
+BCSR data handles can be partitioned into its dense matrix blocks by using
+starpu_bcsr_filter_canonical_block(), or split into other BCSR data handles by
+using starpu_bcsr_filter_vertical_block() (but only split along the leading dimension is
+supported, i.e. along adjacent nnz blocks).
+
 \subsection CSRDataInterface CSR Data Interface
 
 TODO
 
+CSR data handles can be partitioned into vertical CSR matrices by using
+starpu_csr_filter_vertical_block().
+
 \subsection VariableSizeDataInterface Data Interface with Variable Size
 
 Tasks are actually allowed to change the size of data interfaces.
@@ -170,7 +217,39 @@ that the StarPU core knows the new data layout. The starpu_data_interface_ops
 structure however then needs to have the starpu_data_interface_ops::dontcache
 field set to 1, to prevent StarPU from trying to perform any cached allocation,
 since the allocated size will vary. An example is available in
-<c>tests/datawizard/variable_size.c</c>
+<c>tests/datawizard/variable_size.c</c>. The example uses its own data
+interface so as to contain some simulation information for data growth, but the
+principle can be applied for any data interface.
+
+The principle is to use <c>starpu_malloc_on_node_flags</c> to make the new
+allocation, and use <c>starpu_free_on_node_flags</c> to release any previous
+allocation. The flags have to be precisely like in the example:
+
+\code{.c}
+unsigned workerid = starpu_worker_get_id_check();
+unsigned dst_node = starpu_worker_get_memory_node(workerid);
+interface->ptr = starpu_malloc_on_node_flags(dst_node, size + increase, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW);
+starpu_free_on_node_flags(dst_node, old, size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW);
+interface->size += increase;
+\endcode
+
+so that the allocated area has the expected properties and the allocation is accounted for properly.
+
+Depending on the interface (vector, CSR, etc.) you may have to fix several
+members of the data interface: e.g. both <c>nx</c> and <c>allocsize</c> for
+vectors, and store the pointer both in <c>ptr</c> and <c>dev_handle</c>.
+
+Some interfaces make a distinction between the actual number of elements
+stored in the data and the actually allocated buffer. For instance, the vector
+interface uses the <c>nx</c> field for the former, and the <c>allocsize</c> for
+the latter. This allows for lazy reallocation to avoid reallocating the buffer
+every time to exactly match the actual number of elements. Computations and data
+transfers will use <c>nx</c> field, while allocation functions will use the
+<c>allocsize</c>. One just has to make sure that <c>allocsize</c> is always
+bigger or equal to <c>nx</c>.
+
+Important note: one can not change the size of a partitioned data.
+
 
 \section DataManagement Data Management
 
@@ -476,6 +555,34 @@ starpu_data_invalidate_submit(handle);
 
 And now we can start using vertical slices, etc.
 
+\section DataPointers Handles data buffer pointers
+
+A simple understanding of starpu handles is that it's a collection of buffers on
+each memory node of the machine, which contain the same data.  The picture is
+however made more complex with the OpenCL support and with partitioning.
+
+When partitioning a handle, the data buffers of the subhandles will indeed
+be inside the data buffers of the main handle (to save transferring data
+back and forth between the main handle and the subhandles). But in OpenCL,
+a <c>cl_mem</c> is not a pointer, but an opaque value on which pointer
+arithmetic can not be used. That is why data interfaces contain three members:
+<c>dev_handle</c>, <c>offset</c>, and <c>ptr</c>. The <c>dev_handle</c> member
+is what the allocation function returned, and one can not do arithmetic on
+it. The <c>offset</c> member is the offset inside the allocated area, most often
+it will be 0 because data start at the beginning of the allocated area, but
+when the handle is partitioned, the subhandles will have varying <c>offset</c>
+values, for each subpiece. The <c>ptr</c> member, in the non-OpenCL case, i.e.
+when pointer arithmetic can be used on <c>dev_handle</c>, is just the sum of
+<c>dev_handle</c> and <c>offset</c>, provided for convenience.
+
+This means that:
+<ul>
+<li>computation kernels can use <c>ptr</c> in non-OpenCL implementations.</li>
+<li>computation kernels have to use <c>dev_handle</c> and <c>offset</c> in the OpenCL implementation.</li>
+<li>allocation methods of data interfaces have to store the value returned by starpu_malloc_on_node() in <c>dev_handle</c> and <c>ptr</c>, and set <c>offset</c> to 0.</li>
+<li>partitioning filters have to copy over <c>dev_handle</c> without modifying it, set in the child different values of <c>offset</c>, and set <c>ptr</c> accordingly as the sum of <c>dev_handle</c> and <c>offset</c>.</li>
+</ul>
+
 \section DefiningANewDataFilter Defining A New Data Filter
 
 StarPU provides a series of predefined filters in \ref API_Data_Partition, but
@@ -758,7 +865,11 @@ A full example may be found in <c>examples/basic_examples/multiformat.c</c>.
 
 \section DefiningANewDataInterface Defining A New Data Interface
 
-Let's define a new data interface to manage complex numbers.
+This section proposes an example of how to define your own interface, when the
+StarPU-provided interfaces do not fit your needs. Here we take a simple example of
+an array of complex numbers represented by two arrays of double values.
+
+Let's thus define a new data interface to manage arrays of complex numbers:
 
 \code{.c}
 /* interface for complex numbers */
@@ -770,6 +881,15 @@ struct starpu_complex_interface
 };
 \endcode
 
+That structure stores enough information to describe <b>one</b> buffer of such
+a kind of data. One instance of it is used for the buffer stored in the main
+memory, another instance is used for the buffer stored in a GPU, etc. A <i>data
+handle</i> is thus a collection of such structures, to remember each buffer on each memory node.
+
+Note: one should not take pointers into such structures, because StarPU needs
+to be able to copy over the content of it to various places, for instance to
+efficiently migrate a data buffer from one data handle to another data handle.
+
 Registering such a data to StarPU is easily done using the function
 starpu_data_register(). The last
 parameter of the function, <c>interface_complex_ops</c>, will be
@@ -795,12 +915,41 @@ void starpu_complex_data_register(starpu_data_handle_t *handle,
 }
 \endcode
 
-The <c>starpu_complex_interface</c> structure is here used just to store the
+The <c>struct starpu_complex_interface complex</c> is here used just to store the
 parameters that the user provided to <c>starpu_complex_data_register</c>.
 starpu_data_register() will first allocate the handle, and
 then pass the <c>starpu_complex_interface</c> structure to the
 starpu_data_interface_ops::register_data_handle method, which records them
-within the data handle (it is called once per node by starpu_data_register()).
+within the data handle (it is called once per node by starpu_data_register()):
+
+\code{.c}
+static void complex_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
+{
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
+
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		local_interface->nx = complex_interface->nx;
+		if (node == home_node)
+		{
+			local_interface->real = complex_interface->real;
+			local_interface->imaginary = complex_interface->imaginary;
+		}
+		else
+		{
+			local_interface->real = NULL;
+			local_interface->imaginary = NULL;
+		}
+	}
+}
+\endcode
+
+If the application provided a home node, the corresponding pointers will be
+recorded for that node. The other nodes have no buffer allocated yet.
 
 Different operations need to be defined for a data interface through
 the type starpu_data_interface_ops. We only define here the basic
@@ -927,4 +1076,21 @@ when the kernel does not make so many accesses to the second data, and thus data
 being remote e.g. over a PCI bus is not a performance problem, and avoids
 filling the fast local memory with data which does not need the performance.
 
+In cases where the kernel is fine with some data being either local or in the
+main memory, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU can be used. StarPU will then
+be free to leave the data in the main memory and let the kernel access it from
+accelerators, or to move it to the accelerator before starting the kernel, for
+instance:
+
+\code{.c}
+struct starpu_codelet cl =
+{
+	.cuda_funcs = { kernel },
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R},
+	.specific_nodes = 1,
+	.nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU},
+};
+\endcode
+
 */

+ 19 - 10
doc/doxygen/chapters/320_scheduling.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2011,2012,2016                           Inria
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -100,14 +98,18 @@ become available, without taking priorities into account.
 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
 into account data transfer time.
 
+The <b>dmdap</b> (deque model data aware prio) scheduler is similar to dmda,
+except that it sorts tasks by priority order, which allows to become even closer
+to HEFT by respecting priorities after having made the scheduling decision (but
+it still schedules tasks in the order they become available).
+
 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
 but it also privileges tasks whose data buffers are already available
 on the target device.
 
-The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
-except that it sorts tasks by priority order, which allows to become even closer
-to HEFT by respecting priorities after having made the scheduling decision (but
-it still schedules tasks in the order they become available).
+The <b>dmdas</b> scheduler combines dmdap and dmdar: it sorts tasks by priority order,
+but for a given priority it will privilege tasks whose data buffers are already
+available on the target device.
 
 The <b>dmdasd</b> (deque model data aware sorted decision) scheduler is similar
 to dmdas, except that when scheduling a task, it takes into account its priority
@@ -164,7 +166,7 @@ processing units), the idle power of the machine should be given by setting
 be obtained from the machine power supplier.
 
 The energy actually consumed by the total execution can be displayed by setting
-<c>export STARPU_PROFILING=1 STARPU_WORKER_STATS=1</c> .
+<c>export STARPU_PROFILING=1 STARPU_WORKER_STATS=1</c> (\ref STARPU_PROFILING and \ref STARPU_WORKER_STATS).
 
 For OpenCL devices, on-line task consumption measurement is currently supported through the
 <c>CL_PROFILING_POWER_CONSUMED</c> OpenCL extension, implemented in the MoviSim
@@ -204,12 +206,15 @@ pre-defined Modularized Schedulers :
 - Eager-based Schedulers (with/without prefetching : \c modular-eager ,
 \c modular-eager-prefetching) : \n
 Naive scheduler, which tries to map a task on the first available resource
-it finds.
+it finds. The prefetching variant queues several tasks in advance to be able to
+do data prefetching. This may however degrade load balancing a bit.
 
 - Prio-based Schedulers (with/without prefetching :
-\c modular-prio, \c modular-prio-prefetching) : \n
+\c modular-prio, \c modular-prio-prefetching , \c modular-eager-prio) : \n
 Similar to Eager-Based Schedulers. Can handle tasks which have a defined
 priority and schedule them accordingly.
+The \c modular-eager-prio variant integrates the eager and priority queue in a
+single component. This allows it to do a better job at pushing tasks.
 
 - Random-based Schedulers (with/without prefetching: \c modular-random,
 \c modular-random-prio, \c modular-random-prefetching, \c
@@ -231,6 +236,10 @@ modular-heft-prio is similar to \c modular-heft, but only decides the memory
 node, not the exact worker, just pushing tasks to one central queue per memory
 node.
 
+- Heteroprio Scheduler: \n
+Maps tasks to workers similarly to HEFT, but first attributes accelerated tasks to
+GPUs, then not-so-accelerated tasks to CPUs.
+
 To use one of these schedulers, one can set the environment variable \ref STARPU_SCHED.
 
 \section StaticScheduling Static Scheduling

+ 4 - 6
doc/doxygen/chapters/330_scheduling_contexts.doxy

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2016                           Inria
- * Copyright (C) 2010-2018                                CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2016                                     Uppsala University
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2016       Uppsala University
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,7 +19,7 @@
 
 TODO: improve!
 
-\section GeneralIdeas General Ideas
+\section ContextGeneralIdeas General Ideas
 
 Scheduling contexts represent abstracts sets of workers that allow the
 programmers to control the distribution of computational resources
@@ -107,7 +105,7 @@ int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", STARPU_SCHED_CTX_PO
 /* .... */
 \endcode
 
-\section CreatingAContext Creating A Context To Partition a GPU
+\section CreatingAGPUContext Creating A Context To Partition a GPU
 
 The contexts can also be used to group set of SMs of an NVIDIA GPU in order to isolate
 the parallel kernels and allow them to coexecution on a specified partiton of the GPU.

+ 1 - 3
doc/doxygen/chapters/340_scheduling_context_hypervisor.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013                                Inria
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 3 - 5
doc/doxygen/chapters/350_scheduling_policy_definition.doxy

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2014,2016-2019                           CNRS
- * Copyright (C) 2014,2017,2019                           Université de Bordeaux
- * Copyright (C) 2013                                     Simon Archipoff
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013       Simon Archipoff
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +17,7 @@
 
 /*! \page HowToDefineANewSchedulingPolicy How To Define A New Scheduling Policy
 
-\section Introduction Introduction
+\section NewSchedulingPolicy_Introduction Introduction
 
 StarPU provides two ways of defining a scheduling policy, a basic monolithic
 way, and a modular way.

+ 1 - 3
doc/doxygen/chapters/360_debugging_tools.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

文件差异内容过多而无法显示
+ 457 - 15
doc/doxygen/chapters/370_online_performance_tools.doxy


+ 183 - 34
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -1,8 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011,2012,2015-2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2020       Federal University of Rio Grande do Sul (UFRGS)
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -160,6 +159,13 @@ starpu_task::color. Colors are expressed with the following format
 <c>basic_examples/task_insert_color</c> for examples on how to assign
 colors.
 
+To get statistics on the time spent in runtime overhead, one can use the
+statistics plugin of ViTE. In Preferences, select Plugins. In "States Type",
+select "Worker State". Then click on "Reload" to update the histogram. The red
+"Idle" percentages are due to lack of parallelism, while the brown "Overhead"
+and "Scheduling" percentages are due to the overhead of the runtime and of the
+scheduler.
+
 To identify tasks precisely, the application can also set the field
 starpu_task::tag_id or setting \ref STARPU_TAG_ONLY when calling
 starpu_task_insert(). The value of the tag will then show up in the
@@ -202,7 +208,7 @@ $ dot -Tpdf dag.dot -o output.pdf
 
 Another generated trace file gives details on the executed tasks. The
 file, created in the current directory, is named <c>tasks.rec</c>. This file
-is in the recutils format, i.e. <c>Field: value</c> lines, and empty lines to
+is in the recutils format, i.e. <c>Field: value</c> lines, and empty lines are used to
 separate each task.  This can be used as a convenient input for various ad-hoc
 analysis tools. By default it only contains information about the actual
 execution. Performance models can be obtained by running
@@ -224,6 +230,14 @@ Another possibility is to obtain the performance models as an auxiliary <c>perfm
 $ starpu_perfmodel_recdump tasks.rec -o perfmodel.rec
 \endverbatim
 
+\subsubsection TraceSchedTaskDetails Getting Scheduling Task Details
+
+The file, <c>sched_tasks.rec</c>, created in the current directory,
+and in the recutils format, gives information about the tasks
+scheduling, and lists the push and pop actions of the scheduler. For
+each action, it gives the timestamp, the job priority and the job id.
+Each action is separated from the next one by empty lines.
+
 \subsubsection MonitoringActivity Monitoring Activity
 
 Another generated trace file is an activity trace. The file, created
@@ -259,6 +273,34 @@ and whose name start with "modular-"), the call to
 which can be viewed in a javascript-enabled web browser. It shows the
 flow of tasks between the components of the modular scheduler.
 
+\subsubsection TimeBetweenSendRecvDataUse Analyzing Time Between MPI Data Transfer and Use by Tasks
+
+<c>starpu_fxt_tool</c> produces a file called <c>comms.rec</c> which describes all 
+MPI communications. The script <c>starpu_send_recv_data_use.py</c> uses this file 
+and <c>tasks.rec</c> in order to produce two graphs: the first one shows durations 
+between the reception of data and their usage by a task and the second one plots the 
+same graph but with elapsed time between send and usage of a data by the sender.
+
+\image html trace_recv_use.png
+\image latex trace_recv_use.eps "" width=\textwidth
+
+\image html trace_send_use.png
+\image latex trace_send_use.eps "" width=\textwidth
+
+
+\subsubsection NumberEvents Number of events in trace files
+
+When launched with the option <c>-number-events</c>, <c>starpu_fxt_tool</c> will
+produce a file named <c>number_events.data</c>. This file contains the number of
+events for each event type. Events are represented with their key. To convert
+event keys to event names, you can use the <c>starpu_fxt_number_events_to_names.py</c>
+script:
+
+\verbatim
+$ starpu_fxt_number_events_to_names.py number_events.data
+\endverbatim
+
+
 \subsection LimitingScopeTrace Limiting The Scope Of The Trace
 
 For computing statistics, it is useful to limit the trace to a given portion of
@@ -434,11 +476,26 @@ histogram of the codelet execution time distribution.
 \image html distrib_data_histo.png
 \image latex distrib_data_histo.eps "" width=\textwidth
 
+\section DataTrace Data trace and tasks length
+
+It is possible to get statistics about tasks length and data size by using :
+\verbatim
+$ starpu_fxt_data_trace filename [codelet1 codelet2 ... codeletn]
+\endverbatim
+where <c>filename</c> is the FxT trace file and <c>codeletX</c> are the names of the codelets you
+want to profile (if no names are specified, <c>starpu_fxt_data_trace</c> will profile them all).
+This will create a file, <c>data_trace.gp</c> which
+can be executed to get a <c>.eps</c> image of these results. On the image, each point represents a
+task, and each color corresponds to a codelet.
+
+\image html data_trace.png
+\image latex data_trace.eps "" width=\textwidth
+
 \section TraceStatistics Trace Statistics
 
 More than just codelet performance, it is interesting to get statistics over all
 kinds of StarPU states (allocations, data transfers, etc.). This is particularly
-useful to check what may have gone wrong in the accurracy of the simgrid
+useful to check what may have gone wrong in the accuracy of the SimGrid
 simulation.
 
 This requires the <c>R</c> statistical tool, with the <c>plyr</c>,
@@ -526,6 +583,39 @@ more efficient):
 $ starpu_paje_sort paje.trace
 \endverbatim
 
+\section PapiCounters PAPI counters
+
+Performance counter values could be obtained from the PAPI framework if
+<c>./configure</c> detected the libpapi. One has to set the \ref STARPU_PROFILING
+environment variable to 1 and then specify which events to record with the
+\ref STARPU_PROF_PAPI_EVENTS environment variable. For instance:
+
+\verbatim
+export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC"
+\endverbatim
+
+In the current simple implementation, only CPU tasks have their events measured
+and require CPUs that support the PAPI events. All events that PAPI supports are
+available from its documentation (https://icl.cs.utk.edu/projects/papi/wiki/PAPIC:Preset_Event_Definitions).
+It is important to note that not all events are available on all systems, and
+general PAPI recommendations should be followed.
+
+The counter values can be accessed using the profiling interface:
+\code{.c}
+task->profiling_info->papi_values
+\endcode
+The values can also be accessed and/or saved with tracing when using \ref STARPU_FXT_TRACE. When using <c>starpu_fxt_tool</c>,
+the file <c>papi.rec</c> is generated, containing the following triple:
+
+\verbatim
+Task Id
+Event Id
+Value
+\endverbatim
+
+External tools like <c>rec2csv</c> can be used to convert this rec file to a <c>csv</c>, where each
+line represents a value for an event for a task.
+
 \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time
 
 StarPU can record a trace of what tasks are needed to complete the
@@ -590,6 +680,69 @@ the priorities as the StarPU scheduler would, i.e. schedule prioritized
 tasks before less prioritized tasks, to check to which extend this results
 to a less optimal solution. This increases even more computation time.
 
+\section starvz Trace visualization with StarVZ
+
+Creating views with StarVZ (see: https://github.com/schnorr/starvz) is made up of two steps. The initial
+stage consists of a pre-processing of the traces generated by the application.
+The second step consists of the analysis itself and is carried out with the
+aid of R packages. To download and install StarVZ, it is necessary to have R,
+pajeng and the following packages:
+
+\verbatim
+# For pajeng
+apt install -y git cmake build-essential libboost-dev asciidoc flex bison
+git clone git://github.com/schnorr/pajeng.git
+mkdir -p pajeng/b ; cd pajeng/b
+cmake ..
+make
+
+# For R tidyverse
+apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev
+\endverbatim
+
+To install the StarVZ the following commands can be used:
+
+\verbatim
+git clone https://github.com/schnorr/starvz.git
+echo "install.packages(c('tidyverse', 'devtools'), repos = 'https://cloud.r-project.org')" | R --vanilla
+echo "library(devtools); devtools::install_local(path='./starvz/R_package')" | R --vanilla
+\endverbatim
+
+To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE
+and build StarPU with FxT. Then, Step 1 of StarVZ can be used on a folder with
+StarPU FxT traces:
+
+\verbatim
+export PATH=starvz/:$PATH
+export PATH=pajeng/b:$PATH
+export PATH=$STARPU_HOME/bin:$PATH
+
+./starvz/src/phase1-workflow.sh /tmp/ ""
+\endverbatim
+
+Then the second step can be executed directly in R, StarVZ enables a set of
+different plots that can be configured in a .yaml file. A default file,
+<c>full_config.yaml</c>, is provided; the options can also be changed directly in R.
+
+\verbatim
+library(starvz)
+dtrace <- the_fast_reader_function("./")
+
+pajer <- config::get(file = "starvz/full_config.yaml")
+
+pajer$starpu$active = TRUE
+pajer$submitted$active = TRUE
+pajer$st$abe$active = TRUE
+
+plot <- the_master_function(dtrace)
+\endverbatim
+
+An example of visualization follows:
+
+\image html starvz_visu.png
+\image latex starvz_visu.eps "" width=\textwidth
+
+
 \section MemoryFeedback Memory Feedback
 
 It is possible to enable memory statistics. To do so, you need to pass
@@ -601,58 +754,50 @@ Moreover, statistics will be displayed at the end of the execution on
 data handles which have not been cleared out. This can be disabled by
 setting the environment variable \ref STARPU_MEMORY_STATS to <c>0</c>.
 
-For example, if you do not unregister data at the end of the complex
-example, you will get something similar to:
-
-\verbatim
-$ STARPU_MEMORY_STATS=0 ./examples/interface/complex
-Complex[0] = 45.00 + 12.00 i
-Complex[0] = 78.00 + 78.00 i
-Complex[0] = 45.00 + 12.00 i
-Complex[0] = 45.00 + 12.00 i
-\endverbatim
+For example, by adding a call to the function
+starpu_data_display_memory_stats() in the fblock example before
+unpartitioning the data, one will get something
+similar to:
 
 \verbatim
-$ STARPU_MEMORY_STATS=1 ./examples/interface/complex
-Complex[0] = 45.00 + 12.00 i
-Complex[0] = 78.00 + 78.00 i
-Complex[0] = 45.00 + 12.00 i
-Complex[0] = 45.00 + 12.00 i
-
+$ STARPU_MEMORY_STATS=1 ./examples/filters/fblock
+...
 #---------------------
-Memory stats:
+Memory stats :
 #-------
-Data on Node #3
+Data on Node #2
 #-----
-Data : 0x553ff40
-Size : 16
+Data : 0x5562074e8670
+Size : 144
 
 #--
 Data access stats
 /!\ Work Underway
 Node #0
-	Direct access : 4
+	Direct access : 0
 	Loaded (Owner) : 0
 	Loaded (Shared) : 0
-	Invalidated (was Owner) : 0
+	Invalidated (was Owner) : 1
 
-Node #3
+Node #2
 	Direct access : 0
-	Loaded (Owner) : 0
-	Loaded (Shared) : 1
+	Loaded (Owner) : 1
+	Loaded (Shared) : 0
 	Invalidated (was Owner) : 0
 
+#-------
+Data on Node #3
 #-----
-Data : 0x5544710
-Size : 16
+Data : 0x5562074e9338
+Size : 96
 
 #--
 Data access stats
 /!\ Work Underway
 Node #0
-	Direct access : 2
+	Direct access : 0
 	Loaded (Owner) : 0
-	Loaded (Shared) : 1
+	Loaded (Shared) : 0
 	Invalidated (was Owner) : 1
 
 Node #3
@@ -660,6 +805,10 @@ Node #3
 	Loaded (Owner) : 1
 	Loaded (Shared) : 0
 	Invalidated (was Owner) : 0
+
+
+#---------------------
+...
 \endverbatim
 
 \section DataStatistics Data Statistics

+ 1 - 3
doc/doxygen/chapters/390_faq.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014,2016,2017                 Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 9 - 9
doc/doxygen/chapters/401_out_of_core.doxy

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013,2014,2016-2019                      CNRS
- * Copyright (C) 2013,2014,2017,2018-2019                 Université de Bordeaux
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2013                                     Corentin Salingue
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013       Corentin Salingue
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +17,7 @@
 
 /*! \page OutOfCore Out Of Core
 
-\section Introduction Introduction
+\section OutOfCore_Introduction Introduction
 
 When using StarPU, one may need to store more data than what the main memory
 (RAM) can store. This part describes the method to add a new memory node on a
@@ -91,7 +89,7 @@ system daemons, and application data).
 When the register call is made, StarPU will benchmark the disk. This can
 take some time.
 
-<strong>Warning: the size thus has to be at least \ref STARPU_DISK_SIZE_MIN bytes ! </strong> 
+<strong>Warning: the size thus has to be at least \ref STARPU_DISK_SIZE_MIN bytes ! </strong>
 
 StarPU will then automatically try to evict unused data to this new disk. One
 can also use the standard StarPU memory node API to prefetch data etc., see the
@@ -127,7 +125,7 @@ value right after this call, and thus the very first task using the handle needs
 to use the ::STARPU_W mode like above, ::STARPU_R or ::STARPU_RW would not make
 sense.
 
-By default, StarPU will try to push any data handle to the disk. 
+By default, StarPU will try to push any data handle to the disk.
 To specify whether a given handle should be pushed to the disk,
 starpu_data_set_ooc_flag() should be used.
 
@@ -174,7 +172,9 @@ work on this area in the coming future.
 
 Beyond pure performance feedback, some figures are interesting to have a look at.
 
-Using <c>export STARPU_BUS_STATS=1</c> gives an overview of the data
+Using <c>export STARPU_BUS_STATS=1</c> (\ref STARPU_BUS_STATS and \ref STARPU_BUS_STATS_FILE
+to define a filename in which to display statistics, by default the
+standard error stream is used) gives an overview of the data
 transfers which were needed. The values can also be obtained at runtime
 by using starpu_bus_get_profiling_info(). An example can be read in
 <c>src/profiling/profiling_helpers.c</c>.
@@ -188,7 +188,7 @@ Data transfer speed for /tmp/sthibault-disk-DJzhAj (node 1):
 1 -> 0: 23858 µs
 
 #---------------------
-TEST DISK MEMORY 
+TEST DISK MEMORY
 
 #---------------------
 Data transfer stats:

+ 3 - 4
doc/doxygen/chapters/410_mpi_support.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2011-2013,2016,2017                      Inria
- * Copyright (C) 2009-2011,2013-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -236,7 +234,8 @@ For send communications, data is acquired with the mode ::STARPU_R.
 When using the \c configure option
 \ref enable-mpi-pedantic-isend "--enable-mpi-pedantic-isend", the mode
 ::STARPU_RW is used to make sure there is no more than 1 concurrent
-\c MPI_Isend() call accessing a data.
+\c MPI_Isend() call accessing a data
+and StarPU does not read from it from tasks during the communication.
 
 Internally, all communication are divided in 2 communications, a first
 message is used to exchange an envelope describing the data (i.e its

+ 2 - 2
doc/doxygen/chapters/415_fault_tolerance.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2019                                     Université de Bordeaux
+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,7 +16,7 @@
 
 /*! \page FaultTolerance Fault Tolerance
 
-\section Introduction Introduction
+\section FaultTolerance_Introduction Introduction
 
 Due to e.g. hardware error, some tasks may fail, or even complete nodes may
 fail.  For now, StarPU provides some support for failure of tasks.

+ 2 - 4
doc/doxygen/chapters/420_fft_support.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2011,2014,2015                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,7 +64,7 @@ the task completion, and thus permits to enqueue a series of tasks.
 
 All functions are defined in \ref API_FFT_Support.
 
-\section Compilation Compilation
+\section FFTCompilation Compilation
 
 The flags required to compile or link against the FFT library are accessible
 with the following commands:

+ 2 - 4
doc/doxygen/chapters/430_mic_support.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2011,2012,2016                           Inria
- * Copyright (C) 2009-2011,2013-2016                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +16,7 @@
 
 /*! \page MICSupport MIC Xeon Phi Support
 
-\section Compilation Compilation
+\section MICCompilation Compilation
 
 MIC Xeon Phi support actually needs two compilations of StarPU, one for the host and one for
 the device. The <c>PATH</c> environment variable has to include the path to the

+ 13 - 14
doc/doxygen/chapters/450_native_fortran_support.doxy

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2016,2017,2019                           CNRS
- * Copyright (C) 2014,2016                                Inria
+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,9 +28,9 @@ symbols in the same Fortran code has unspecified behaviour.
 See \ref APIMIX for a discussion about valid and unspecified
 combinations.
 
-\section Implementation Implementation Details and Specificities
+\section NFImplementation Implementation Details and Specificities
 
-\subsection Prerequisites Prerequisites
+\subsection NFPrerequisites Prerequisites
 
 The Native Fortran support relies on Fortran 2008 specific constructs,
 as well as on the support of interoperability of assumed-shape arrays
@@ -44,7 +43,7 @@ not to work with GNU GFortran < 4.9, Intel Fortran Compiler < 2016.
 See Section \ref OldFortran on information on how to write StarPU
 Fortran code with older compilers.
 
-\subsection Configuration Configuration
+\subsection NFConfiguration Configuration
 
 The Native Fortran API is enabled and its companion
 <c>fstarpu_mod.f90</c> Fortran module source file is installed
@@ -55,13 +54,13 @@ disable-fortran "--disable-fortran". Conditional compiled source codes
 may check for the availability of the Native Fortran Support by testing
 whether the preprocessor macro <c>STARPU_HAVE_FC</c> is defined or not.
 
-\subsection Examples Examples
+\subsection NFExamples Examples
 
 Several examples using the Native Fortran API are provided in
 StarPU's <c>examples/native_fortran/</c> examples directory, to showcase
 the Fortran flavor of various basic and more advanced StarPU features.
 
-\subsection AppCompile Compiling a Native Fortran Application
+\subsection NFAppCompile Compiling a Native Fortran Application
 
 The Fortran module <c>fstarpu_mod.f90</c> installed in StarPU's
 <c>include/</c> directory provides all the necessary API definitions. It
@@ -74,7 +73,7 @@ examples directory comes with its own dedicated Makefile for out-of-tree
 build. Such example Makefiles may be used as starting points for
 building application codes with StarPU.
 
-\section Idioms Fortran Translation for Common StarPU API Idioms
+\section NFIdioms Fortran Translation for Common StarPU API Idioms
 
 All these examples assume that the standard Fortran module <c>iso_c_binding</c>
 is in use.
@@ -142,7 +141,7 @@ is in use.
         call fstarpu_codelet_add_buffer(my_cl, FSTARPU_RW.ior.FSTARPU_LOCALITY)
 \endcode
 
-\section InitExit Uses, Initialization and Shutdown
+\section NFInitExit Uses, Initialization and Shutdown
 
 The snippet below show an example of minimal StarPU code using the
 Native Fortran support. The program should <c>use</c> the standard
@@ -154,7 +153,7 @@ the runtime engine and frees all internal StarPU data structures.
 
 \snippet nf_initexit.f90 To be included. You should update doxygen if you see this text.
 
-\section InsertTask Fortran Flavor of StarPU's Variadic Insert_task
+\section NFInsertTask Fortran Flavor of StarPU's Variadic Insert_task
 
 Fortran does not have a construction similar to C variadic functions on which
 starpu_insert_task() relies at the time of this writing. However, Fortran's variable
@@ -183,7 +182,7 @@ Example extracted from nf_vector.f90:
             C_NULL_PTR /))                                ! no more args
 \endcode
 
-\section Structs Functions and Subroutines Expecting Data Structures Arguments
+\section NFStructs Functions and Subroutines Expecting Data Structures Arguments
 
 Several StarPU structures that are expected to be passed to the C API,
 are replaced by function/subroutine wrapper sets to allocate, set fields
@@ -215,8 +214,8 @@ structure:
         call fstarpu_codelet_free(cl_vec)
 \endcode
 
-\section Notes Additional Notes about the Native Fortran Support
-\subsection OldFortran Using StarPU with Older Fortran Compilers
+\section NFNotes Additional Notes about the Native Fortran Support
+\subsection NFOldFortran Using StarPU with Older Fortran Compilers
 
 When using older compilers, Fortran applications may still interoperate
 with StarPU using C marshalling functions as exemplified in StarPU's
@@ -231,7 +230,7 @@ Note that this marshalled FORTRAN support remains available even
 when specifying \c configure option \ref disable-fortran "--disable-fortran"
 (which only disables StarPU's native Fortran layer).
 
-\subsection APIMIX Valid API Mixes and Language Mixes
+\subsection NFAPIMIX Valid API Mixes and Language Mixes
 
 Mixing uses of
 <c>fstarpu_</c> and <c>starpu_</c> symbols in the same

+ 1 - 3
doc/doxygen/chapters/460_socl_opencl_extensions.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014-2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 7 - 8
doc/doxygen/chapters/470_simgrid.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011,2012,2014,2016,2017                 Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,14 +21,15 @@
 /*! \page SimGridSupport SimGrid Support
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
-platform. This was tested with simgrid from 3.11 to 3.16, and 3.18 to 3.22.
+platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
+3.25. SimGrid versions 3.25 and above need to be configured with -Denable_msg=ON.
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 
 \section Preparing Preparing Your Application For Simulation
 
 There are a few technical details which need to be handled for an application to
-be simulated through Simgrid.
+be simulated through SimGrid.
 
 If the application uses <c>gettimeofday</c> to make its
 performance measurements, the real time will be used, which will be bogus. To
@@ -38,19 +37,19 @@ get the simulated time, it has to use starpu_timing_now() which returns the
 virtual timestamp in us.
 
 For some technical reason, the application's .c file which contains \c main() has
-to be recompiled with \c starpu_simgrid_wrap.h, which in the simgrid case will <c># define main()</c>
+to be recompiled with \c starpu_simgrid_wrap.h, which in the SimGrid case will <c># define main()</c>
 into <c>starpu_main()</c>, and it is \c libstarpu which will provide the real \c main() and
 will call the application's \c main().
 
 To be able to test with crazy data sizes, one may want to only allocate
 application data if the macro \c STARPU_SIMGRID is not defined.  Passing a <c>NULL</c> pointer to
 \c starpu_data_register functions is fine, data will never be read/written to by
-StarPU in Simgrid mode anyway.
+StarPU in SimGrid mode anyway.
 
 To be able to run the application with e.g. CUDA simulation on a system which
 does not have CUDA installed, one can fill the starpu_codelet::cuda_funcs with \c (void*)1, to
 express that there is a CUDA implementation, even if one does not actually
-provide it. StarPU will not actually run it in Simgrid mode anyway by default
+provide it. StarPU will not actually run it in SimGrid mode anyway by default
 (unless the ::STARPU_CODELET_SIMGRID_EXECUTE or ::STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT
 flags are set in the codelet)
 

+ 16 - 17
doc/doxygen/chapters/480_openmp_runtime_support.doxy

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2014-2017, 2019                          CNRS
- * Copyright (C) 2014                                     Inria
+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,9 +27,9 @@ inline or as outlined functions.
 
 All functions are defined in \ref API_OpenMP_Runtime_Support.
 
-\section Implementation Implementation Details and Specificities
+\section OMPImplementation Implementation Details and Specificities
 
-\subsection MainThread Main Thread
+\subsection OMPMainThread Main Thread
 
 When using the SORS, the main thread gets involved in executing OpenMP tasks
 just like every other threads, in order to be compliant with the
@@ -38,7 +37,7 @@ specification execution model. This contrasts with StarPU's usual
 execution model where the main thread submit tasks but does not take
 part in executing them.
 
-\subsection TaskSemantics Extended Task Semantics
+\subsection OMPTaskSemantics Extended Task Semantics
 
 The semantics of tasks generated by the SORS are extended with respect
 to regular StarPU tasks in that SORS' tasks may block and be preempted
@@ -46,7 +45,7 @@ by SORS call, whereas regular StarPU tasks cannot. SORS tasks may
 coexist with regular StarPU tasks. However, only the tasks created using
 SORS API functions inherit from extended semantics.
 
-\section Configuration Configuration
+\section OMPConfiguration Configuration
 
 The SORS can be compiled into <c>libstarpu</c> through
 the \c configure option \ref enable-openmp "--enable-openmp".
@@ -54,7 +53,7 @@ Conditional compiled source codes may check for the
 availability of the OpenMP Runtime Support by testing whether the C
 preprocessor macro <c>STARPU_OPENMP</c> is defined or not.
 
-\section InitExit Initialization and Shutdown
+\section OMPInitExit Initialization and Shutdown
 
 The SORS needs to be executed/terminated by the
 starpu_omp_init() / starpu_omp_shutdown() instead of
@@ -82,7 +81,7 @@ static void omp_destructor(void)
 \sa starpu_omp_init()
 \sa starpu_omp_shutdown()
 
-\section Parallel Parallel Regions and Worksharing
+\section OMPSharing Parallel Regions and Worksharing
 
 The SORS provides functions to create OpenMP parallel regions as well as
 mapping work on participating workers. The current implementation does
@@ -252,13 +251,13 @@ starpu_omp_master() function variants.
 \sa starpu_omp_single_copyprivate_inline_begin()
 \sa starpu_omp_single_copyprivate_inline_end()
 
-\section Task Tasks
+\section OMPTask Tasks
 
 The SORS implements the necessary support of OpenMP 3.1 and OpenMP 4.0's
 so-called explicit tasks, together with OpenMP 4.0's data dependency
 management.
 
-\subsection OMPTask Explicit Tasks
+\subsection OMPTaskExplicit Explicit Tasks
 Explicit OpenMP tasks are created with the SORS using the
 starpu_omp_task_region() function. The implementation supports
 <c>if</c>, <c>final</c>, <c>untied</c> and <c>mergeable</c> clauses
@@ -314,7 +313,7 @@ void parallel_region_f(void *buffers[], void *args)
 \sa struct starpu_omp_task_region_attr
 \sa starpu_omp_task_region()
 
-\subsection DataDependencies Data Dependencies
+\subsection OMPDataDependencies Data Dependencies
 The SORS implements inter-tasks data dependencies as specified in OpenMP
 4.0. Data dependencies are expressed using regular StarPU data handles
 (\ref starpu_data_handle_t) plugged into the task's <c>attr.cl</c>
@@ -332,7 +331,7 @@ dependencies between sibling tasks. Consequently the behaviour is
 unspecified if dependencies are expressed between tasks that have not
 been created by the same parent task.
 
-\subsection TaskSyncs TaskWait and TaskGroup
+\subsection OMPTaskSyncs TaskWait and TaskGroup
 The SORS implements both the <c>taskwait</c> and <c>taskgroup</c> OpenMP
 task synchronization constructs specified in OpenMP 4.0, with the
 starpu_omp_taskwait() and starpu_omp_taskgroup() functions respectively.
@@ -404,12 +403,12 @@ void parallel_region_f(void *buffers[], void *args)
 \sa starpu_omp_taskgroup_inline_begin()
 \sa starpu_omp_taskgroup_inline_end()
 
-\section Synchronization Synchronization Support
+\section OMPSynchronization Synchronization Support
 
 The SORS implements objects and method to build common OpenMP
 synchronization constructs.
 
-\subsection SimpleLock Simple Locks
+\subsection OMPSimpleLock Simple Locks
 
 The SORS Simple Locks are opaque starpu_omp_lock_t objects enabling multiple
 tasks to synchronize with each others, following the Simple Lock
@@ -428,7 +427,7 @@ Simple Locks as they incur less processing overhead than Nestable Locks.
 \sa starpu_omp_unset_lock()
 \sa starpu_omp_test_lock()
 
-\subsection NestableLock Nestable Locks
+\subsection OMPNestableLock Nestable Locks
 
 The SORS Nestable Locks are opaque starpu_omp_nest_lock_t objects enabling
 multiple tasks to synchronize with each others, following the Nestable
@@ -449,7 +448,7 @@ incur less processing overhead than Nestable Locks.
 \sa starpu_omp_unset_nest_lock()
 \sa starpu_omp_test_nest_lock()
 
-\subsection Critical Critical Sections
+\subsection OMPCritical Critical Sections
 
 The SORS implements support for OpenMP critical sections through the
 family of \ref starpu_omp_critical functions. Critical sections may optionally
@@ -461,7 +460,7 @@ a named one or the anonymous one.
 \sa starpu_omp_critical_inline_begin()
 \sa starpu_omp_critical_inline_end()
 
-\subsection Barrier Barriers
+\subsection OMPBarrier Barriers
 
 The SORS provides the starpu_omp_barrier() function to implement
 barriers over parallel region teams. In accordance with the OpenMP

+ 2 - 4
doc/doxygen/chapters/490_clustering_a_machine.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015-2019                                CNRS
- * Copyright (C) 2015,2018                                Université de Bordeaux
- * Copyright (C) 2015,2016                                Inria
+ * Copyright (C) 2015-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +16,7 @@
 
 /*! \page ClusteringAMachine Clustering A Machine
 
-\section GeneralIdeas General Ideas
+\section ClusteringGeneralIdeas General Ideas
 
 Clusters are a concept introduced in this
 <a href="https://hal.inria.fr/view/index/docid/1181135">paper</a>.

+ 1 - 1
doc/doxygen/chapters/495_interoperability.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018                                     Inria
+ * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 79 - 25
doc/doxygen/chapters/501_environment_variables.doxy

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2015-2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2013-2019                      Université de Bordeaux
- * Copyright (C) 2016                                     Uppsala University
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2016       Uppsala University
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,7 +20,7 @@
 The behavior of the StarPU library and tools may be tuned thanks to
 the following environment variables.
 
-\section ConfiguringWorkers Configuring Workers
+\section EnvConfiguringWorkers Configuring Workers
 
 <dl>
 
@@ -512,6 +510,15 @@ the coefficient to be applied to it before adding it to the computation part.
 Define the execution time penalty of a joule (\ref Energy-basedScheduling).
 </dd>
 
+<dt>STARPU_SCHED_READY</dt>
+<dd>
+\anchor STARPU_SCHED_READY
+\addindex __env__STARPU_SCHED_READY
+For a modular scheduler with sorted queues below the decision component, workers
+pick up a task which has most of its data already available. Setting this to 0
+disables this.
+</dd>
+
 <dt>STARPU_IDLE_POWER</dt>
 <dd>
 \anchor STARPU_IDLE_POWER
@@ -526,6 +533,13 @@ Define the idle power of the machine (\ref Energy-basedScheduling).
 Enable on-line performance monitoring (\ref EnablingOn-linePerformanceMonitoring).
 </dd>
 
+<dt>STARPU_PROF_PAPI_EVENTS</dt>
+<dd>
+\anchor STARPU_PROF_PAPI_EVENTS
+\addindex __env__STARPU_PROF_PAPI_EVENTS
+Specify which PAPI events should be recorded in the trace (\ref PapiCounters).
+</dd>
+
 </dl>
 
 \section Extensions Extensions
@@ -598,6 +612,22 @@ When set to 0, the use of priorities to order MPI communications is disabled
 (\ref MPISupport).
 </dd>
 
+<dt>STARPU_MPI_NDETACHED_SEND</dt>
+<dd>
+\anchor STARPU_MPI_NDETACHED_SEND
+\addindex __env__STARPU_MPI_NDETACHED_SEND
+This sets the number of send requests that StarPU-MPI will emit concurrently. The default is 10.
+</dd>
+
+<dt>STARPU_MPI_NREADY_PROCESS</dt>
+<dd>
+\anchor STARPU_MPI_NREADY_PROCESS
+\addindex __env__STARPU_MPI_NREADY_PROCESS
+This sets the number of requests that StarPU-MPI will submit to MPI before
+polling for termination of existing requests. The default is 10.
+</dd>
+
+
 <dt>STARPU_MPI_FAKE_SIZE</dt>
 <dd>
 \anchor STARPU_MPI_FAKE_SIZE
@@ -642,12 +672,21 @@ for that environment variable to be used, and the
 STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable set to a positive value.
 </dd>
 
+<dt>STARPU_MPI_MEM_THROTTLE</dt>
+<dd>
+\anchor STARPU_MPI_MEM_THROTTLE
+\addindex __env__STARPU_MPI_MEM_THROTTLE
+When set to a positive value, this makes the starpu_mpi_*recv* functions
+block when the memory allocation required for network reception overflows the
+available main memory (as typically set by \ref STARPU_LIMIT_CPU_MEM).
+</dd>
+
 <dt>STARPU_SIMGRID_TRANSFER_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_TRANSFER_COST
 \addindex __env__STARPU_SIMGRID_TRANSFER_COST
 When set to 1 (which is the default), data transfers (over PCI bus, typically) are taken into account
-in simgrid mode.
+in SimGrid mode.
 </dd>
 
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
@@ -655,7 +694,7 @@ in simgrid mode.
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST
 \addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST
 When set to 1 (which is the default), CUDA malloc costs are taken into account
-in simgrid mode.
+in SimGrid mode.
 </dd>
 
 <dt>STARPU_SIMGRID_CUDA_QUEUE_COST</dt>
@@ -663,14 +702,14 @@ in simgrid mode.
 \anchor STARPU_SIMGRID_CUDA_QUEUE_COST
 \addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST
 When set to 1 (which is the default), CUDA task and transfer queueing costs are
-taken into account in simgrid mode.
+taken into account in SimGrid mode.
 </dd>
 
 <dt>STARPU_PCI_FLAT</dt>
 <dd>
 \anchor STARPU_PCI_FLAT
 \addindex __env__STARPU_PCI_FLAT
-When unset or set to 0, the platform file created for simgrid will
+When unset or set to 0, the platform file created for SimGrid will
 contain PCI bandwidths and routes.
 </dd>
 
@@ -678,7 +717,7 @@ contain PCI bandwidths and routes.
 <dd>
 \anchor STARPU_SIMGRID_QUEUE_MALLOC_COST
 \addindex __env__STARPU_SIMGRID_QUEUE_MALLOC_COST
-When unset or set to 1, simulate within simgrid the GPU transfer queueing.
+When unset or set to 1, simulate within SimGrid the GPU transfer queueing.
 </dd>
 
 <dt>STARPU_MALLOC_SIMULATION_FOLD</dt>
@@ -695,7 +734,7 @@ MiB. The default is 1, thus allowing 64GiB virtual memory when Linux's
 \anchor STARPU_SIMGRID_TASK_SUBMIT_COST
 \addindex __env__STARPU_SIMGRID_TASK_SUBMIT_COST
 When set to 1 (which is the default), task submission costs are taken into
-account in simgrid mode. This provides more accurate simgrid predictions,
+account in SimGrid mode. This provides more accurate SimGrid predictions,
 especially for the beginning of the execution.
 </dd>
 
@@ -704,7 +743,7 @@ especially for the beginning of the execution.
 \anchor STARPU_SIMGRID_FETCHING_INPUT_COST
 \addindex __env__STARPU_SIMGRID_FETCHING_INPUT_COST
 When set to 1 (which is the default), fetching input costs are taken into
-account in simgrid mode. This provides more accurate simgrid predictions,
+account in SimGrid mode. This provides more accurate SimGrid predictions,
 especially regarding data transfers.
 </dd>
 
@@ -713,7 +752,7 @@ especially regarding data transfers.
 \anchor STARPU_SIMGRID_SCHED_COST
 \addindex __env__STARPU_SIMGRID_SCHED_COST
 When set to 1 (0 is the default), scheduling costs are taken into
-account in simgrid mode. This provides more accurate simgrid predictions,
+account in SimGrid mode. This provides more accurate SimGrid predictions,
 and allows studying scheduling overhead of the runtime system. However,
 it also makes simulation non-deterministic.
 </dd>
@@ -903,6 +942,10 @@ that have a limited amount of memory.
 Specify the maximum number of megabytes that should be
 available to the application in the main CPU memory. Setting it enables allocation
 cache in main memory. Setting it to zero lets StarPU overflow memory.
+
+Note: for now not all StarPU allocations get throttled by this
+parameter. Notably MPI receptions are not throttled unless \ref
+STARPU_MPI_MEM_THROTTLE is set to 1.
 </dd>
 
 <dt>STARPU_LIMIT_CPU_NUMA_devid_MEM</dt>
@@ -1061,7 +1104,17 @@ StarPU for internal data structures during execution.
 \anchor STARPU_BUS_STATS
 \addindex __env__STARPU_BUS_STATS
 When defined, statistics about data transfers will be displayed when calling
-starpu_shutdown() (\ref Profiling).
+starpu_shutdown() (\ref Profiling). By default, statistics are printed
+on the standard error stream, use the environment variable \ref
+STARPU_BUS_STATS_FILE to define another filename.
+</dd>
+
+<dt>STARPU_BUS_STATS_FILE</dt>
+<dd>
+\anchor STARPU_BUS_STATS_FILE
+\addindex __env__STARPU_BUS_STATS_FILE
+Define the name of the file where to display data transfers
+statistics, see \ref STARPU_BUS_STATS.
 </dd>
 
 <dt>STARPU_WORKER_STATS</dt>
@@ -1071,7 +1124,17 @@ starpu_shutdown() (\ref Profiling).
 When defined, statistics about the workers will be displayed when calling
 starpu_shutdown() (\ref Profiling). When combined with the
 environment variable \ref STARPU_PROFILING, it displays the energy
-consumption (\ref Energy-basedScheduling).
+consumption (\ref Energy-basedScheduling).  By default, statistics are
+printed on the standard error stream, use the environment variable
+\ref STARPU_WORKER_STATS_FILE to define another filename.
+</dd>
+
+<dt>STARPU_WORKER_STATS_FILE</dt>
+<dd>
+\anchor STARPU_WORKER_STATS_FILE
+\addindex __env__STARPU_WORKER_STATS_FILE
+Define the name of the file where to display workers statistics, see
+\ref STARPU_WORKER_STATS.
 </dd>
 
 <dt>STARPU_STATS</dt>
@@ -1174,19 +1237,10 @@ average.
 \addindex __env__STARPU_RAND_SEED
 The random scheduler and some examples use random numbers for their own
 working. Depending on the examples, the seed is by default just always 0 or
-the current time() (unless simgrid mode is enabled, in which case it is always
+the current time() (unless SimGrid mode is enabled, in which case it is always
 0). \ref STARPU_RAND_SEED allows to set the seed to a specific value.
 </dd>
 
-<dt>STARPU_IDLE_TIME</dt>
-<dd>
-\anchor STARPU_IDLE_TIME
-\addindex __env__STARPU_IDLE_TIME
-When set to a value being a valid filename, a corresponding file
-will be created when shutting down StarPU. The file will contain the
-sum of all the workers' idle time.
-</dd>
-
 <dt>STARPU_GLOBAL_ARBITER</dt>
 <dd>
 \anchor STARPU_GLOBAL_ARBITER

+ 9 - 18
doc/doxygen/chapters/510_configure_options.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2015-2017                      Inria
- * Copyright (C) 2010-2017, 2019                                CNRS
- * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -98,8 +96,8 @@ means of the tool <c>pkg-config</c>.
 
 <dt>--with-hwloc=<c>prefix</c></dt>
 <dd>
-\anchor with-hwloc
-\addindex __configure__--with-hwloc
+\anchor with-hwloc-prefix
+\addindex __configure__--with-hwloc-prefix
 Specify <c>hwloc</c> should be used by StarPU. <c>hwloc</c> should be found in the
 directory specified by <c>prefix</c>
 </dd>
@@ -281,7 +279,7 @@ contain the OpenCL shared libraries---e.g. <c>libOpenCL.so</c>. This defaults to
 \addindex __configure__--enable-opencl-simulator
 Enable considering the provided OpenCL implementation as a simulator, i.e. use
 the kernel duration returned by OpenCL profiling information as wallclock time
-instead of the actual measured real time. This requires simgrid support.
+instead of the actual measured real time. This requires SimGrid support.
 </dd>
 
 <dt>--enable-maximplementations=<c>count</c></dt>
@@ -389,7 +387,8 @@ to be available in the main memory of the node submitting the request.
 For send communications, data is acquired with the mode ::STARPU_R.
 When enabling the pedantic mode, data are instead acquired with the
 ::STARPU_RW which thus ensures that there is not more than 1
-concurrent MPI_Isend calls accessing the data.
+concurrent MPI_Isend call accessing the data
+and StarPU does not read from it from tasks during the communication.
 </dd>
 
 <dt>--enable-mpi-master-slave</dt>
@@ -652,14 +651,6 @@ Disable the build of tests.
 Disable the build of examples.
 </dd>
 
-<dt>--disable-build-tests</dt>
-<dd>
-\anchor disable-build-tests
-\addindex __configure__--disable-build-tests
-Disable the build of tests.
-</dd>
-
-
 <dt>--enable-sc-hypervisor</dt>
 <dd>
 \anchor enable-sc-hypervisor
@@ -679,10 +670,10 @@ Enable memory statistics (\ref MemoryFeedback).
 <dd>
 \anchor enable-simgrid
 \addindex __configure__--enable-simgrid
-Enable simulation of execution in simgrid, to allow easy experimentation with
+Enable simulation of execution in SimGrid, to allow easy experimentation with
 various numbers of cores and GPUs, or amount of memory, etc. Experimental.
 
-The path to simgrid can be specified through the <c>SIMGRID_CFLAGS</c> and
+The path to SimGrid can be specified through the <c>SIMGRID_CFLAGS</c> and
 <c>SIMGRID_LIBS</c> environment variables, for instance:
 
 \verbatim
@@ -727,7 +718,7 @@ Use the smpirun at <c>path</c>
 <dd>
 \anchor enable-simgrid-mc
 \addindex __configure__--enable-simgrid-mc
-Enable the Model Checker in simulation of execution in simgrid, to allow
+Enable the Model Checker in simulation of execution in SimGrid, to allow
 exploring various execution paths.
 </dd>
 

+ 8 - 3
doc/doxygen/chapters/520_files.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2011-2013,2018                           Inria
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,6 +23,7 @@
 \file starpu_bound.h
 \file starpu_clusters.h
 \file starpu_cublas.h
+\file starpu_cublas_v2.h
 \file starpu_cusparse.h
 \file starpu_cuda.h
 \file starpu_data_filters.h
@@ -36,10 +35,15 @@
 \file starpu_expert.h
 \file starpu_fxt.h
 \file starpu_hash.h
+\file starpu_helper.h
+\file starpu_heteroprio.h
 \file starpu_mic.h
+\file starpu_mpi_ms.h
 \file starpu_mod.f90
 \file starpu_opencl.h
 \file starpu_openmp.h
+\file starpu_perf_monitoring.h
+\file starpu_perf_steering.h
 \file starpu_perfmodel.h
 \file starpu_profiling.h
 \file starpu_rand.h
@@ -52,6 +56,7 @@
 \file starpu_stdlib.h
 \file starpu_task_bundle.h
 \file starpu_task.h
+\file starpu_task_dep.h
 \file starpu_task_list.h
 \file starpu_task_util.h
 \file starpu_thread.h

+ 1 - 3
doc/doxygen/chapters/601_scaling_vector_example.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/610_fdl_1_3.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2015,2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2013-2018                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/fft_support.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/mic_extensions.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/mpi.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011,2012,2016,2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/opencl_extensions.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/api/openmp_runtime_support.doxy

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2014,2015,2017,2019                      CNRS
- * Copyright (C) 2014,2016                                Inria
+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2016,2017                      Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2016                                     Uppsala University
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2016       Uppsala University
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013                                Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 25 - 6
doc/doxygen/chapters/api/threads.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,7 +18,7 @@
 
 \brief This section describes the thread facilities provided
 by StarPU. The thread function are either implemented on top of the
-pthread library or the Simgrid library when the simulated performance
+pthread library or the SimGrid library when the simulated performance
 mode is enabled (\ref SimGridSupport).
 
 \def STARPU_PTHREAD_CREATE_ON
@@ -35,6 +33,13 @@ Call starpu_pthread_create() and abort on error.
 \ingroup API_Threads
 Call starpu_pthread_mutex_init() and abort on error.
 
+\def STARPU_PTHREAD_MUTEX_INIT0
+\ingroup API_Threads
+Call starpu_pthread_mutex_init() only if the content of
+PTHREAD_MUTEX_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_MUTEX_INIT when it is known that the content of the
+pthread_mutex_t was already zeroed.
+
 \def STARPU_PTHREAD_MUTEX_DESTROY
 \ingroup API_Threads
 Call starpu_pthread_mutex_destroy() and abort on error.
@@ -67,6 +72,13 @@ Call starpu_pthread_getspecific() and abort on error.
 \ingroup API_Threads
 Call starpu_pthread_rwlock_init() and abort on error.
 
+\def STARPU_PTHREAD_RWLOCK_INIT0
+\ingroup API_Threads
+Call starpu_pthread_rwlock_init() only if the content of
+PTHREAD_RWLOCK_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_RWLOCK_INIT when it is known that the content of the
+pthread_rwlock_t was already zeroed.
+
 \def STARPU_PTHREAD_RWLOCK_RDLOCK
 \ingroup API_Threads
 Call starpu_pthread_rwlock_rdlock() and abort on error.
@@ -87,6 +99,13 @@ Call starpu_pthread_rwlock_destroy() and abort on error.
 \ingroup API_Threads
 Call starpu_pthread_cond_init() and abort on error.
 
+\def STARPU_PTHREAD_COND_INIT0
+\ingroup API_Threads
+Call starpu_pthread_cond_init() only if the content of
+PTHREAD_COND_INITIALIZER is not zero. This should be called instead
+of STARPU_PTHREAD_COND_INIT when it is known that the content of the
+pthread_cond_t was already zeroed.
+
 \def STARPU_PTHREAD_COND_DESTROY
 \ingroup API_Threads
 Call starpu_pthread_cond_destroy() and abort on error.
@@ -359,8 +378,8 @@ todo
 \fn void starpu_sleep(float nb_sec)
 \ingroup API_Threads
 Similar to calling Unix' \c sleep function, except that it takes a float
-to allow sub-second sleeping, and when StarPU is compiled in simgrid mode it
-does not really sleep but just makes simgrid record that the thread has taken
+to allow sub-second sleeping, and when StarPU is compiled in SimGrid mode it
+does not really sleep but just makes SimGrid record that the thread has taken
 some time to sleep.
 
 */

+ 1 - 3
doc/doxygen/chapters/api/versioning.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/api/workers.doxy

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2013,2017                           Inria
- * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2011,2014,2016-2019                 Université de Bordeaux
+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/complex.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
doc/doxygen/chapters/code/disk_compute.c

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013,2017,2018                           CNRS
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2014                                     Université de Bordeaux
- * Copyright (C) 2013                                     Corentin Salingue
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013       Corentin Salingue
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
doc/doxygen/chapters/code/disk_copy.c

@@ -1,9 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013,2017                                CNRS
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2014                                     Université de Bordeaux
- * Copyright (C) 2013                                     Corentin Salingue
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2013       Corentin Salingue
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/forkmode.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/code/multiformat.c

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/nf_initexit.f90

@@ -1,7 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
-! Copyright (C) 2017                                     CNRS
-! Copyright (C) 2016                                     Inria
+! Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 !
 ! StarPU is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/simgrid.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/code/vector_scal_c.c

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
- * Copyright (C) 2013                                     Inria
- * Copyright (C) 2010-2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/vector_scal_cpu.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2013                                     Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 2
doc/doxygen/chapters/code/vector_scal_cuda.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013,2015,2017,2018                 CNRS
- * Copyright (C) 2010,2014                                Université de Bordeaux
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 3
doc/doxygen/chapters/code/vector_scal_opencl.c

@@ -1,8 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010,2012,2013,2015,2017,2018            CNRS
- * Copyright (C) 2011,2014                                Université de Bordeaux
- * Copyright (C) 2010                                     Inria
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 1
doc/doxygen/chapters/code/vector_scal_opencl_codelet.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

二进制
doc/doxygen/chapters/images/starvz_visu.eps


二进制
doc/doxygen/chapters/images/starvz_visu.png


文件差异内容过多而无法显示
+ 4274 - 0
doc/doxygen/chapters/images/trace_recv_use.eps


二进制
doc/doxygen/chapters/images/trace_recv_use.pdf


二进制
doc/doxygen/chapters/images/trace_recv_use.png


文件差异内容过多而无法显示
+ 4019 - 0
doc/doxygen/chapters/images/trace_send_use.eps


二进制
doc/doxygen/chapters/images/trace_send_use.pdf


二进制
doc/doxygen/chapters/images/trace_send_use.png


+ 48 - 14
doc/doxygen/dev/checkDoc.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2014,2016,2017                      CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,21 +16,55 @@
 #
 dirname=$(dirname $0)
 
-x=$(grep ingroup $dirname/../chapters/api/*.doxy $dirname/../chapters/api/sc_hypervisor/*.doxy |awk -F':' '{print $2}'| awk 'NF != 2')
-if test -n "$x" ; then
-    echo Errors on group definitions
-    echo $x
-fi
-
-echo
+DIRS="$dirname/../../../include $dirname/../../../mpi/include $dirname/../../../starpurm/include $dirname/../../../sc_hypervisor/include"
 echo "Defined groups"
-grep ingroup $dirname/../chapters/api/*.doxy $dirname/../chapters/api/sc_hypervisor/*.doxy|awk -F':' '{print $2}'| awk 'NF == 2'|sort|uniq
+groups=""
+for d in $DIRS
+do
+    echo Checking $d
+    gg=$(grep -rs defgroup $d | awk '{print $3}')
+    echo $gg
+    groups=$(echo $groups $gg)
+done
+for g in $groups
+do
+    gg=$(echo $g | sed 's/_/__/g')
+    x=$(grep $gg $dirname/../refman.tex)
+    if test -z "$x"
+    then
+	echo "Error. Group $g not included in refman.tex"
+    fi
+done
 echo
 
-for f in $dirname/../../../build/doc/doxygen/latex/*tex ; do
-    x=$(grep $(basename $f .tex) $dirname/../refman.tex)
-    if test -z "$x" ; then
-	echo Error. $f not included in refman.tex
-    fi
+for d in $DIRS
+do
+    for f in $(find $d -name "*.h")
+    do
+	ff=$(echo $f  | awk -F'/' '{print $NF}')
+	x=$(grep $ff $dirname/../doxygen-config.cfg.in)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in doxygen-config.cfg.in
+	fi
+	x=$(grep $ff $dirname/../chapters/520_files.doxy)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in 520_files.doxy
+	fi
+    done
+done
+echo
+
+for p in starpu sc__hypervisor
+do
+    for f in $dirname/../../../build/doc/doxygen/latex/${p}*tex
+    do
+	x=$(grep $(basename $f .tex) $dirname/../refman.tex)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in refman.tex
+	fi
+    done
 done
 

+ 0 - 28
doc/doxygen/dev/sc_funcs.cocci

@@ -1,28 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2014,2015,2017                           CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-@scfunc@
-position p;
-type t;
-identifier f =~ "sc";
-@@
-
-t f@p( ... );
-
-@ script:python @
-p << scfunc.p;
-f << scfunc.f;
-@@
-print "%s,%s:%s" % (f,p[0].file,p[0].line)

+ 1 - 1
doc/doxygen/dev/starpu_check_documented.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2014,2016,2017                      CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 23 - 28
doc/doxygen/dev/starpu_check_refs.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2016-2018                                CNRS
+# Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -20,10 +20,6 @@ greencolor=$(tput setaf 2)
 
 dirname=$(dirname $0)
 
-STARPU_H_FILES=$(find $dirname/../../../include $dirname/../../../mpi/include -name '*.h')
-SC_H_FILES=$(find $dirname/../../../sc_hypervisor/include -name '*.h')
-SRC="$dirname/../../../src $dirname/../../../mpi/src $dirname/../../../sc_hypervisor/src"
-
 #grep --exclude-dir=.git --binary-files=without-match -rsF "\ref" $dirname/../chapters|grep -v "\\ref [a-zA-Z]"
 #echo continue && read
 
@@ -32,36 +28,35 @@ GREP="grep --exclude-dir=.git --binary-files=without-match -rsF"
 REFS=$($GREP "\ref" $dirname/../chapters| tr ':' '\012' | tr '.' '\012'  | tr ',' '\012'  | tr '(' '\012' | tr ')' '\012' | tr ' ' '\012'|grep -F '\ref' -A1 | grep -v '^--$' | sed 's/\\ref/=\\ref/' | tr '\012' ':' | tr '=' '\012' | sort | uniq)
 find $dirname/../chapters -name "*doxy" -exec cat {} \; > /tmp/DOXYGEN_$$
 cat $dirname/../refman.tex >> /tmp/DOXYGEN_$$
+find $dirname/../../../include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../starpurm/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../mpi/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../sc_hypervisor/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+
+stcolor=$(tput sgr0)
+redcolor=$(tput setaf 1)
+greencolor=$(tput setaf 2)
 
 for r in $REFS
 do
     ref=$(echo $r | sed 's/\\ref:\(.*\):/\1/')
-    n=$($GREP -crs "section $ref" /tmp/DOXYGEN_$$)
-    if test $n -eq 0
+    if test -n "$ref"
     then
-	n=$($GREP -crs "anchor $ref" /tmp/DOXYGEN_$$)
-	if test $n -eq 0
-	then
-	    n=$($GREP -crs "ingroup $ref" /tmp/DOXYGEN_$$)
-	    if test $n -eq 0
+	#echo "ref $ref"
+	for keyword in "section " "anchor " "ingroup " "defgroup " "def " "struct " "label{"
+	do
+	    n=$($GREP -crs "${keyword}${ref}" /tmp/DOXYGEN_$$)
+	    if test $n -ne 0
 	    then
-		n=$($GREP -crs "def $ref" /tmp/DOXYGEN_$$)
-		if test $n -eq 0
-		then
-		    n=$($GREP -crs "struct $ref" /tmp/DOXYGEN_$$)
-		    if test $n -eq 0
-		    then
-			if test $n -eq 0
-			then
-			    n=$($GREP -crs "label{$ref" /tmp/DOXYGEN_$$)
-			    if test $n -eq 0
-			    then
-				echo $ref missing
-			    fi
-			fi
-		    fi
-		fi
+		break
 	    fi
+	done
+	if test $n -eq 0
+	then
+	    echo "${redcolor}$ref${stcolor} is missing"
+	else
+	    true
+	    #echo "${greencolor}$ref${stcolor} is ok"
 	fi
     fi
 done

+ 63 - 36
doc/doxygen/dev/starpu_check_undocumented.sh

@@ -1,8 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011-2018                                CNRS
-# Copyright (C) 2011                                     Inria
+# Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -15,8 +14,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-# Note: expects Coccinelle's spatch command n the PATH
-# See: http://coccinelle.lip6.fr/
 
 stcolor=$(tput sgr0)
 redcolor=$(tput setaf 1)
@@ -40,52 +37,81 @@ else
     fi
 fi
 
-if [ "$1" == "--func" ] || [ "$1" == "" ] ; then
-    starpu_functions=$(spatch -very_quiet -sp_file $dirname/starpu_funcs.cocci $STARPU_H_FILES)
-    sc_functions=$(spatch -very_quiet -sp_file $dirname/sc_funcs.cocci $SC_H_FILES)
-    for func in $starpu_functions $sc_functions ; do
-	fname=$(echo $func|awk -F ',' '{print $1}')
-	location=$(echo $func|awk -F ',' '{print $2}')
-	x=$(grep "$fname(" $dirname/../chapters/api/*.doxy | grep "\\fn")
-	if test "$x" == "" ; then
-	    echo "function ${redcolor}${fname}${stcolor} at location ${redcolor}$location${stcolor} is not (or incorrectly) documented"
-	    #	else
-	    #		echo "function ${greencolor}${fname}${stcolor} at location $location is correctly documented"
-	fi
+ok()
+{
+    type=$1
+    name=$2
+    echo "$type ${greencolor}${name}${stcolor} is (maybe correctly) documented"
+}
+
+ko()
+{
+    type=$1
+    name=$2
+    echo "$type ${redcolor}${name}${stcolor} is not (or incorrectly) documented"
+}
+
+if [ "$1" == "--func" ] || [ "$1" == "" ]
+then
+    for f in $STARPU_H_FILES $SC_H_FILES
+    do
+	grep "(" $f | grep ';' | grep starpu | grep '^[a-z]' | grep -v typedef | grep -v '(\*' | while read line
+	do
+	    x=$(grep -F -B1 "$line" $f | head -1)
+	    fname=$(echo $line | awk -F'(' '{print $1}' | awk '{print $NF}' | tr -d '*')
+	    if test "$x" == '*/'
+	    then
+		ok function $fname
+	    else
+		#echo $line
+		ko function $fname
+	    fi
+	done
     done
-    echo
 fi
 
 if [ "$1" == "--struct" ] || [ "$1" == "" ] ; then
-    starpu_structs=$(grep "struct starpu" $STARPU_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    sc_structs=$(grep "struct sc" $SC_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    for struct in $starpu_structs $sc_structs ; do
-	x=$(grep -F "\\struct $struct" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "struct ${redcolor}${struct}${stcolor} is not (or incorrectly) documented"
+    starpu=$(grep "^struct starpu_[a-z_]*$" $STARPU_H_FILES | awk '{print $NF}')
+    sc=$(grep "^struct sc_[a-z_]*$" $SC_H_FILES | awk '{print $NF}')
+    for o in $starpu $sc ; do
+	hfile=$(grep -l "^struct ${o}$" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "^struct ${o}$" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "struct" ${o}
+	else
+	    ko "struct" ${o}
 	fi
     done
     echo
 fi
 
 if [ "$1" == "--enum" ] || [ "$1" == "" ] ; then
-    starpu_enums=$(grep "enum starpu" $STARPU_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    sc_enums=$(grep "enum starpu" $SC_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    for enum in $starpu_enums $sc_enums ; do
-	x=$(grep -F "\\enum $enum" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "enum ${redcolor}${enum}${stcolor} is not (or incorrectly) documented"
+    starpu=$(grep "^enum starpu_[a-z_]*$" $STARPU_H_FILES | awk '{print $NF}')
+    sc=$(grep "^enum sc_[a-z_]*$" $SC_H_FILES | awk '{print $NF}')
+    for o in $starpu $sc ; do
+	hfile=$(grep -l "^enum ${o}$" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "^enum ${o}$" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "enum" ${o}
+	else
+	    ko "enum" ${o}
 	fi
     done
     echo
 fi
 
 if [ "$1" == "--macro" ] || [ "$1" == "" ] ; then
-    macros=$(grep "define\b" $STARPU_H_FILES $SC_H_FILES |grep -v deprecated|grep "#" | grep -v "__" | sed 's/#[ ]*/#/g' | awk '{print $2}' | awk -F'(' '{print $1}' | sort|uniq)
-    for macro in $macros ; do
-	x=$(grep -F "\\def $macro" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "macro ${redcolor}${macro}${stcolor} is not (or incorrectly) documented"
+    macros=$(grep "define\b" $STARPU_H_FILES $SC_H_FILES |grep -v deprecated|grep "#" | grep -v "__" | sed 's/#[ ]*/#/g' | awk '{print $2}' | awk -F'(' '{print $1}' | grep -i starpu | sort|uniq)
+    for o in $macros ; do
+	hfile=$(grep -l "define\b ${o}" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "define\b ${o}" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "define" ${o}
+	else
+	    ko "define" ${o}
 	fi
     done
     echo
@@ -96,8 +122,9 @@ if [ "$1" == "--var" ] || [ "$1" == "" ] ; then
     for variable in $variables ; do
 	x=$(grep "$variable" $dirname/../chapters/501_environment_variables.doxy | grep "\\anchor")
 	if test "$x" == "" ; then
-	    echo "variable ${redcolor}${variable}${stcolor} is not (or incorrectly) documented"
+	    ko "variable" $variable
+	else
+	    ok "variable" $variable
 	fi
     done
 fi
-

+ 0 - 28
doc/doxygen/dev/starpu_funcs.cocci

@@ -1,28 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2013,2015,2017                           CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-@starpufunc@
-position p;
-type t;
-identifier f =~ "starpu";
-@@
-
-t f@p( ... );
-
-@ script:python @
-p << starpufunc.p;
-f << starpufunc.f;
-@@
-print "%s,%s:%s" % (f,p[0].file,p[0].line)

+ 14 - 6
doc/doxygen/doxygen-config.cfg.in

@@ -1,10 +1,8 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011-2014,2016,2018                      Inria
-# Copyright (C) 2010-2017, 2019                          CNRS
-# Copyright (C) 2009-2014                                Université de Bordeaux
-# Copyright (C) 2013                                     Simon Archipoff
-# Copyright (C) 2011                                     Télécom-SudParis
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2011       Télécom-SudParis
+# Copyright (C) 2013       Simon Archipoff
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -26,6 +24,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 	 		 @top_srcdir@/include/starpu_clusters.h \
 			 @top_srcdir@/include/starpu_cusparse.h \
 			 @top_srcdir@/include/starpu_cublas.h \
+			 @top_srcdir@/include/starpu_cublas_v2.h \
 			 @top_srcdir@/include/starpu_cuda.h \
 			 @top_srcdir@/include/starpu_data_filters.h \
 			 @top_srcdir@/include/starpu_data.h \
@@ -39,9 +38,12 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_hash.h \
 			 @top_srcdir@/include/starpu_helper.h \
 			 @top_srcdir@/include/starpu_mic.h \
+			 @top_srcdir@/include/starpu_mpi_ms.h \
 			 @top_srcdir@/include/starpu_mod.f90 \
 			 @top_srcdir@/include/starpu_opencl.h \
 			 @top_srcdir@/include/starpu_openmp.h \
+			 @top_srcdir@/include/starpu_perf_monitoring.h \
+			 @top_srcdir@/include/starpu_perf_steering.h \
 			 @top_srcdir@/include/starpu_perfmodel.h \
 			 @top_srcdir@/include/starpu_profiling.h \
 			 @top_srcdir@/include/starpu_rand.h \
@@ -69,7 +71,13 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/mpi/include/fstarpu_mpi_mod.f90 \
 			 @top_srcdir@/starpufft/include/starpufft.h \
 			 @top_srcdir@/sc_hypervisor/include \
-			 @top_srcdir@/starpurm/include/starpurm.h
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_config.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_policy.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_lp.h  \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_monitoring.h \
+			 @top_srcdir@/starpurm/include/starpurm.h \
+			 @top_srcdir@/include/schedulers/starpu_heteroprio.h
 
 EXAMPLE_PATH           = @top_srcdir@/doc/doxygen \
 		       	 @top_srcdir@/doc/doxygen/chapters \

+ 3 - 4
doc/doxygen/doxygen.cfg

@@ -1,9 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010-2015,2017,2019                      CNRS
-# Copyright (C) 2011,2012,2014                           Inria
-# Copyright (C) 2009-2014                                Université de Bordeaux
-# Copyright (C) 2011                                     Télécom-SudParis
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2011       Télécom-SudParis
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -1627,6 +1625,7 @@ PREDEFINED             = STARPU_USE_OPENCL=1 \
 			 STARPU_MKL=1 \
 			 STARPU_WORKER_CALLBACKS=1 \
 			 STARPU_HAVE_GLPK_H=1 \
+			 STARPU_USE_MPI_MASTER_SLAVE=1 \
                          __GCC__
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then

+ 1 - 1
doc/doxygen/doxygen_filter.sh.in

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2014,2016,2017,2019                 CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 18 - 8
doc/doxygen/refman.tex

@@ -1,9 +1,7 @@
 % StarPU --- Runtime system for heterogeneous multicore architectures.
 %
-% Copyright (C) 2013-2016,2018                           Inria
-% Copyright (C) 2013-2019                                CNRS
-% Copyright (C) 2014,2018-2019                                Université de Bordeaux
-% Copyright (C) 2013                                     Simon Archipoff
+% Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+% Copyright (C) 2013       Simon Archipoff
 %
 % StarPU is free software; you can redistribute it and/or modify
 % it under the terms of the GNU Lesser General Public License as published by
@@ -37,11 +35,11 @@ Generated by Doxygen.
 This manual documents the usage of StarPU version \STARPUVERSION. Its contents
 was last updated on \STARPUUPDATED.\\
 
-Copyright © 2009–2018 Université de Bordeaux\\
+Copyright © 2009–2019 Université de Bordeaux
 
-Copyright © 2010-2018 CNRS
+Copyright © 2010-2019 CNRS
 
-Copyright © 2011-2018 Inria
+Copyright © 2011-2019 Inria
 
 \medskip
 
@@ -235,6 +233,8 @@ Documentation License”.
 \input{group__API__Codelet__And__Tasks}
 \input{group__API__Insert__Task}
 \input{group__API__Explicit__Dependencies}
+\input{group__API__Perf__Monitoring}
+\input{group__API__Perf__Steering}
 \input{group__API__Performance__Model}
 \input{group__API__Profiling}
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
@@ -260,6 +260,9 @@ Documentation License”.
 \input{group__API__Modularized__Scheduler}
 \input{group__API__Clustering__Machine}
 \input{group__API__Interop__Support}
+\input{group__API__Master__Slave}
+\input{group__API__Random__Functions}
+\input{group__API__Sink}
 
 \chapter{File Index}
 \input{files}
@@ -274,6 +277,7 @@ Documentation License”.
 \input{starpu__clusters_8h}
 \input{starpu__config_8h}
 \input{starpu__cublas_8h}
+\input{starpu__cublas__v2_8h}
 \input{starpu__cusparse_8h}
 \input{starpu__cuda_8h}
 \input{starpu__data_8h}
@@ -285,12 +289,17 @@ Documentation License”.
 \input{starpu__expert_8h}
 \input{starpu__fxt_8h}
 \input{starpu__hash_8h}
+\input{starpu__helper_8h}
+\input{starpu__heteroprio_8h}
 \input{starpu__mic_8h}
 \input{starpu__mod_8f90}
 \input{starpu__mpi_8h}
 \input{starpu__mpi__lb_8h}
+\input{starpu__mpi__ms_8h}
 \input{starpu__opencl_8h}
 \input{starpu__openmp_8h}
+\input{starpu__perf__monitoring_8h}
+\input{starpu__perf__steering_8h}
 \input{starpu__perfmodel_8h}
 \input{starpu__profiling_8h}
 \input{starpu__rand_8h}
@@ -303,6 +312,7 @@ Documentation License”.
 \input{starpu__stdlib_8h}
 \input{starpu__task_8h}
 \input{starpu__task__bundle_8h}
+\input{starpu__task__dep_8h}
 \input{starpu__task__list_8h}
 \input{starpu__task__util_8h}
 \input{starpu__thread_8h}
@@ -335,7 +345,7 @@ Documentation License”.
 \hypertarget{GNUFreeDocumentationLicense}{}
 \input{GNUFreeDocumentationLicense}
 
-\part{Index}
+%\part{Index}
 \addcontentsline{toc}{chapter}{Index}
 \printindex
 

+ 5 - 7
doc/doxygen_dev/Makefile.am

@@ -1,8 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010-2018                                CNRS
-# Copyright (C) 2013-2018                                Inria
-# Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -126,7 +124,6 @@ $(DOX_TAG): $(dox_inputs)
 
 $(DOX_PDF): $(DOX_TAG) refman.tex
 	@cp $(top_srcdir)/doc/doxygen_dev/chapters/version.sty $(DOX_LATEX_DIR)
-	@-cp $(top_srcdir)/doc/doxygen_dev/chapters/images/*pdf $(DOX_LATEX_DIR)
 	@echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex
 	@cd $(DOX_LATEX_DIR) ;\
 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\
@@ -134,13 +131,14 @@ $(DOX_PDF): $(DOX_TAG) refman.tex
 	$(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ;\
 	$(SED) -i s'/\\item Module\\-Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' index.tex ;\
 	$(SED) -i s'/\\item File\\-Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' index.tex ;\
-	$(PDFLATEX) refman.tex ;\
+	max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\
+	! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\
 	$(MAKEINDEX) refman.idx ;\
-	$(PDFLATEX) refman.tex ;\
+	max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\
 	done=0; repeat=5 ;\
 	while test $$done = 0 -a $$repeat -gt 0; do \
            if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \
-	       $(PDFLATEX) refman.tex; \
+	       max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \
 	       repeat=`expr $$repeat - 1`; \
 	   else \
 	       done=1; \

+ 1 - 1
doc/doxygen_dev/chapters/000_introduction.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018                                     CNRS
+ * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 1 - 1
doc/doxygen_dev/chapters/010_core.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018                                     Inria
+ * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
doc/doxygen_dev/doxygen-config.cfg.in

@@ -1,9 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010-2018                                CNRS
-# Copyright (C) 2011-2013,2016                           Inria
-# Copyright (C) 2009-2014                                Université de Bordeaux
-# Copyright (C) 2011                                     Télécom-SudParis
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2011       Télécom-SudParis
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 2 - 4
doc/doxygen_dev/doxygen.cfg

@@ -1,9 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2010-2015,2017,2018,2019                 CNRS
-# Copyright (C) 2009-2014                                Université de Bordeaux
-# Copyright (C) 2011                                     Télécom-SudParis
-# Copyright (C) 2011,2012                                Inria
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+# Copyright (C) 2011       Télécom-SudParis
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 1 - 1
doc/doxygen_dev/doxygen_filter.sh.in

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2014,2016-2018                      CNRS
+# Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by

+ 4 - 4
doc/doxygen_dev/refman.tex

@@ -1,6 +1,6 @@
 % StarPU --- Runtime system for heterogeneous multicore architectures.
 %
-% Copyright (C) 2018                                     CNRS
+% Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 %
 % StarPU is free software; you can redistribute it and/or modify
 % it under the terms of the GNU Lesser General Public License as published by
@@ -34,11 +34,11 @@ Generated by Doxygen.
 This manual documents the internal usage of StarPU version \STARPUVERSION. Its contents
 was last updated on \STARPUUPDATED.\\
 
-Copyright © 2009–2018 Université de Bordeaux
+Copyright © 2009–2019 Université de Bordeaux
 
-Copyright © 2010-2018 CNRS
+Copyright © 2010-2019 CNRS
 
-Copyright © 2011-2018 Inria
+Copyright © 2011-2019 Inria
 
 \medskip
 

+ 2 - 3
doc/tutorial/Makefile

@@ -1,7 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2011, 2019                                 Université de Bordeaux
-# Copyright (C) 2010-2014, 2019                           CNRS
+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -13,7 +12,7 @@
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
+#
 CFLAGS          +=      $$(pkg-config --cflags starpu-1.1)
 LDLIBS          +=      $$(pkg-config --libs starpu-1.1)
 

+ 0 - 0
doc/tutorial/README


部分文件因为文件数量过多而无法显示