Explorar el Código

doc: various fixes - add missing stuff

Nathalie Furmento hace 8 años
padre
commit
0ab9a7a78c
Se han modificado 28 ficheros con 419 adiciones y 296 borrados
  1. 10 6
      doc/doxygen/chapters/api/bitmap.doxy
  2. 76 55
      doc/doxygen/chapters/api/codelet_and_tasks.doxy
  3. 3 4
      doc/doxygen/chapters/api/cuda_extensions.doxy
  4. 66 34
      doc/doxygen/chapters/api/data_interfaces.doxy
  5. 8 5
      doc/doxygen/chapters/api/data_management.doxy
  6. 6 6
      doc/doxygen/chapters/api/data_out_of_core.doxy
  7. 34 25
      doc/doxygen/chapters/api/data_partition.doxy
  8. 4 4
      doc/doxygen/chapters/api/explicit_dependencies.doxy
  9. 10 2
      doc/doxygen/chapters/api/fft_support.doxy
  10. 3 3
      doc/doxygen/chapters/api/fxt_support.doxy
  11. 25 23
      doc/doxygen/chapters/api/initialization.doxy
  12. 2 2
      doc/doxygen/chapters/api/misc_helpers.doxy
  13. 9 9
      doc/doxygen/chapters/api/modularized_scheduler.doxy
  14. 27 19
      doc/doxygen/chapters/api/mpi.doxy
  15. 3 3
      doc/doxygen/chapters/api/opencl_extensions.doxy
  16. 2 2
      doc/doxygen/chapters/api/openmp_runtime_support.doxy
  17. 2 2
      doc/doxygen/chapters/api/parallel_tasks.doxy
  18. 14 8
      doc/doxygen/chapters/api/performance_model.doxy
  19. 2 2
      doc/doxygen/chapters/api/profiling.doxy
  20. 17 7
      doc/doxygen/chapters/api/running_driver.doxy
  21. 2 2
      doc/doxygen/chapters/api/scc_extensions.doxy
  22. 5 5
      doc/doxygen/chapters/api/scheduling_policy.doxy
  23. 15 11
      doc/doxygen/chapters/api/standard_memory_library.doxy
  24. 7 7
      doc/doxygen/chapters/api/threads.doxy
  25. 2 2
      doc/doxygen/chapters/api/toolbox.doxy
  26. 33 37
      doc/doxygen/chapters/api/top.doxy
  27. 30 9
      doc/doxygen/chapters/api/workers.doxy
  28. 2 2
      doc/doxygen/doxygen_filter.sh.in

+ 10 - 6
doc/doxygen/chapters/api/bitmap.doxy

@@ -1,6 +1,6 @@
 /*
  * This file is part of the StarPU Handbook.
- * Copyright (C) 2014  CNRS
+ * Copyright (C) 2014, 2017  CNRS
  * See the file version.doxy for copying conditions.
  */
 
@@ -8,6 +8,10 @@
 
 \brief This section describes the bitmap facilities provided by StarPU.
 
+\struct starpu_bitmap
+\ingroup API_Bitmap
+todo
+
 \fn struct starpu_bitmap *starpu_bitmap_create(void)
 \ingroup API_Bitmap
 create an empty starpu_bitmap
@@ -26,7 +30,7 @@ unset bit \p e in \p b
 
 \fn void starpu_bitmap_unset_all(struct starpu_bitmap *b)
 \ingroup API_Bitmap
-unset all bits in \b b
+unset all bits in \p b
 
 \fn int starpu_bitmap_get(struct starpu_bitmap *b, int e)
 \ingroup API_Bitmap
@@ -34,15 +38,15 @@ return true iff bit \p e is set in \p b
 
 \fn void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c)
 \ingroup API_Bitmap
-Basically compute starpu_bitmap_unset_all(a) ; a = b & c;
+Basically compute starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c;
 
 \fn void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b)
 \ingroup API_Bitmap
-Basically compute a |= b
+Basically compute \p a |= \p b
 
 \fn int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e)
 \ingroup API_Bitmap
-return 1 iff e set in b1 AND e set in b2
+return 1 iff \p e is set in \p b1 AND \p e is set in \p b2
 
 \fn int starpu_bitmap_cardinal(struct starpu_bitmap *b)
 \ingroup API_Bitmap
@@ -50,7 +54,7 @@ return the number of set bits in \p b
 
 \fn int starpu_bitmap_first(struct starpu_bitmap *b)
 \ingroup API_Bitmap
-return the index of first set bit of \p b, -1 if none
+return the index of the first set bit of \p b, -1 if none
 
 \fn int starpu_bitmap_last(struct starpu_bitmap *b)
 \ingroup API_Bitmap

+ 76 - 55
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -55,6 +55,9 @@ The task is waiting for a task.
 \var starpu_task_status::STARPU_TASK_BLOCKED_ON_DATA
 \ingroup API_Codelet_And_Tasks
 The task is waiting for some data.
+\var starpu_task_status::STARPU_TASK_STOPPED
+\ingroup API_Codelet_And_Tasks
+The task is stopped.
 
 \def STARPU_NOWHERE
 \ingroup API_Codelet_And_Tasks
@@ -198,7 +201,7 @@ has to be actually done.
 
 \var int (*starpu_codelet::can_execute)(unsigned workerid, struct starpu_task *task, unsigned nimpl)
 Define a function which should return 1 if the worker designated by
-workerid can execute the <c>nimpl</c>th implementation of the given
+\p workerid can execute the \p nimpl -th implementation of the given
 task, 0 otherwise.
 
 \var enum starpu_codelet_type starpu_codelet::type
@@ -238,7 +241,7 @@ management library, and the second argument is a pointer to the
 argument passed from the field starpu_task::cl_arg. If the field
 starpu_codelet::where is set, then the field starpu_codelet::cpu_funcs
 is ignored if ::STARPU_CPU does not appear in the field
-starpu_codelet::where, it must be non-null otherwise.
+starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
 
 \var char *starpu_codelet::cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS]
 Optional array of strings which provide the name of the CPU functions
@@ -256,7 +259,7 @@ void cuda_func(void *buffers[], void *cl_arg)
 \endcode
 If the field starpu_codelet::where is set, then the field
 starpu_codelet::cuda_funcs is ignored if ::STARPU_CUDA does not appear
-in the field starpu_codelet::where, it must be non-null otherwise.
+in the field starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
 
 \var char starpu_codelet::cuda_flags[STARPU_MAXIMPLEMENTATIONS]
 Optional array of flags for CUDA execution. They specify some semantic details
@@ -271,7 +274,7 @@ void opencl_func(void *buffers[], void *cl_arg)
 \endcode
 If the field starpu_codelet::where is set, then the field
 starpu_codelet::opencl_funcs is ignored if ::STARPU_OPENCL does not
-appear in the field starpu_codelet::where, it must be non-null
+appear in the field starpu_codelet::where, it must be non-<c>NULL</c>
 otherwise.
 
 \var char starpu_codelet::opencl_flags[STARPU_MAXIMPLEMENTATIONS]
@@ -286,8 +289,8 @@ starpu_mic_kernel_t mic_func(struct starpu_codelet *cl, unsigned nimpl)
 \endcode
 If the field starpu_codelet::where is set, then the field
 starpu_codelet::mic_funcs is ignored if ::STARPU_MIC does not appear
-in the field starpu_codelet::where. It can be null if
-starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+in the field starpu_codelet::where. It can be <c>NULL</c> if
+starpu_codelet::cpu_funcs_name is non-<c>NULL</c>, in which case StarPU will
 simply make a symbol lookup to get the implementation.
 
 \var starpu_mpi_ms_func_t starpu_codelet::mpi_ms_funcs[STARPU_MAXIMPLEMENTATIONS]
@@ -298,8 +301,8 @@ starpu_mpi_ms_kernel_t mpi_ms_func(struct starpu_codelet *cl, unsigned nimpl)
 \endcode
 If the field starpu_codelet::where is set, then the field
 starpu_codelet::mpi_ms_funcs is ignored if ::STARPU_MPI_MS does not appear
-in the field starpu_codelet::where. It can be null if
-starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+in the field starpu_codelet::where. It can be <c>NULL</c> if
+starpu_codelet::cpu_funcs_name is non-<c>NULL</c>, in which case StarPU will
 simply make a symbol lookup to get the implementation.
 
 \var starpu_scc_func_t starpu_codelet::scc_funcs[STARPU_MAXIMPLEMENTATIONS]
@@ -310,8 +313,8 @@ starpu_scc_kernel_t scc_func(struct starpu_codelet *cl, unsigned nimpl)
 \endcode
 If the field starpu_codelet::where is set, then the field
 starpu_codelet::scc_funcs is ignored if ::STARPU_SCC does not appear
-in the field starpu_codelet::where. It can be null if
-starpu_codelet::cpu_funcs_name is non-NULL, in which case StarPU will
+in the field starpu_codelet::where. It can be <c>NULL</c> if
+starpu_codelet::cpu_funcs_name is non-<c>NULL</c>, in which case StarPU will
 simply make a symbol lookup to get the implementation.
 
 \var int starpu_codelet::nbuffers
@@ -319,7 +322,7 @@ Specify the number of arguments taken by the codelet. These arguments are
 managed by the DSM and are accessed from the <c>void *buffers[]</c> array. The
 constant argument passed with the field starpu_task::cl_arg is not counted in
 this number. This value should not be above \ref STARPU_NMAXBUFS. It may be set
-to STARPU_VARIABLE_NBUFFERS to specify that the number of buffers and their
+to \ref STARPU_VARIABLE_NBUFFERS to specify that the number of buffers and their
 access modes will be set in starpu_task::nbuffers and starpu_task::modes or
 starpu_task::dyn_modes, which thus permits to define codelets with a varying
 number of data.
@@ -372,7 +375,7 @@ its field starpu_perfmodel::symbol is not set.
 \var struct starpu_perfmodel *starpu_codelet::energy_model
 Optional pointer to the task energy consumption performance model
 associated to this codelet. This optional field is ignored when set to
-<c>NULL</c> or when its field starpu_perfmodel::field is not set. In
+<c>NULL</c> or when its field starpu_perfmodel::symbol is not set. In
 the case of parallel codelets, this has to account for all processing
 units involved in the parallel execution.
 
@@ -421,7 +424,7 @@ purposes.
 \var struct starpu_codelet *starpu_task::cl
 Is a pointer to the corresponding structure starpu_codelet. This
 describes where the kernel should be executed, and supplies the
-appropriate implementations. When set to NULL, no code is executed
+appropriate implementations. When set to <c>NULL</c>, no code is executed
 during the tasks, such empty tasks can be useful for synchronization
 purposes.
 This field has been made deprecated. One should use instead the
@@ -431,7 +434,7 @@ starpu_codelet::modes.
 
 \var int starpu_task::nbuffers
 Specifies the number of buffers. This is only used when starpu_codelet::nbuffers
-is STARPU_VARIABLE_NBUFFERS.
+is \ref STARPU_VARIABLE_NBUFFERS.
 
 \var starpu_data_handle_t starpu_task::handles[STARPU_NMAXBUFS]
 Is an array of ::starpu_data_handle_t. It specifies the handles to the
@@ -459,7 +462,7 @@ happen, managed by the DSM. Is used when the field
 starpu_task::dyn_handles is defined.
 
 \var enum starpu_data_access_mode starpu_task::modes[STARPU_NMAXBUFS]
-Is used only when starpu_codelet::nbuffers is STARPU_VARIABLE_NBUFFERS.
+Is used only when starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.
 It is an array of ::starpu_data_access_mode. It describes the required
 access modes to the data needed by the codelet (e.g. ::STARPU_RW). The
 number of entries in this array must be specified in the field
@@ -468,7 +471,7 @@ unsufficient, this value can be set with the configure option
 \ref enable-maxbuffers "--enable-maxbuffers".
 
 \var enum starpu_data_access_mode *starpu_task::dyn_modes
-Is used only when starpu_codelet::nbuffers is STARPU_VARIABLE_NBUFFERS.
+Is used only when starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.
 It is an array of ::starpu_data_access_mode. It describes the required
 access modes to the data needed by the codelet (e.g. ::STARPU_RW).
 The number of entries in this array must be specified in the field
@@ -484,7 +487,7 @@ or starpu_codelet::cuda_func). The default value is <c>NULL</c>.
 starpu_codelet_pack_args() and starpu_codelet_unpack_args() are helpers that
 can be used to respectively pack and unpack data into and from it, but the
 application can manage it any way, the only requirement is that the size of the
-data must be set in starpu_task:cl_arg_size .
+data must be set in starpu_task::cl_arg_size .
 
 \var size_t starpu_task::cl_arg_size
 Optional field. For some specific drivers, the pointer
@@ -509,13 +512,13 @@ codelet does not execute in the same memory space as the main thread.
 \var void (*starpu_task::callback_func)(void *)
 Optional field, the default value is <c>NULL</c>. This is a function
 pointer of prototype <c>void (*f)(void *)</c> which specifies a
-possible callback. If this pointer is non-null, the callback function
+possible callback. If this pointer is non-<c>NULL</c>, the callback function
 is executed on the host after the execution of the task. Tasks which
 depend on it might already be executing. The callback is passed the
 value contained in the starpu_task::callback_arg field. No callback is
-executed if the field is set to NULL.
+executed if the field is set to <c>NULL</c>.
 
-\var void *starpu_task::callback_arg (optional) (default: NULL)
+\var void *starpu_task::callback_arg (optional) (default: <c>NULL</c>)
 Optional field, the default value is <c>NULL</c>. This is the pointer
 passed to the callback function. This field is ignored if the field
 starpu_task::callback_func is set to <c>NULL</c>.
@@ -530,13 +533,13 @@ destroying the task.
 Optional field, the default value is <c>NULL</c>. This is a function
 pointer of prototype <c>void (*f)(void *)</c> which specifies a
 possible callback. 
-If this pointer is non-null, the callback function
+If this pointer is non-<c>NULL</c>, the callback function
 is executed on the host when the task becomes ready for execution,
 before getting scheduled. The callback is passed the
 value contained in the starpu_task::prologue_callback_arg field. No callback is
-executed if the field is set to NULL.
+executed if the field is set to <c>NULL</c>.
 
-\var void *starpu_task::prologue_callback_arg (optional) (default: NULL)
+\var void *starpu_task::prologue_callback_arg (optional) (default: <c>NULL</c>)
 Optional field, the default value is <c>NULL</c>. This is the pointer
 passed to the prologue callback function. This field is ignored if the field
 starpu_task::prologue_callback_func is set to <c>NULL</c>.
@@ -547,6 +550,13 @@ application through <c>malloc()</c>, setting starpu_task::prologue_callback_arg_
 to 1 makes StarPU automatically call <c>free(prologue_callback_arg)</c> when
 destroying the task.
 
+\var void (*starpu_task::prologue_callback_pop_func)(void *)
+todo
+\var void *starpu_task::prologue_callback_pop_arg (optional) (default: <c>NULL</c>)
+todo
+\var unsigned starpu_task::prologue_callback_pop_arg_free
+todo
+
 \var unsigned starpu_task::use_tag
 Optional field, the default value is 0. If set, this flag indicates
 that the task should be associated with the tag contained in the
@@ -606,7 +616,7 @@ the field starpu_task::execute_on_a_specific_worker is set to 0.
 
 \var starpu_task_bundle_t starpu_task::bundle
 Optional field. The bundle that includes this task. If no bundle is
-used, this should be NULL.
+used, this should be <c>NULL</c>.
 
 \var unsigned starpu_task::detach
 Optional field, default value is 1. If this flag is set, it is not
@@ -647,6 +657,9 @@ Optional field. Predicted data transfer duration for the task in
 microseconds. This field is only valid if the scheduling strategy uses
 performance models.
 
+\var double starpu_task::predicted_start
+todo
+
 \var struct starpu_task *starpu_task::prev
 \private
 A pointer to the previous task. This should only be used by StarPU.
@@ -669,7 +682,7 @@ hypervisor load balancing.
 \var void *starpu_task::starpu_private
 \private
 This is private to StarPU, do not modify. If the task is allocated by
-hand (without starpu_task_create()), this field should be set to NULL.
+hand (without starpu_task_create()), this field should be set to <c>NULL</c>.
 
 \var int starpu_task::magic
 \private
@@ -684,12 +697,21 @@ Scheduling context.
 \var int starpu_task::hypervisor_tag
 Helps the hypervisor monitor the execution of this task.
 
+\var unsigned starpu_task::possibly_parallel
+todo
+
+\var unsigned starpu_task::prefetched
+todo
+
 \var unsigned starpu_task::scheduled
 Whether the scheduler has pushed the task on some queue
 
+\var struct starpu_omp_task *starpu_task::omp_task
+todo
+
 \fn void starpu_task_init(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
-Initialize task with default values. This function is
+Initialize \p task with default values. This function is
 implicitly called by starpu_task_create(). By default, tasks initialized
 with starpu_task_init() must be deinitialized explicitly with
 starpu_task_clean(). Tasks can also be initialized statically, using
@@ -699,16 +721,16 @@ starpu_task_clean(). Tasks can also be initialized statically, using
 \ingroup API_Codelet_And_Tasks
 It is possible to initialize statically allocated tasks with
 this value. This is equivalent to initializing a structure starpu_task
-with the function starpu_task_init() function.
+with the function starpu_task_init().
 
 \def STARPU_TASK_GET_NBUFFERS(task)
 \ingroup API_Codelet_And_Tasks
-Return the number of buffers for this task, i.e. starpu_codelet::nbuffers, or
-starpu_task::nbuffers if the former is STARPU_VARIABLE_NBUFFERS.
+Return the number of buffers for \p task, i.e. starpu_codelet::nbuffers, or
+starpu_task::nbuffers if the former is \ref STARPU_VARIABLE_NBUFFERS.
 
 \def STARPU_TASK_GET_HANDLE(task, i)
 \ingroup API_Codelet_And_Tasks
-Return the \p i th data handle of the given task. If the task
+Return the \p i th data handle of \p task. If \p task
 is defined with a static or dynamic number of handles, will either
 return the \p i th element of the field starpu_task::handles or the \p
 i th element of the field starpu_task::dyn_handles
@@ -716,8 +738,8 @@ i th element of the field starpu_task::dyn_handles
 
 \def STARPU_TASK_SET_HANDLE(task, handle, i)
 \ingroup API_Codelet_And_Tasks
-Set the \p i th data handle of the given task with the given
-dat handle. If the task is defined with a static or dynamic number of
+Set the \p i th data handle of \p task with \p handle.
+If \p task is defined with a static or dynamic number of
 handles, will either set the \p i th element of the field
 starpu_task::handles or the \p i th element of the field
 starpu_task::dyn_handles
@@ -725,8 +747,8 @@ starpu_task::dyn_handles
 
 \def STARPU_CODELET_GET_MODE(codelet, i)
 \ingroup API_Codelet_And_Tasks
-Return the access mode of the \p i th data handle of the given
-codelet. If the codelet is defined with a static or dynamic number of
+Return the access mode of the \p i th data handle of \p codelet.
+If \p codelet is defined with a static or dynamic number of
 handles, will either return the \p i th element of the field
 starpu_codelet::modes or the \p i th element of the field
 starpu_codelet::dyn_modes
@@ -734,8 +756,8 @@ starpu_codelet::dyn_modes
 
 \def STARPU_CODELET_SET_MODE(codelet, mode, i)
 \ingroup API_Codelet_And_Tasks
-Set the access mode of the \p i th data handle of the given
-codelet. If the codelet is defined with a static or dynamic number of
+Set the access mode of the \p i th data handle of \p codelet.
+If \p codelet is defined with a static or dynamic number of
 handles, will either set the \p i th element of the field
 starpu_codelet::modes or the \p i th element of the field
 starpu_codelet::dyn_modes
@@ -743,8 +765,8 @@ starpu_codelet::dyn_modes
 
 \def STARPU_TASK_GET_MODE(task, i)
 \ingroup API_Codelet_And_Tasks
-Return the access mode of the \p i th data handle of the given
-task. If the task is defined with a static or dynamic number of
+Return the access mode of the \p i th data handle of \p task.
+If \p task is defined with a static or dynamic number of
 handles, will either return the \p i th element of the field
 starpu_task::modes or the \p i th element of the field
 starpu_task::dyn_modes
@@ -752,8 +774,8 @@ starpu_task::dyn_modes
 
 \def STARPU_TASK_SET_MODE(task, mode, i)
 \ingroup API_Codelet_And_Tasks
-Set the access mode of the \p i th data handle of the given
-task. If the task is defined with a static or dynamic number of
+Set the access mode of the \p i th data handle of \p task.
+If \p task is defined with a static or dynamic number of
 handles, will either set the \p i th element of the field
 starpu_task::modes or the \p i th element of the field
 starpu_task::dyn_modes
@@ -772,13 +794,12 @@ calling starpu_task_destroy().
 
 \fn struct starpu_task *starpu_task_dup(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
-Allocate a task structure which is the exact duplicate of the
-given task.
+Allocate a task structure which is the exact duplicate of \p task.
 
 \fn void starpu_task_clean(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
 Release all the structures automatically allocated to execute
-task, but not the task structure itself and values set by the user
+\p task, but not the task structure itself and values set by the user
 remain unchanged. It is thus useful for statically allocated tasks for
 instance. It is also useful when users want to execute the same
 operation several times with as little overhead as possible. It is
@@ -790,7 +811,7 @@ manipulates the task after calling the callback).
 \fn void starpu_task_destroy(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
 Free the resource allocated during starpu_task_create() and
-associated with task. This function is already called automatically
+associated with \p task. This function is already called automatically
 after the execution of a task when the field starpu_task::destroy is
 set, which is the default for tasks created by starpu_task_create().
 Calling this function on a statically allocated task results in an
@@ -807,12 +828,12 @@ specified task was either synchronous or detached.
 \fn int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks)
 \ingroup API_Codelet_And_Tasks
 This function allows to wait for an array of tasks. Upon successful completion,
-this function returns 0. Otherwise, <c>-EINVAL</c> indicates that a task
+this function returns 0. Otherwise, <c>-EINVAL</c> indicates that one of the tasks
 was either synchronous or detached.
 
 \fn int starpu_task_submit(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
-This function submits task to StarPU. Calling this function
+This function submits \p task to StarPU. Calling this function
 does not mean that the task will be executed immediately as there can
 be data or task (tag) dependencies that are not fulfilled yet: StarPU
 will take care of scheduling this task with respect to such
@@ -830,8 +851,8 @@ starpu_task::synchronous is set to 0.
 
 \fn int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id)
 \ingroup API_Codelet_And_Tasks
-This function submits a task to StarPU to the context <c> sched_ctx_id </c>.
-By default starpu_task_submit submits the task to a global context that is
+This function submits \p task to StarPU to the context \p sched_ctx_id.
+By default, starpu_task_submit() submits the task to a global context that is
 created automatically by StarPU.
 
 
@@ -849,14 +870,14 @@ executed.
 
 \fn int starpu_task_wait_for_n_submitted(unsigned n)
 \ingroup API_Codelet_And_Tasks
-This function blocks until there are <c> n </c> submitted tasks left (to the
+This function blocks until there are \p n submitted tasks left (to the
 current context or the global one if there aren't any) to be executed. It does
 not destroy these tasks.
 
 \fn int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n)
 \ingroup API_Codelet_And_Tasks
-This function waits until there are <c> n </c> tasks submitted left to be
-executed that were already submitted to the context <c> sched_ctx_id </c>.
+This function waits until there are \p n tasks submitted left to be
+executed that were already submitted to the context \p sched_ctx_id.
 
 \fn int starpu_task_nready(void)
 \ingroup API_Codelet_And_Tasks
@@ -880,7 +901,7 @@ task or simply because there is no task being executed at the moment.
 
 \fn const char *starpu_task_get_name(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
-This function returns the name of \p task, i.e. either its task->name field, or
+This function returns the name of \p task, i.e. either its starpu_task::name field, or
 the name of the corresponding performance model.
 
 \fn const char *starpu_task_get_model_name(struct starpu_task *task)
@@ -898,12 +919,12 @@ This function waits until there is no more ready task.
 \fn void starpu_task_set_implementation(struct starpu_task *task, unsigned impl)
 \ingroup API_Codelet_And_Tasks
 This function should be called by schedulers to specify the
-codelet implementation to be executed when executing the task.
+codelet implementation to be executed when executing \p task.
 
 \fn unsigned starpu_task_get_implementation(struct starpu_task *task)
 \ingroup API_Codelet_And_Tasks
 This function returns the codelet implementation to be executed
-when executing the task.
+when executing \p task.
 
 \fn void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg)
 \ingroup API_Codelet_And_Tasks

+ 3 - 4
doc/doxygen/chapters/api/cuda_extensions.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -19,7 +19,6 @@ CUDA as shown in \ref FullSourceCodeVectorScal.
 This macro defines the maximum number of CUDA devices that are
 supported by StarPU.
 
-
 \fn cudaStream_t starpu_cuda_get_local_stream(void)
 \ingroup API_CUDA_Extensions
 This function gets the current worker’s CUDA stream. StarPU
@@ -28,7 +27,7 @@ function is only provided for convenience so that programmers can
 easily use asynchronous operations within codelets without having to
 create a stream by hand. Note that the application is not forced to
 use the stream provided by starpu_cuda_get_local_stream() and may also
-create its own streams. Synchronizing with cudaThreadSynchronize() is
+create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
 allowed, but will reduce the likelihood of having all transfers
 overlapped.
 
@@ -57,7 +56,7 @@ copy was successful, or fails otherwise.
 
 \fn void starpu_cuda_set_device(unsigned devid)
 \ingroup API_CUDA_Extensions
-Calls cudaSetDevice(devid) or cudaGLSetGLDevice(devid),
+Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
 according to whether \p devid is among the field
 starpu_conf::cuda_opengl_interoperability.
 

+ 66 - 34
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -13,47 +13,63 @@ Per-interface data transfer methods.
 \ingroup API_Data_Interfaces
 \var void (*starpu_data_interface_ops::register_data_handle)(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
 Register an existing interface into a data handle.
+
 \var starpu_ssize_t (*starpu_data_interface_ops::allocate_data_on_node)(void *data_interface, unsigned node)
 Allocate data for the interface on a given node.
+
 \var void (*starpu_data_interface_ops::free_data_on_node)(void *data_interface, unsigned node)
 Free data of the interface on a given node.
+
 \var const struct starpu_data_copy_methods *starpu_data_interface_ops::copy_methods
 ram/cuda/opencl synchronous and asynchronous transfer methods.
+
 \var void *(*starpu_data_interface_ops::handle_to_pointer)(starpu_data_handle_t handle, unsigned node)
 Return the current pointer (if any) for the handle on the given node.
+
 \var size_t (*starpu_data_interface_ops::get_size)(starpu_data_handle_t handle)
 Return an estimation of the size of data, for performance models.
+
 \var uint32_t (*starpu_data_interface_ops::footprint)(starpu_data_handle_t handle)
 Return a 32bit footprint which characterizes the data size.
+
 \var int (*starpu_data_interface_ops::compare)(void *data_interface_a, void *data_interface_b)
 Compare the data size of two interfaces.
+
 \var void (*starpu_data_interface_ops::display)(starpu_data_handle_t handle, FILE *f)
 Dump the sizes of a handle to a file.
+
 \var starpu_ssize_t (*starpu_data_interface_ops::describe)(void *data_interface, char *buf, size_t size)
 Describe the data into a string.
+
 \var enum starpu_data_interface_id starpu_data_interface_ops::interfaceid
 An identifier that is unique to each interface.
+
 \var size_t starpu_data_interface_ops::interface_size
 The size of the interface data descriptor.
+
 \var char starpu_data_interface_ops::is_multiformat
 todo
+
 \var char starpu_data_interface_ops::dontcache
 If set to non-zero, StarPU will never try to reuse an allocated buffer for a
 different handle. This can be notably useful for application-defined interfaces
 which have a dynamic size, and for which it thus does not make sense to reuse
 the buffer since it will probably not have the proper size.
+
 \var struct starpu_multiformat_data_interface_ops* (*starpu_data_interface_ops::get_mf_ops)(void *data_interface)
 todo
+
 \var int (*starpu_data_interface_ops::pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
 Pack the data handle into a contiguous buffer at the address allocated with
-starpu_malloc_flags(ptr, size, 0) (and thus returned in ptr) and
-set the size of the newly created buffer in count. If ptr is NULL, the
+<c>starpu_malloc_flags(ptr, size, 0)</c> (and thus returned in \p ptr) and
+set the size of the newly created buffer in \p count. If \p ptr is <c>NULL</c>, the
 function should not copy the data in the buffer but just set count to
 the size of the buffer which would have been allocated. The special
 value -1 indicates the size is yet unknown.
+
 \var int (*starpu_data_interface_ops::unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
-Unpack the data handle from the contiguous buffer at the address ptr
-of size count
+Unpack the data handle from the contiguous buffer at the address \p ptr
+of size \p count
 
 \struct starpu_data_copy_methods
 Defines the per-interface methods. If the any_to_any method is
@@ -91,22 +107,27 @@ node. Return 0 on success.
 Define how to copy data from the \p src_interface interface on the
 \p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CUDA node to the \p dst_interface interface on the \p dst_node
 OpenCL node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CPU node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
 CUDA node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
@@ -121,29 +142,33 @@ node. Return 0 on success.
 Define how to copy data from the \p src_interface interface on the
 \p src_node node to the \p dst_interface interface on the \p dst_node node.
 Must return 0 if the transfer was actually completed completely
-synchronously, or -EAGAIN if at least some transfers are still ongoing
+synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
 and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::scc_sink_to_src)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node node to the \p dst_interface interface on the \p dst_node node.
 Must return 0 if the transfer was actually completed completely
-synchronously, or -EAGAIN if at least some transfers are still ongoing
+synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
 and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::scc_sink_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node node to the \p dst_interface interface on the \p dst_node node.
 Must return 0 if the transfer was actually completed completely
-synchronously, or -EAGAIN if at least some transfers are still ongoing
+synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
 and should be awaited for by the core.
 
 \var int (*starpu_data_copy_methods::ram_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CPU node to the \p dst_interface interface on the \p dst_node MPI Slave
 node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::mpi_ms_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node CPU
 node. Return 0 on success.
+
 \var int (*starpu_data_copy_methods::mpi_ms_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node
@@ -153,43 +178,47 @@ MPI Slave node. Return 0 on success.
 Define how to copy data from the \p src_interface interface on the
 \p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU
 node, using the given stream. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
 node, using the given stream. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
 
 \var int (*starpu_data_copy_methods::ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CPU node to the \p dst_interface interface on the \p dst_node
-OpenCL node, by recording in event, a pointer to a cl_event, the event
+OpenCL node, by recording in \p event, a pointer to a <c>cl_event</c>, the event
 of the last submitted transfer. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the
 core.
+
 \var int (*starpu_data_copy_methods::opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
 Define how to copy data from the \p src_interface interface on the
 \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
-CPU node, by recording in event, a pointer to a cl_event, the event of
+CPU node, by recording in \p event, a pointer to a <c>cl_event</c>, the event of
 the last submitted transfer. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the
 core.
+
 \var int (*starpu_data_copy_methods::opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
 Define how to copy data from the \p src_interface interface on the
 \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
-OpenCL node, by recording in event, a pointer to a cl_event, the event
+OpenCL node, by recording in \p event, a pointer to a <c>cl_event</c>, the event
 of the last submitted transfer. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the
 core.
 
@@ -197,32 +226,35 @@ core.
 Define how to copy data from the \p src_interface interface on the
 \p src_node CPU node to the \p dst_interface interface on the \p dst_node MPI Slave
 node, with the given event. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event)
 Define how to copy data from the \p src_interface interface on the
 \p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node CPU
 node, with the given event. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event)
 Define how to copy data from the \p src_interface interface on the
 \p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node MPI Slave 
 node, with the given event. Must return 0 if the transfer was
-actually completed completely synchronously, or -EAGAIN if at least
+actually completed completely synchronously, or <c>-EAGAIN</c> if at least
 some transfers are still ongoing and should be awaited for by the core.
 
 \var int (*starpu_data_copy_methods::ram_to_mic_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node CPU node to the \p dst_interface interface on the \p dst_node
 MIC node. Must return 0 if the transfer was actually completed
-completely synchronously, or -EAGAIN if at least some transfers are
+completely synchronously, or <c>-EAGAIN</c> if at least some transfers are
 still ongoing and should be awaited for by the core.
+
 \var int (*starpu_data_copy_methods::mic_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 Define how to copy data from the \p src_interface interface on the
 \p src_node MIC node to the \p dst_interface interface on the \p dst_node
 CPU node. Must return 0 if the transfer was actually completed
-completely synchronously, or -EAGAIN if at least some transfers are
+completely synchronously, or <c>-EAGAIN</c> if at least some transfers are
 still ongoing and should be awaited for by the core.
 
 \var int (*starpu_data_copy_methods::any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
@@ -230,8 +262,8 @@ Define how to copy data from the \p src_interface interface on the
 \p src_node node to the \p dst_interface interface on the \p dst_node node.
 This is meant to be implemented through the starpu_interface_copy()
 helper, to which async_data should be passed as such, and will be used
-to manage asynchronicity. This must return -EAGAIN if any of the
-starpu_interface_copy() calls has returned -EAGAIN (i.e. at least some
+to manage asynchronicity. This must return <c>-EAGAIN</c> if any of the
+starpu_interface_copy() calls has returned <c>-EAGAIN</c> (i.e. at least some
 transfer is still ongoing), and return 0 otherwise.
 
 \enum starpu_data_interface_id
@@ -284,7 +316,7 @@ typically a scalar, and initialize \p handle to represent this data item.
 
 Here is an example of how to use the function.
 \code{.c}
-float var;
+float var = 42.0;
 starpu_data_handle_t var_handle;
 starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 \endcode
@@ -384,7 +416,7 @@ Return the interface associated with \p handle on \p memory_node.
 \ingroup API_Data_Interfaces
 
 Each data interface is provided with a set of field access functions.
-The ones using a void * parameter aimed to be used in codelet
+The ones using a <c>void *</c> parameter are meant to be used in codelet
 implementations (see for example the code in
 \ref VectorScalingUsingStarPUAPI).
 
@@ -414,7 +446,7 @@ Execute the packing operation of the interface of the data
 registered at \p handle (see starpu_data_interface_ops). This
 packing operation must allocate a buffer large enough at \p ptr and copy
 into the newly allocated buffer the data associated to \p handle. \p count
-will be set to the size of the allocated buffer. If \p ptr is NULL, the
+will be set to the size of the allocated buffer. If \p ptr is <c>NULL</c>, the
 function should not copy the data in the buffer but just set \p count to
 the size of the buffer which would have been allocated. The special
 value -1 indicates the size is yet unknown.
@@ -794,7 +826,7 @@ Return the number of rows in a block.
 
 \fn uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle)
 \ingroup API_Data_Interfaces
-Return the numberof columns in a block.
+Return the number of columns in a block.
 
 \fn size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle)
 \ingroup API_Data_Interfaces
@@ -1051,34 +1083,34 @@ Applications can provide their own interface as shown in
 \ingroup API_Data_Interfaces
 Allocate \p size bytes on node \p dst_node with the given allocation \p flags. This returns 0 if
 allocation failed, the allocation method should then return <c>-ENOMEM</c> as
-allocated size. Deallocation must be done with starpu_free_on_node.
+allocated size. Deallocation must be done with starpu_free_on_node().
 
 \fn void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags)
 \ingroup API_Data_Interfaces
 Free \p addr of \p size bytes on node \p dst_node which was previously allocated
-with starpu_malloc_on_node with the given allocation \p flags.
+with starpu_malloc_on_node() with the given allocation \p flags.
 
 \fn uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size)
 \ingroup API_Data_Interfaces
 Allocate \p size bytes on node \p dst_node with the default allocation flags. This returns 0 if
 allocation failed, the allocation method should then return <c>-ENOMEM</c> as
-allocated size. Deallocation must be done with starpu_free_on_node.
+allocated size. Deallocation must be done with starpu_free_on_node().
 
 \fn void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 \ingroup API_Data_Interfaces
 Free \p addr of \p size bytes on node \p dst_node which was previously allocated
-with starpu_malloc_on_node.
+with starpu_malloc_on_node().
 
 \fn void starpu_malloc_on_node_set_default_flags(unsigned node, int flags)
 \ingroup API_Data_Interfaces
 Define the default flags for allocations performed by starpu_malloc_on_node() and
-starpu_free_on_node(). The default is STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT.
+starpu_free_on_node(). The default is \ref STARPU_MALLOC_PINNED | \ref STARPU_MALLOC_COUNT.
 
 \fn int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
 \ingroup API_Data_Interfaces
 Copy \p size bytes from byte offset \p src_offset of \p src on \p src_node
 to byte offset \p dst_offset of \p dst on \p dst_node. This is to be used in
-the any_to_any() copy method, which is provided with the async_data to
+the starpu_data_copy_methods::any_to_any copy method, which is provided with \p async_data to
 be passed to starpu_interface_copy(). This returns <c>-EAGAIN</c> if the
 transfer is still ongoing, or 0 if the transfer is already completed.
 

+ 8 - 5
doc/doxygen/chapters/api/data_management.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -76,6 +76,9 @@ should thus be used to try to group tasks on the same core or cache, etc. For
 now only the ws and lws schedulers take this flag into account, and only when
 rebuilt with the USE_LOCALITY flag defined in the
 src/sched_policies/work_stealing_policy.c source code.
+\var starpu_data_access_mode::STARPU_ACCESS_MODE_MAX
+\ingroup API_Data_Management
+todo
 
 @name Basic Data Management API
 \ingroup API_Data_Management
@@ -309,7 +312,7 @@ This can for instance be used to wait for tasks which produce the data, but with
 
 \def STARPU_ACQUIRE_NO_NODE_LOCK_ALL
 \ingroup API_Data_Management
-This is the same as STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance.
+This is the same as ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance.
 This is mostly useful inside starpu only.
 
 \fn int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
@@ -317,7 +320,7 @@ This is mostly useful inside starpu only.
 This is the same as starpu_data_acquire(), except that the data
 will be available on the given memory node instead of main
 memory.
-STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
+::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
 explicit node number.
 
 \fn int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
@@ -325,7 +328,7 @@ explicit node number.
 This is the same as starpu_data_acquire_cb(), except that the
 data will be available on the given memory node instead of main
 memory.
-STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
+::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
 explicit node number.
 
 \fn int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
@@ -333,7 +336,7 @@ explicit node number.
 This is the same as starpu_data_acquire_cb_sequential_consistency(), except that the
 data will be available on the given memory node instead of main
 memory.
-STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
+::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
 explicit node number.
 
 \def STARPU_DATA_ACQUIRE_CB(handle, mode, code)

+ 6 - 6
doc/doxygen/chapters/api/data_out_of_core.doxy

@@ -93,18 +93,18 @@ The \p size must be at least 1 MB !
 
 \fn void *starpu_disk_open(unsigned node, void *pos, size_t size)
 \ingroup API_Out_Of_Core
-Open an existing file memory in a disk node. \p size: this is a size of your
-file. \p pos is specific position dependent on the backend, given to the \c open
+Open an existing file memory in a disk node. \p size is the size of the
+file. \p pos is the specific position dependent on the backend, given to the \c open
 method of the disk operations. This returns an opaque object pointer.
 
 \fn void starpu_disk_close(unsigned node, void *obj, size_t size)
 \ingroup API_Out_Of_Core
-Close an existing data opened with starpu_disk_open.
+Close an existing data opened with starpu_disk_open().
 
 \var starpu_disk_swap_node
-\ingrop API_Out_Of_Core
+\ingroup API_Out_Of_Core
 This contains the node number of the disk swap, if set up through the
-STARPU_DISK_SWAP variable.
+\ref STARPU_DISK_SWAP variable.
 
 \var starpu_disk_stdio_ops
 \ingroup API_Out_Of_Core
@@ -126,7 +126,7 @@ Only available on Linux systems.
 \var starpu_disk_leveldb_ops
 \ingroup API_Out_Of_Core
 This set uses the leveldb created by Google <br />
-Show here: https://code.google.com/p/leveldb/ <br />
+More information at https://code.google.com/p/leveldb/ <br />
 It doesn't support asynchronous transfers.
 
 */

+ 34 - 25
doc/doxygen/chapters/api/data_partition.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -13,18 +13,18 @@ The filter structure describes a data partitioning operation, to be
 given to the starpu_data_partition() function.
 \ingroup API_Data_Partition
 \var void (*starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)
-This function fills the child_interface structure with interface
-information for the id-th child of the parent father_interface (among
-nparts).
+This function fills the \p child_interface structure with interface
+information for the \p id -th child of the parent \p father_interface (among
+\p nparts).
 \var unsigned starpu_data_filter::nchildren
 This is the number of parts to partition the data into.
 \var unsigned (*starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
 This returns the number of children. This can be used instead of
-nchildren when the number of children depends on the actual data (e.g.
+starpu_data_filter::nchildren when the number of children depends on the actual data (e.g.
 the number of blocks in a sparse matrix).
 \var struct starpu_data_interface_ops *(*starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id)
 In case the resulting children use a different data interface, this
-function returns which interface is used by child number id.
+function returns which interface is used by child number \p id.
 \var unsigned starpu_data_filter::filter_arg
 Allow to define an additional parameter for the filter function.
 \var void *starpu_data_filter::filter_arg_ptr
@@ -36,7 +36,7 @@ function, such as the sizes of the different parts.
 
 \fn void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
 \ingroup API_Data_Partition
-This requests partitioning one StarPU data initial_handle into
+This requests partitioning one StarPU data \p initial_handle into
 several subdata according to the filter \p f.
 
 Here is an example of how to use the function.
@@ -52,7 +52,7 @@ starpu_data_partition(A_handle, &f);
 \ingroup API_Data_Partition
 This unapplies one filter, thus unpartitioning the data. The
 pieces of data are collected back into one big piece in the
-\p gathering_node (usually STARPU_MAIN_RAM). Tasks working on the partitioned data must
+\p gathering_node (usually ::STARPU_MAIN_RAM). Tasks working on the partitioned data must
 be already finished when calling starpu_data_unpartition().
 
 Here is an example of how to use the function.
@@ -66,7 +66,7 @@ This function returns the number of children.
 
 \fn starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
 \ingroup API_Data_Partition
-Return the ith child of the given \p handle, which must have been
+Return the \p i -th child of the given \p handle, which must have been
 partitionned beforehand.
 
 \fn starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... )
@@ -110,7 +110,7 @@ This plans for partitioning one StarPU data handle \p initial_handle into
 several subdata according to the filter \p f. The handles are returned into
 the \p children array, which has to be the same size as the number of parts
 described in \p f. These handles are not immediately usable,
-starpu_data_partition_submit has to be called to submit the actual partitioning.
+starpu_data_partition_submit() has to be called to submit the actual partitioning.
 
 Here is an example of how to use the function:
 
@@ -141,18 +141,18 @@ starpu_data_partition_submit(A_handle, nslicesx, children);
 \fn void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
 \ingroup API_Data_Partition
 
-This is the same as starpu_data_partition_submit, but does not invalidate \p
+This is the same as starpu_data_partition_submit(), but it does not invalidate \p
 initial_handle. This allows to continue using it, but the application has to be
 careful not to write to \p initial_handle or \p children handles, only read from
 them, since the coherency is otherwise not guaranteed.  This thus allows to
 submit various tasks which concurrently read from various partitions of the data.
 
 When the application wants to write to \p initial_handle again, it should call
-starpu_data_unpartition_submit, which will properly add dependencies between the
+starpu_data_unpartition_submit(), which will properly add dependencies between the
 reads on the \p children and the writes to be submitted.
 
 If instead the application wants to write to \p children handles, it should
-call starpu_data_partition_readwrite_upgrade_submit, which will properly add
+call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
 dependencies between the reads on the \p initial_handle and the writes to be
 submitted.
 
@@ -160,7 +160,7 @@ submitted.
 \ingroup API_Data_Partition
 
 This assumes that a partitioning of \p initial_handle has already been submitted
-in readonly mode through starpu_data_partition_readonly_submit, and will upgrade
+in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
 that partitioning into read-write mode for the \p children, by invalidating \p
 initial_handle, and adding the necessary dependencies.
 
@@ -182,7 +182,7 @@ This assumes that \p initial_handle is partitioned into \p children, and submits
 just a readonly unpartitioning of it, i.e. submitting a gathering of the pieces
 on the requested \p gathering_node memory node. It does not invalidate the
 children. This brings \p initial_handle and \p children handles to the same
-state as obtained with starpu_data_partition_readonly_submit.
+state as obtained with starpu_data_partition_readonly_submit().
 
 \p gathering_node can be set to -1 to let the runtime decide which memory node
 should be used to gather the pieces.
@@ -191,7 +191,7 @@ should be used to gather the pieces.
 \ingroup API_Data_Partition
 
 This should be used to clear the partition planning established between \p
-root_data and \p children with starpu_data_partition_plan. This will notably
+root_data and \p children with starpu_data_partition_plan(). This will notably
 submit an unregistration of all the \p children, which can thus not be used any more
 afterwards.
 
@@ -214,9 +214,10 @@ equal size.
 Return in \p child_interface the \p id th element of the vector
 represented by \p father_interface once partitioned in \p nparts chunks of
 equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
-of size (n-2*shadow)/nparts+2*shadow. The <c>filter_arg_ptr</c> field
-of \p f must be the shadow size casted into void*. <b>IMPORTANT</b>:
-This can only be used for read-only access, as no coherency is
+of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
+of \p f must be the shadow size cast into \c void*.
+
+<b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
 enforced for the shadowed parts. An usage example is available in
 examples/filters/shadow.c
 
@@ -254,7 +255,9 @@ last submatrix contains the remainder.
 This partitions a dense Matrix along the x dimension, with a
 shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
 nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
-the last submatrix contains the remainder. <b>IMPORTANT</b>: This can
+the last submatrix contains the remainder.
+
+<b>IMPORTANT</b>: This can
 only be used for read-only access, as no coherency is enforced for the
 shadowed parts. A usage example is available in
 examples/filters/shadow2d.c
@@ -271,9 +274,10 @@ This partitions a dense Matrix along the y dimension, with a
 shadow border <c>filter_arg_ptr</c>, thus getting
 (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
 divide y-2*shadow, the last submatrix contains the remainder.
+
 <b>IMPORTANT</b>: This can only be used for read-only access, as no
 coherency is enforced for the shadowed parts. A usage example is
-available in examples/filters/shadow2d.c 
+available in examples/filters/shadow2d.c
 
 @name Predefined Block Filter Functions
 \ingroup API_Data_Partition
@@ -295,7 +299,9 @@ submatrix contains the remainder.
 This partitions a block along the X dimension, with a
 shadow border <c>filter_arg_ptr</c>, thus getting
 ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
-divide x, the last submatrix contains the remainder. <b>IMPORTANT</b>:
+divide x, the last submatrix contains the remainder.
+
+<b>IMPORTANT</b>:
 This can only be used for read-only access, as no coherency is
 enforced for the shadowed parts.
 
@@ -310,7 +316,9 @@ submatrix contains the remainder.
 This partitions a block along the Y dimension, with a
 shadow border <c>filter_arg_ptr</c>, thus getting
 (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
-divide y, the last submatrix contains the remainder. <b>IMPORTANT</b>:
+divide y, the last submatrix contains the remainder.
+
+<b>IMPORTANT</b>:
 This can only be used for read-only access, as no coherency is
 enforced for the shadowed parts.
 
@@ -325,7 +333,9 @@ submatrix contains the remainder.
 This partitions a block along the Z dimension, with a
 shadow border <c>filter_arg_ptr</c>, thus getting
 (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
-divide z, the last submatrix contains the remainder. <b>IMPORTANT</b>:
+divide z, the last submatrix contains the remainder.
+
+<b>IMPORTANT</b>:
 This can only be used for read-only access, as no coherency is
 enforced for the shadowed parts.
 
@@ -347,4 +357,3 @@ This partitions a block-sparse matrix into vertical
 block-sparse matrices.
 
 */
-

+ 4 - 4
doc/doxygen/chapters/api/explicit_dependencies.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -26,7 +26,7 @@ redundancy in the task dependencies.
 \ingroup API_Explicit_Dependencies
 Fills \p task_array with the list of tasks which are direct children of \p task.
 \p ndeps is the size of \p task_array.  This function returns the number of
-direct children. \p task_array can be set to NULL if \p ndeps is 0, which allows
+direct children. \p task_array can be set to <c>NULL</c> if \p ndeps is 0, which allows
 to compute the number of children before allocating an array to store them.
 This function can only be called if \p task has not completed yet, otherwise
 the results are undefined. The result may also be outdated if some additional
@@ -60,7 +60,7 @@ submitted to StarPU with starpu_task_submit().
 
 <b>WARNING! Use with caution</b>. Because of the variable arity of
 starpu_tag_declare_deps(), note that the last arguments must be of
-type starpu_tag_t : constant values typically need to be explicitly
+type ::starpu_tag_t : constant values typically need to be explicitly
 cast. Otherwise, due to integer sizes and argument passing on the
 stack, the C compiler might consider the tag <c>0x200000003</c>
 instead of <c>0x2</c> and <c>0x3</c> when calling
@@ -75,7 +75,7 @@ starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)
 \fn void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array)
 \ingroup API_Explicit_Dependencies
 This function is similar to starpu_tag_declare_deps(), except
-that its does not take a variable number of arguments but an array of
+that it does not take a variable number of arguments but an \p array of
 tags of size \p ndeps.
 
 \code{.c}

+ 10 - 2
doc/doxygen/chapters/api/fft_support.doxy

@@ -1,16 +1,24 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
 
 /*! \defgroup API_FFT_Support FFT Support
 
+\def STARPUFFT_FORWARD
+\ingroup API_FFT_Support
+todo
+
+\def STARPUFFT_INVERSE
+\ingroup API_FFT_Support
+todo
+
 \fn void * starpufft_malloc(size_t n)
 \ingroup API_FFT_Support
-Allocates memory for \p n bytes. This is preferred over malloc(),
+Allocates memory for \p n bytes. This is preferred over \c malloc(),
 since it allocates pinned memory, which allows overlapped transfers.
 
 \fn void * starpufft_free(void *p)

+ 3 - 3
doc/doxygen/chapters/api/fxt_support.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -79,8 +79,8 @@ start recording it again, etc.
 
 \fn void starpu_fxt_autostart_profiling(int autostart)
 \ingroup API_FxT_Support
-Determines whether profiling should be started by starpu_init, or only when
-starpu_fxt_start_profiling is called. \e autostart should be 1 to do so, or 0 to
+Determines whether profiling should be started by starpu_init(), or only when
+starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
 prevent it.
 
 \fn void starpu_fxt_write_data_trace(char *filename_in)

+ 25 - 23
doc/doxygen/chapters/api/initialization.doxy

@@ -1,22 +1,13 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
 
 /*! \defgroup API_Initialization_and_Termination Initialization and Termination
 
-\struct starpu_driver
-structure for a driver
-\ingroup API_Initialization_and_Termination
-\var enum starpu_worker_archtype starpu_driver::type
-The type of the driver. Only ::STARPU_CPU_WORKER,
-::STARPU_CUDA_WORKER and ::STARPU_OPENCL_WORKER are currently supported.
-\var union starpu_driver::id
-The identifier of the driver.
-
 \struct starpu_conf
 \ingroup API_Initialization_and_Termination
 This structure is passed to the starpu_init() function in order to
@@ -27,12 +18,18 @@ The environment variables overwrite the equivalent parameters.
 \var int starpu_conf::magic
 \private
 Will be initialized by starpu_conf_init(). Should not be set by hand.
+
 \var const char*starpu_conf::sched_policy_name
 This is the name of the scheduling policy. This can also be specified
-with the environment variable \ref STARPU_SCHED. (default = NULL).
+with the environment variable \ref STARPU_SCHED. (default = <c>NULL</c>).
+
 \var struct starpu_sched_policy *starpu_conf::sched_policy
 This is the definition of the scheduling policy. This field is ignored
-if starpu_conf::sched_policy_name is set. (default = NULL)
+if starpu_conf::sched_policy_name is set. (default = <c>NULL</c>)
+
+\var void (*starpu_conf::sched_policy_init)(unsigned)
+todo
+
 \var int starpu_conf::ncpus
 This is the number of CPU cores that StarPU can use. This can also be
 specified with the environment variable \ref STARPU_NCPU . (default = -1)
@@ -78,7 +75,7 @@ This can also be specified with the environment variable
 \var unsigned starpu_conf::workers_cuda_gpuid[STARPU_NMAXWORKERS]
 If the starpu_conf::use_explicit_workers_cuda_gpuid flag is set, this
 array contains the logical identifiers of the CUDA devices (as used by
-cudaGetDevice()).
+\c cudaGetDevice()).
 \var unsigned starpu_conf::use_explicit_workers_opencl_gpuid
 If this flag is set, the OpenCL workers will be attached to the OpenCL
 devices specified in the starpu_conf::workers_opencl_gpuid array.
@@ -146,9 +143,9 @@ variable \ref STARPU_SINGLE_COMBINED_WORKER.
 
 \var char *starpu_conf::mic_sink_program_path
 Path to the kernel to execute on the MIC device, compiled for MIC
-architecture. When set to NULL, StarPU automatically looks next to the
+architecture. When set to <c>NULL</c>, StarPU automatically looks next to the
 host program location.
-(default = NULL)
+(default = <c>NULL</c>)
 
 \var int starpu_conf::disable_asynchronous_copy
 This flag should be set to 1 to disable
@@ -200,16 +197,16 @@ configure script the option \ref disable-asynchronous-mpi-master-slave-copy "--d
 Enable CUDA/OpenGL interoperation on these CUDA
 devices. This can be set to an array of CUDA device
 identifiers for which cudaGLSetGLDevice() should be called
-instead of cudaSetDevice(). Its size is specified by the
+instead of \c cudaSetDevice(). Its size is specified by the
 starpu_conf::n_cuda_opengl_interoperability field below
-(default = NULL)
+(default = <c>NULL</c>)
 \var unsigned starpu_conf::n_cuda_opengl_interoperability
 todo
 
 \var struct starpu_driver *starpu_conf::not_launched_drivers
 Array of drivers that should not be launched by
 StarPU. The application will run in one of its own
-threads. (default = NULL)
+threads. (default = <c>NULL</c>)
 \var unsigned starpu_conf::n_not_launched_drivers
 The number of StarPU drivers that should not be
 launched by StarPU. (default = 0)
@@ -220,14 +217,19 @@ automatically be flushed when it fills in, but it may still
 be interesting to specify a bigger value to avoid any
 flushing (which would disturb the trace).
 
+\var starpu_conf::global_sched_ctx_min_priority
+todo
+\var starpu_conf::global_sched_ctx_max_priority
+todo
+
 \fn int starpu_init(struct starpu_conf *conf)
 \ingroup API_Initialization_and_Termination
 This is StarPU initialization method, which must be called prior to
 any other StarPU call. It is possible to specify StarPU’s
 configuration (e.g. scheduling policy, number of cores, ...) by
-passing a non-null argument. Default configuration is used if the
-passed argument is NULL. Upon successful completion, this function
-returns 0. Otherwise, -ENODEV indicates that no worker was available
+passing a non-<c>NULL</c> argument. Default configuration is used if the
+passed argument is <c>NULL</c>. Upon successful completion, this function
+returns 0. Otherwise, <c>-ENODEV</c> indicates that no worker was available
 (so that StarPU was not initialized).
 
 \fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
@@ -246,8 +248,8 @@ specified through environment variables, starpu_conf_init() initializes
 the fields of the structure according to the environment variables.
 For instance if \ref STARPU_CALIBRATE is set, its value is put in the
 field starpu_conf::calibrate of the structure passed as argument. Upon successful
-completion, this function returns 0. Otherwise, -EINVAL indicates that
-the argument was NULL.
+completion, this function returns 0. Otherwise, <c>-EINVAL</c> indicates that
+the argument was <c>NULL</c>.
 
 \fn void starpu_shutdown(void)
 \ingroup API_Initialization_and_Termination

+ 2 - 2
doc/doxygen/chapters/api/misc_helpers.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -15,7 +15,7 @@ asynchronous indicates whether the function should block or not. In
 the case of an asynchronous call, it is possible to synchronize with
 the termination of this operation either by the means of implicit
 dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
-\p callback_func is not NULL, this callback function is executed after
+\p callback_func is not <c>NULL</c>, this callback function is executed after
 the handle has been copied, and it is given the pointer \p callback_arg as argument.
 
 \fn void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where)

+ 9 - 9
doc/doxygen/chapters/api/modularized_scheduler.doxy

@@ -31,9 +31,9 @@ flags for starpu_sched_component::properties
 This structure represent a scheduler module.  A scheduler is a
 tree-like structure of them, some parts of scheduler can be shared by
 several contexes to perform some local optimisations, so, for all
-components, a list of parent is defined indexed by sched_ctx_id. They
+components, a list of parents is defined, indexed by \c sched_ctx_id. They
 embed their specialised methods in a pseudo object-style, so calls are
-like component->push_task(component,task)
+like <c>component->push_task(component,task)</c>
 
 \var struct starpu_sched_tree *starpu_sched_component::tree
      The tree containing the component
@@ -42,7 +42,7 @@ like component->push_task(component,task)
 \var starpu_sched_component::workers_in_ctx
      this member contain the subset of starpu_sched_component::workers that is currently available in the context
      The push method should take this member into account.
-     this member is set with :
+     this member is set with :	
      component->workers UNION tree->workers UNION
      component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component
 \var void *starpu_sched_component::data
@@ -183,7 +183,7 @@ The actual scheduler
 \fn void starpu_sched_component_destroy(struct starpu_sched_component *component)
 \ingroup API_Modularized_Scheduler
 	 free data allocated by starpu_sched_component_create and call component->deinit_data(component)
-	 set to null the member starpu_sched_component::fathers[sched_ctx_id] of all child if its equal to \p component
+	 set to <c>NULL</c> the member starpu_sched_component::fathers[sched_ctx_id] of all children if it is equal to \p component
 
 \fn void starpu_sched_component_destroy_rec(struct starpu_sched_component *component)
 \ingroup API_Modularized_Scheduler
@@ -195,9 +195,9 @@ The actual scheduler
 
 \fn int starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length)
 \ingroup API_Modularized_Scheduler
-	 return a non null value if \p component can execute \p task.
+	 return a non-zero value if \p component can execute \p task.
 	 write the execution prediction length for the best implementation of the best worker available and write this at \p length address.
-	 this result is more relevant if starpu_sched_component::is_homogeneous is non null.
+	 this result is more relevant if starpu_sched_component::is_homogeneous is non <c>NULL</c>.
 	 if a worker need to be calibrated for an implementation, nan is set to \p length.
 
 \fn double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task)
@@ -431,11 +431,11 @@ todo
 
 \struct starpu_sched_component_specs
 \ingroup API_Modularized_Scheduler
-	 Define how build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be NULL, then
+	 Define how to build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
 	 the level is just skipped. Bugs everywhere, do not rely on.
 \var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_machine_composed_sched_component
      the composed component to put on the top of the scheduler
-     this member must not be NULL as it is the root of the topology
+     this member must not be <c>NULL</c> as it is the root of the topology
 \var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_component_composed_sched_component
      the composed component to put for each memory component
 \var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_socket_composed_sched_component
@@ -444,7 +444,7 @@ todo
      the composed component to put for each cache
 \var struct starpu_sched_component_composed_recipe *(*starpu_sched_specs::worker_composed_sched_component)(enum starpu_worker_archtype archtype)
      a function that return a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
-     NULL is a valid return value, then no component will be added on top
+     <c>NULL</c> is a valid return value, then no component will be added on top
 \var starpu_sched_specs::mix_heterogeneous_workers
      this flag is a dirty hack because of the poor expressivity of this interface. As example, if you want to build
      a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.

+ 27 - 19
doc/doxygen/chapters/api/mpi.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2017  INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -27,13 +27,13 @@ starpu_init() must be called before starpu_mpi_init_comm().
 
 \fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
 \ingroup API_MPI_Support
-Call starpu_mpi_init_comm() with the MPI communicator MPI_COMM_WORLD.
+Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD.
 
 \fn int starpu_mpi_initialize(void)
 \deprecated
 \ingroup API_MPI_Support
 This function has been made deprecated. One should use instead the
-function starpu_mpi_init(). This function does not call MPI_Init(), it
+function starpu_mpi_init(). This function does not call \c MPI_Init(), it
 should be called beforehand.
 
 \fn int starpu_mpi_initialize_extended(int *rank, int *world_size)
@@ -47,7 +47,7 @@ calling <c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED,
 \fn int starpu_mpi_shutdown(void)
 \ingroup API_MPI_Support
 Cleans the starpumpi library. This must be called between calling
-starpu_mpi functions and starpu_shutdown(). MPI_Finalize() will be
+starpu_mpi functions and starpu_shutdown(). \c MPI_Finalize() will be
 called if StarPU-MPI has been initialized by starpu_mpi_init().
 
 \fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
@@ -67,11 +67,11 @@ Return in \p rank the rank of the calling process in the communicator \p comm
 
 \fn int starpu_mpi_world_rank(void)
 \ingroup API_MPI_Support
-Return the rank of the calling process in the communicator MPI_COMM_WORLD
+Return the rank of the calling process in the communicator \c MPI_COMM_WORLD
 
 \fn int starpu_mpi_world_size(void)
 \ingroup API_MPI_Support
-Return the size of the communicator MPI_COMM_WORLD
+Return the size of the communicator \c MPI_COMM_WORLD
 
 @name Communication
 \anchor MPIPtpCommunication
@@ -269,7 +269,7 @@ It also automatically clears the MPI communication cache when unregistering the
 
 \def starpu_mpi_data_register(data_handle, tag, rank)
 \ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator MPI_COMM_WORLD.
+Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD.
 It also automatically clears the MPI communication cache when unregistering the data.
 
 \fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag)
@@ -279,7 +279,7 @@ It also automatically clears the MPI communication cache when unregistering the
 
 \def starpu_data_set_tag
 \ingroup API_MPI_Support
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_tag
+Symbol kept for backward compatibility. Calls the function starpu_mpi_data_set_tag().
 
 \fn void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm)
 \ingroup API_MPI_Support
@@ -288,33 +288,33 @@ It also automatically clears the MPI communication cache when unregistering the
 
 \def starpu_mpi_data_set_rank
 \ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD. No tag will be defined.
+Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
 It also automatically clears the MPI communication cache when unregistering the data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_rank
+Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_rank()
 
 \def starpu_data_set_rank
 \ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD. No tag will be defined.
+Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
 It also automatically clears the MPI communication cache when unregistering the data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_rank
+Symbol kept for backward compatibility. Calls the function starpu_mpi_data_set_rank().
 
 \fn int starpu_mpi_data_get_rank(starpu_data_handle_t handle)
 \ingroup API_MPI_Support
 Return the rank of the given data.
 
-\def starpu_data_get_rank(starpu_data_handle_t handle)
+\def starpu_data_get_rank
 \ingroup API_MPI_Support
 Return the rank of the given data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_rank
+Symbol kept for backward compatibility. Calls the function starpu_mpi_data_get_rank().
 
 \fn int starpu_mpi_data_get_tag(starpu_data_handle_t handle)
 \ingroup API_MPI_Support
 Return the tag of the given data.
 
-\def starpu_data_get_tag(starpu_data_handle_t handle)
+\def starpu_data_get_tag
 \ingroup API_MPI_Support
 Return the tag of the given data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_tag
+Symbol kept for backward compatibility. Calls the function starpu_mpi_data_get_tag().
 
 \fn void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank)
 \ingroup API_MPI_Support
@@ -394,7 +394,7 @@ Create a task corresponding to codelet with the following arguments.
 The argument list must be zero-terminated. The function performs the
 first two steps of the function starpu_mpi_task_insert(). Only the MPI
 node selected in the first step of the algorithm will return a valid
-task structure which can then be submitted, others will return NULL. The function
+task structure which can then be submitted, others will return <c>NULL</c>. The function
 starpu_mpi_task_post_build() MUST be called after that on all nodes, and after the submission of
 the task on the node which creates it, with the SAME list of arguments.
 
@@ -421,6 +421,14 @@ the argument \p arg.
 \anchor MPINodeSelectionPolicy
 \ingroup API_MPI_Support
 
+\def STARPU_MPI_NODE_SELECTION_CURRENT_POLICY
+\ingroup API_MPI_Support
+todo
+
+\def STARPU_MPI_NODE_SELECTION_MOST_R_DATA
+\ingroup API_MPI_Support
+todo
+
 \fn int starpu_mpi_node_selection_get_current_policy()
 \ingroup API_MPI_Support
 Return the current policy used to select the node which will execute the codelet
@@ -428,8 +436,8 @@ Return the current policy used to select the node which will execute the codelet
 \fn int starpu_mpi_node_selection_set_current_policy(int policy)
 \ingroup API_MPI_Support
 Set the current policy used to select the node which will
-execute the codelet. The policy STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the
-node having the most data in R mode so as to minimize the amount of
+execute the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the
+node having the most data in ::STARPU_R mode so as to minimize the amount of
 data to be transferred.
 
 \fn int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func)

+ 3 - 3
doc/doxygen/chapters/api/opencl_extensions.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -165,7 +165,7 @@ Release the given \p kernel, to be called after kernel execution.
 \ingroup API_OpenCL_Extensions
 This function allows to collect statistics on a kernel execution.
 After termination of the kernels, the OpenCL codelet should call this
-function to pass it the even returned by clEnqueueNDRangeKernel, to
+function to pass it the event returned by \c clEnqueueNDRangeKernel(), to
 let StarPU collect statistics about the kernel execution (used cycles,
 consumed energy).
 
@@ -208,7 +208,7 @@ file and line number.
 \fn cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags)
 \ingroup API_OpenCL_Extensions
 Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
-combination of cl_mem_flags values.
+combination of \c cl_mem_flags values.
 
 \fn cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
 \ingroup API_OpenCL_Extensions

+ 2 - 2
doc/doxygen/chapters/api/openmp_runtime_support.doxy

@@ -341,12 +341,12 @@ clause without code outlining.
 \fn void starpu_omp_single_copyprivate_inline_end(void)
 \ingroup API_OpenMP_Runtime_Support
 This function completes the execution of a single section and returns the
-broadcasted copyprivate pointer for tasks that lost the election and NULL for
+broadcasted copyprivate pointer for tasks that lost the election and <c>NULL</c> for
 the task that won the election. This function can be used to implement
 <c>\#pragma omp single</c> with a copyprivate clause without code outlining.
 
 \return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section.
-\return NULL for the task that won the election and executed the code of the single section.
+\return <c>NULL</c> for the task that won the election and executed the code of the single section.
 
 \sa starpu_omp_single_copyprivate_inline
 \sa starpu_omp_single_copyprivate_inline_begin

+ 2 - 2
doc/doxygen/chapters/api/parallel_tasks.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -49,7 +49,7 @@ between the different workers of the given combined worker.
 
 \fn void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size)
 \ingroup API_Parallel_Tasks
-Initialise the barrier for the parallel task, to be pushed to \e worker_size
+Initialise the barrier for the parallel task, to be pushed to \p worker_size
 workers (without having to explicitly specify a given combined worker).
 
 */

+ 14 - 8
doc/doxygen/chapters/api/performance_model.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012, 2016 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -11,6 +11,8 @@
 \enum starpu_perfmodel_type
 \ingroup API_Performance_Model
 TODO
+\var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
+todo
 \var starpu_perfmodel_type::STARPU_PER_ARCH
 \ingroup API_Performance_Model
 Application-provided per-arch cost model function
@@ -81,29 +83,33 @@ is the symbol name for the performance model, which will be used as
 file name to store the model. It must be set otherwise the model will
 be ignored.
 \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
-Used by ::STARPU_COMMON: takes a task and implementation number, and
+Used by ::STARPU_COMMON. Takes a task and implementation number, and
 must return a task duration estimation in micro-seconds.
 \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
-Used by ::STARPU_COMMON: takes a task, an arch and implementation number, and
+Used by ::STARPU_COMMON. Takes a task, an arch and implementation number, and
 must return a task duration estimation in micro-seconds on that arch.
 \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
 Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
-::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
+::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, takes a task and
 implementation number, and returns the size to be used as index to distinguish
 histories and as a base for regressions.
 \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
-Used by ::STARPU_HISTORY_BASED. If not NULL, takes a task and returns the
+Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, takes a task and returns the
 footprint to be used as index to distinguish histories. The default is to use
-the starpu_task_data_footprint function.
+the starpu_task_data_footprint() function.
 \var unsigned starpu_perfmodel::is_loaded
 \private
 Whether the performance model is already loaded from the disk.
 \var unsigned starpu_perfmodel::benchmarking
 \private
+todo
 \var unsigned starpu_perfmodel::is_init
 todo
 \var starpu_perfmodel_state_t starpu_perfmodel::state
 \private
+todo
+\var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
+todo
 \var const char ** starpu_perfmodel::parameters_names
 \private
 Names of parameters used for multiple linear regression models (M, N, K)
@@ -217,7 +223,7 @@ todo
 \ingroup API_Performance_Model
 this function frees internal memory used for sampling directory
 management. It should only be called by an application which is not
-calling starpu_shutdown as this function already calls it. See for
+calling starpu_shutdown() as this function already calls it. See for
 example <c>tools/starpu_perfmodel_display.c</c>.
 
 \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
@@ -308,6 +314,6 @@ Return the estimated time to transfer a given size between two memory nodes.
 
 \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
 \ingroup API_Performance_Model
-Return the estimated time of a task whose model is named \p and whose footprint is \p footprint
+Return the estimated time of a task with the given model and the given footprint.
 
 */

+ 2 - 2
doc/doxygen/chapters/api/profiling.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -138,7 +138,7 @@ needs to be called before starpu_init().
 \ingroup API_Profiling
 Get the profiling info associated to the worker identified by
 \p workerid, and reset the profiling measurements. If the argument \p
-worker_info is NULL, only reset the counters associated to worker
+worker_info is <c>NULL</c>, only reset the counters associated to worker
 \p workerid. Upon successful completion, this function returns 0.
 Otherwise, a negative value is returned.
 

+ 17 - 7
doc/doxygen/chapters/api/running_driver.doxy

@@ -1,36 +1,46 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
 
 /*! \defgroup API_Running_Drivers Running Drivers
 
+\struct starpu_driver
+structure for a driver
+\ingroup API_Running_Drivers
+\var enum starpu_worker_archtype starpu_driver::type
+The type of the driver. Only ::STARPU_CPU_WORKER,
+::STARPU_CUDA_WORKER and ::STARPU_OPENCL_WORKER are currently supported.
+\var union starpu_driver::id
+The identifier of the driver.
+
 \fn int starpu_driver_run(struct starpu_driver *d)
 \ingroup API_Running_Drivers
 Initialize the given driver, run it until it receives a request to
 terminate, deinitialize it and return 0 on success. It returns
-<c>-EINVAL</c> if <c>d->type</c> is not a valid StarPU device type
-(::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER). This
-is the same as using the following functions: calling
+<c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
+(::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
+
+This is the same as using the following functions: calling
 starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
 and eventually starpu_driver_deinit().
 
 \fn int starpu_driver_init(struct starpu_driver *d)
 \ingroup API_Running_Drivers
 Initialize the given driver. Returns 0 on success, <c>-EINVAL</c> if
-<c>d->type</c> is not a valid ::starpu_worker_archtype.
+starpu_driver::type is not a valid ::starpu_worker_archtype.
 
 \fn int starpu_driver_run_once(struct starpu_driver *d)
 \ingroup API_Running_Drivers
-Run the driver once, then returns 0 on success, <c>-EINVAL</c> if <c>d->type</c> is not a valid ::starpu_worker_archtype.
+Run the driver once, then returns 0 on success, <c>-EINVAL</c> if starpu_driver::type is not a valid ::starpu_worker_archtype.
 
 \fn int starpu_driver_deinit(struct starpu_driver *d)
 \ingroup API_Running_Drivers
 Deinitialize the given driver. Returns 0 on success, <c>-EINVAL</c> if
-<c>d->type</c> is not a valid ::starpu_worker_archtype.
+starpu_driver::type is not a valid ::starpu_worker_archtype.
 
 \fn void starpu_drivers_request_termination(void)
 \ingroup API_Running_Drivers

+ 2 - 2
doc/doxygen/chapters/api/scc_extensions.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -32,6 +32,6 @@ and return the index in the array through \p symbol.
 \ingroup API_SCC_Extensions
 If success, return the pointer to the function defined by \p symbol on
 the device linked to the called device. This can for instance be used
-in a starpu_scc_func_t implementation.
+in a starpu_scc_func_symbol_t implementation.
 
 */

+ 5 - 5
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -62,7 +62,7 @@ For each task not going through the scheduler (because starpu_task::execute_on_a
 \var struct starpu_task *(*starpu_sched_policy::pop_task)(unsigned sched_ctx_id)
         Get a task from the scheduler. The mutex associated to the
 	worker is already taken when this method is called. If this
-	method is defined as NULL, the worker will only execute tasks
+	method is defined as <c>NULL</c>, the worker will only execute tasks
 	from its local queue. In this case, the push_task method
 	should use the starpu_push_local_task method to assign tasks
 	to the different workers.
@@ -93,7 +93,7 @@ For each task not going through the scheduler (because starpu_task::execute_on_a
 
 \fn struct starpu_sched_policy **starpu_sched_get_predefined_policies()
 \ingroup API_Scheduling_Policy
-Return an NULL-terminated array of all the predefined scheduling
+Return a <c>NULL</c>-terminated array of all the predefined scheduling
 policies.
 
 \fn void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
@@ -166,7 +166,7 @@ which implementation numbers can be used.
 Schedulers need to call it before assigning a task to a worker,
 otherwise the task may fail to execute.
 This should be preferred rather than calling starpu_worker_can_execute_task for
-each and every implementation. It can also be used with impl_mask == NULL to
+each and every implementation. It can also be used with <c>impl_mask == NULL</c> to
 check for at least one implementation without determining which.
 
 \fn int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl)
@@ -176,7 +176,7 @@ the first implementation which can be used.
 Schedulers need to call it before assigning a task to a worker,
 otherwise the task may fail to execute.
 This should be preferred rather than calling starpu_worker_can_execute_task for
-each and every implementation. It can also be used with impl_mask == NULL to
+each and every implementation. It can also be used with <c>impl_mask == NULL</c> to
 check for at least one implementation without determining which.
 
 \fn uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)

+ 15 - 11
doc/doxygen/chapters/api/standard_memory_library.doxy

@@ -1,13 +1,17 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
 
 /*! \defgroup API_Standard_Memory_Library Standard Memory Library
 
+\def starpu_ssize_t
+\ingroup API_Standard_Memory_Library
+todo
+
 \def starpu_data_malloc_pinned_if_possible
 \ingroup API_Standard_Memory_Library
 \deprecated
@@ -36,13 +40,13 @@ starpu_free_flags() with the same flag.
 
 \def STARPU_MALLOC_NORECLAIM
 \ingroup API_Standard_Memory_Library
-Value passed to the function starpu_malloc_flags() along STARPU_MALLOC_COUNT
+Value passed to the function starpu_malloc_flags() along ::STARPU_MALLOC_COUNT
 to indicate that while the memory allocation should be kept in the limits
-defined for STARPU_MALLOC_COUNT, no reclaiming should be performed by
-starpu_malloc_flags itself, thus potentially overflowing the
+defined for ::STARPU_MALLOC_COUNT, no reclaiming should be performed by
+starpu_malloc_flags() itself, thus potentially overflowing the
 memory node a bit. StarPU will reclaim memory after next task termination,
-according to the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM
-environment variables. If STARPU_MEMORY_WAIT is set, no overflowing will happen,
+according to \ref STARPU_MINIMUM_AVAILABLE_MEM and \ref STARPU_TARGET_AVAILABLE_MEM
+environment variables. If ::STARPU_MEMORY_WAIT is set, no overflowing will happen,
 starpu_malloc_flags() will wait for other eviction mechanisms to release enough memory.
 
 \def STARPU_MALLOC_SIMULATION_FOLDED
@@ -62,7 +66,7 @@ by the given flag.
 \fn void starpu_malloc_set_align(size_t align)
 \ingroup API_Standard_Memory_Library
 This function sets an alignment constraint for starpu_malloc()
-allocations. align must be a power of two. This is for instance called
+allocations. \p align must be a power of two. This is for instance called
 automatically by the OpenCL driver to specify its own alignment
 constraints.
 
@@ -126,9 +130,9 @@ If a memory limit is defined on the given node (see Section
 allocate memory, but only accounts for it. This can be useful when the
 application allocates data another way, but wants StarPU to be aware of the
 allocation size e.g. for memory reclaiming.
-By default, the function returns -ENOMEM if there is not enough room on
-the given node. \p flags can be either STARPU_MEMORY_WAIT or
-STARPU_MEMORY_OVERFLOW to change this.
+By default, the function returns <c>-ENOMEM</c> if there is not enough room on
+the given node. \p flags can be either ::STARPU_MEMORY_WAIT or
+::STARPU_MEMORY_OVERFLOW to change this.
 
 \fn void starpu_memory_deallocate(unsigned node, size_t size)
 \ingroup API_Standard_Memory_Library
@@ -148,7 +152,7 @@ bytes to become available on \p node. Of course, since another thread may be
 allocating memory concurrently, this does not necessarily mean that this amount
 will be actually available, just that it was reached. To atomically wait for
 some amount of memory and reserve it, starpu_memory_allocate() should be used
-with the STARPU_MEMORY_WAIT flag.
+with the ::STARPU_MEMORY_WAIT flag.
 
 \def STARPU_MEMORY_WAIT
 \ingroup API_Standard_Memory_Library

+ 7 - 7
doc/doxygen/chapters/api/threads.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -178,7 +178,7 @@ It does not do anything when the simulated performance mode is enabled
 \ingroup API_Threads
 This function initializes the mutex object pointed to by \p mutex
 according to the mutex attributes specified in \p mutexattr.  If \p
-mutexattr is NULL, default attributes are used instead.
+mutexattr is <c>NULL</c>, default attributes are used instead.
 
 \fn int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex)
 \ingroup API_Threads
@@ -211,7 +211,7 @@ This function behaves identically to starpu_pthread_mutex_lock(),
 except that it does not block the calling thread if the mutex is
 already locked by another thread (or by the calling thread in the case
 of a ``fast''  mutex). Instead, the function returns immediately with
-the error code EBUSY.
+the error code \c EBUSY.
 
 This function also produces trace when the configure option
 \ref enable-fxt-lock "--enable-fxt-lock" is enabled.
@@ -244,7 +244,7 @@ location pointed to by \p key.
 \fn int starpu_pthread_key_delete(starpu_pthread_key_t key)
 \ingroup API_Threads
 This function deallocates a TSD key. It does not check whether
-non-NULL values are associated with that key in the currently
+non-<c>NULL</c> values are associated with that key in the currently
 executing threads, nor call the destructor function associated with
 the key.
 
@@ -256,7 +256,7 @@ thread, storing the given \p pointer instead.
 \fn void *starpu_pthread_getspecific(starpu_pthread_key_t key)
 \ingroup API_Threads
 This function returns the value associated with \p key on success, and
-NULL on error.
+<c>NULL</c> on error.
 
 \typedef STARPU_PTHREAD_COND_INITIALIZER
 \ingroup API_Threads
@@ -266,7 +266,7 @@ This macro initializes the condition variable given in parameter.
 \ingroup API_Threads
 This function initializes the condition variable \p cond, using the
 condition attributes specified in \p cond_attr, or default attributes
-if \p cond_attr is NULL.
+if \p cond_attr is <c>NULL</c>.
 
 \fn int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond)
 \ingroup API_Threads
@@ -278,7 +278,7 @@ one is restarted, but it not specified which.
 \fn int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond)
 \ingroup API_Threads
 This function restarts all the threads that are waiting on the
-condition variable \p cond. Nothing happens if no threads are waiting on cond.
+condition variable \p cond. Nothing happens if no threads are waiting on \p cond.
 
 \fn int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
 \ingroup API_Threads

+ 2 - 2
doc/doxygen/chapters/api/toolbox.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -13,7 +13,7 @@ to have a code which can be compiled with any C compiler.
 
 \def STARPU_GNUC_PREREQ
 \ingroup API_Toolbox
-Return true (non-zero) if GCC version MAJ.MIN or later is being used (macro taken from glibc.)
+Return true (non-zero) if GCC version \p maj.\p min or later is being used (macro taken from glibc.)
 
 \def STARPU_UNLIKELY
 \ingroup API_Toolbox

+ 33 - 37
doc/doxygen/chapters/api/top.doxy

@@ -1,7 +1,7 @@
 /*
  * This file is part of the StarPU Handbook.
  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2011, 2012 INRIA
  * See the file version.doxy for copying conditions.
  */
@@ -96,7 +96,7 @@ todo
 \var void *starpu_top_param::value
 todo
 \var char **starpu_top_param::enum_values
-only for enum type can be NULL
+only for enum type, can be <c>NULL</c>
 \var int starpu_top_param::nb_values
 todo
 \var void (*starpu_top_param::callback)(struct starpu_top_param*)
@@ -117,50 +117,50 @@ todo
 
 \fn struct starpu_top_data *starpu_top_add_data_boolean(const char *data_name, int active)
 \ingroup API_StarPUTop_Interface
-This fonction register a data named data_name of type boolean.
-If \p active=0, the value will NOT be displayed to user by default.
+This function registers a data named \p data_name of type boolean.
+If \p active is 0, the value will NOT be displayed to user by default.
 Any other value will make the value displayed by default.
 
 \fn struct starpu_top_data *starpu_top_add_data_integer(const char *data_name, int minimum_value, int maximum_value, int active)
 \ingroup API_StarPUTop_Interface
-This fonction register a data named \p data_name of type integer. The
-minimum and maximum value will be usefull to define the scale in UI.
-If \p active=0, the value will NOT be displayed to user by default.
+This function registers a data named \p data_name of type integer. The
+minimum and maximum value will be used to define the scale in the UI.
+If \p active is 0, the value will NOT be displayed to user by default.
 Any other value will make the value displayed by default.
 
 \fn struct starpu_top_data *starpu_top_add_data_float(const char *data_name, double minimum_value, double maximum_value, int active)
 \ingroup API_StarPUTop_Interface
-This fonction register a data named data_name of type float. The
-minimum and maximum value will be usefull to define the scale in UI.
-If \p active=0, the value will NOT be displayed to user by default.
+This function registers a data named \p data_name of type float. The
+minimum and maximum value will be used to define the scale in the UI.
+If \p active is 0, the value will NOT be displayed to user by default.
 Any other value will make the value displayed by default.
 
 \fn struct starpu_top_param *starpu_top_register_parameter_boolean(const char *param_name, int *parameter_field, void (*callback)(struct starpu_top_param*))
 \ingroup API_StarPUTop_Interface
-This fonction register a parameter named \p parameter_name, of type
-boolean. The \p callback fonction will be called when the parameter is
-modified by UI, and can be null.
+This function registers a parameter named \p param_name, of type
+boolean. The \p callback function will be called when the parameter is
+modified by the UI, and can be <c>NULL</c>.
 
 \fn struct starpu_top_param *starpu_top_register_parameter_float(const char *param_name, double *parameter_field, double minimum_value, double maximum_value, void (*callback)(struct starpu_top_param*))
 \ingroup API_StarPUTop_Interface
-his fonction register a parameter named \p param_name, of type
-integer. Minimum and maximum value will be used to prevent user seting
-incorrect value. The \p callback fonction will be called when the
-parameter is modified by UI, and can be null.
+This function registers a parameter named \p param_name, of type
+float. Minimum and maximum value will be used to prevent users from setting
+incorrect value. The \p callback function will be called when the
+parameter is modified by the UI, and can be <c>NULL</c>.
 
 \fn struct starpu_top_param *starpu_top_register_parameter_integer(const char *param_name, int *parameter_field, int minimum_value, int maximum_value, void (*callback)(struct starpu_top_param*))
 \ingroup API_StarPUTop_Interface
-This fonction register a parameter named \p param_name, of type float.
-Minimum and maximum value will be used to prevent user seting
-incorrect value. The \p callback fonction will be called when the
-parameter is modified by UI, and can be null.
+This function registers a parameter named \p param_name, of type integer.
+Minimum and maximum value will be used to prevent users from setting
+incorrect value. The \p callback function will be called when the
+parameter is modified by the UI, and can be <c>NULL</c>.
 
 \fn struct starpu_top_param *starpu_top_register_parameter_enum(const char *param_name, int *parameter_field, char **values, int nb_values, void (*callback)(struct starpu_top_param*))
 \ingroup API_StarPUTop_Interface
-This fonction register a parameter named \p param_name, of type enum.
-Minimum and maximum value will be used to prevent user seting
-incorrect value. The \p callback fonction will be called when the
-parameter is modified by UI, and can be null.
+This function registers a parameter named \p param_name, of type enum.
+Minimum and maximum value will be used to prevent users from setting
+incorrect value. The \p callback function will be called when the
+parameter is modified by the UI, and can be <c>NULL</c>.
 
 @name Initialisation
 \ingroup API_StarPUTop_Interface
@@ -178,37 +178,33 @@ GO message.
 \fn void starpu_top_update_parameter(const struct starpu_top_param *param)
 \ingroup API_StarPUTop_Interface
 This function should be called after every modification of a parameter
-from something other than starpu_top. This fonction notice UI that the
+from something other than starpu_top. This function notifies the UI that the
 configuration changed.
 
 \fn void starpu_top_update_data_boolean(const struct starpu_top_data *data, int value)
 \ingroup API_StarPUTop_Interface
-This function updates the value of the starpu_top_data on UI.
+This function updates the value of the starpu_top_data in the UI.
 
 \fn void starpu_top_update_data_integer(const struct starpu_top_data *data, int value)
 \ingroup API_StarPUTop_Interface
-This function updates the value of the starpu_top_data on UI.
+This function updates the value of the starpu_top_data in the UI.
 
 \fn void starpu_top_update_data_float(const struct starpu_top_data *data, double value)
 \ingroup API_StarPUTop_Interface
-This function updates the value of the starpu_top_data on UI.
+This function updates the value of the starpu_top_data in the UI.
 
 \fn void starpu_top_task_prevision(struct starpu_task *task, int devid, unsigned long long start, unsigned long long end)
 \ingroup API_StarPUTop_Interface
-This function notifies UI than the task have been planed to run from start to end, on computation-core.
+This function notifies the UI that \p task is planned to run from \p start to \p end, on computation-core.
 
 \fn void starpu_top_debug_log(const char *message)
 \ingroup API_StarPUTop_Interface
-This function is useful in debug mode. The starpu developper doesn't
-need to check if the debug mode is active. This is checked by
-starpu_top itsefl. It just send a message to display by UI.
+When running in debug mode, the function sends \p message to be displayed by the UI.
 
 \fn void starpu_top_debug_lock(const char *message)
 \ingroup API_StarPUTop_Interface
-This function is useful in debug mode. The starpu developper doesn't
-need to check if the debug mode is active. This is checked by
-starpu_top itsefl. It send a message and wait for a continue message
-from UI to return. The lock (wich create a stop-point) should be
+When running in debug mode, the function sends a message and waits for a continue message
+from the UI to return. The lock (which creates a stop-point) should be
 called only by the main thread. Calling it from more than one thread
 is not supported.
 

+ 30 - 9
doc/doxygen/chapters/api/workers.doxy

@@ -91,10 +91,26 @@ Only the list data structure is available but further data
 structures(like tree) implementations are foreseen.
 \var void *starpu_worker_collection::workerids
         The workerids managed by the collection
+\var void *starpu_worker_collection::collection_private
+        todo
+\var void *starpu_worker_collection::unblocked_workers
+        todo
+\var unsigned starpu_worker_collection::nunblocked_workers
+        todo
+\var void *starpu_worker_collection::masters
+        todo
+\var unsigned starpu_worker_collection::nmasters
+        todo
+\var char starpu_worker_collection::present[STARPU_NMAXWORKERS]
+        todo
+\var char starpu_worker_collection::is_unblocked[STARPU_NMAXWORKERS]
+        todo
+\var char starpu_worker_collection::is_master[STARPU_NMAXWORKERS]
+        todo
 \var unsigned starpu_worker_collection::nworkers
         The number of workers in the collection
 \var enum starpu_worker_collection_type starpu_worker_collection::type
-        The type of structure (currently ::STARPU_WORKER_LIST is the only one available)
+        The type of structure
 \var unsigned (*starpu_worker_collection::has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
         Checks if there is another element in collection
 \var int (*starpu_worker_collection::get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
@@ -109,6 +125,8 @@ structures(like tree) implementations are foreseen.
         Deinitialize the collection
 \var void (*starpu_worker_collection::init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
         Initialize the cursor if there is one
+\var void (*starpu_worker_collection::init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task)
+        todo
 
 \enum starpu_worker_collection_type
 \ingroup API_Workers_Properties
@@ -116,6 +134,9 @@ Types of structures the worker collection can implement
 \var starpu_worker_collection_type::STARPU_WORKER_LIST
 \ingroup API_Workers_Properties
 The collection is an array
+\var starpu_worker_collection_type::STARPU_WORKER_TREE
+\ingroup API_Workers_Properties
+The collection is a tree
 
 \struct starpu_sched_ctx_iterator
 \ingroup API_Workers_Properties
@@ -134,7 +155,7 @@ units executing StarPU tasks). The returned value should be at most
 \fn int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 \ingroup API_Workers_Properties
 Returns the number of workers of the given type. A positive (or
-NULL) value is returned in case of success, -EINVAL indicates that the
+zero) value is returned in case of success, <c>-EINVAL</c> indicates that the
 type is not valid otherwise.
 
 \fn unsigned starpu_cpu_worker_get_count(void)
@@ -180,7 +201,7 @@ between 0 and starpu_worker_get_count() - 1.
 
 \fn unsigned starpu_worker_get_id_check(void)
 \ingroup API_Workers_Properties
-This is the same as starpu_worker_get_id, but aborts when called from outside a
+This is the same as starpu_worker_get_id(), but aborts when called from outside a
 worker (i.e. when starpu_worker_get_id() would return -1).
 
 \fn unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize)
@@ -190,7 +211,7 @@ given type. It fills the array \p workerids with the identifiers of the
 workers that have the type indicated in the first argument. The
 argument \p maxsize indicates the size of the array \p workerids. The returned
 value gives the number of identifiers that were put in the array.
--ERANGE is returned is \p maxsize is lower than the number of workers
+<c>-ERANGE</c> is returned if \p maxsize is lower than the number of workers
 with the appropriate type: in that case, the array is filled with the
 \p maxsize first elements. To avoid such overflows, the value of \p maxsize
 can be chosen by the means of the function
@@ -199,7 +220,7 @@ equal to \ref STARPU_NMAXWORKERS.
 
 \fn int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
 \ingroup API_Workers_Properties
-This returns the identifier of the num-th worker that has the
+This returns the identifier of the \p num -th worker that has the
 specified type \p type. If there is no such worker, -1 is returned.
 
 \fn int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
@@ -214,7 +235,7 @@ This function returns the device id of the given worker. The
 worker should be identified with the value returned by the
 starpu_worker_get_id() function. In the case of a CUDA worker, this
 device identifier is the logical device identifier exposed by CUDA
-(used by the function cudaGetDevice() for instance). The device
+(used by the function \c cudaGetDevice() for instance). The device
 identifier of a CPU worker is the logical identifier of the core on
 which the worker was bound; this identifier is either provided by the
 OS or by the library <c>hwloc</c> in case it is available.
@@ -233,9 +254,9 @@ unspecified.
 \ingroup API_Workers_Properties
 This function allows to get the name of a given worker. StarPU
 associates a unique human readable string to each processing unit.
-This function copies at most the maxlen first bytes of the unique
-string associated to a worker identified by its identifier id into the
-dst buffer. The caller is responsible for ensuring that \p dst is a
+This function copies at most the \p maxlen first bytes of the unique
+string associated to a worker identified by its identifier \p id into the
+\p dst buffer. The caller is responsible for ensuring that \p dst is a
 valid pointer to a buffer of \p maxlen bytes at least. Calling this
 function on an invalid identifier results in an unspecified behaviour.
 

+ 2 - 2
doc/doxygen/doxygen_filter.sh.in

@@ -2,7 +2,7 @@
 #
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2014 CNRS
+# Copyright (C) 2014, 2017 CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,7 +16,7 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 if [ "$(basename $1)" == "starpufft.h" ] ; then
-    gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep starpufft
+    gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep -i starpufft
 else
     # the macro STARPU_DEPRECATED needs to be removed as it is not properly processed by doxygen
     # lines starting with // in the doxygen input files are considered as comments to be removed