8 年之前 · 553f2b122e
--- a/doc/doxygen/chapters/api/bitmap.doxy
+++ b/doc/doxygen/chapters/api/bitmap.doxy
@@ -18,7 +18,7 @@ create a empty starpu_bitmap
 
				 
			
 
				 \fn void starpu_bitmap_destroy(struct starpu_bitmap *b)
			
 
				 \ingroup API_Bitmap
			
 
				-free a starpu_bitmap
			
 
				+free \p b
			
 
				 
			
 
				 \fn void starpu_bitmap_set(struct starpu_bitmap *b, int e)
			
 
				 \ingroup API_Bitmap
			
@@ -38,7 +38,7 @@ return true iff bit \p e is set in \p b
 
				 
			
 
				 \fn void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c)
			
 
				 \ingroup API_Bitmap
			
 
				-Basically compute starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c;
			
 
				+Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c;
			
 
				 
			
 
				 \fn void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b)
			
 
				 \ingroup API_Bitmap
			
--- a/doc/doxygen/chapters/api/codelet_and_tasks.doxy
+++ b/doc/doxygen/chapters/api/codelet_and_tasks.doxy
--- a/doc/doxygen/chapters/api/cuda_extensions.doxy
+++ b/doc/doxygen/chapters/api/cuda_extensions.doxy
@@ -21,7 +21,7 @@ supported by StarPU.
 
				 
			
 
				 \fn cudaStream_t starpu_cuda_get_local_stream(void)
			
 
				 \ingroup API_CUDA_Extensions
			
 
				-This function gets the current worker’s CUDA stream. StarPU
			
 
				+Return the current worker’s CUDA stream. StarPU
			
 
				 provides a stream for every CUDA device controlled by StarPU. This
			
 
				 function is only provided for convenience so that programmers can
			
 
				 easily use asynchronous operations within codelets without having to
			
@@ -33,8 +33,7 @@ overlapped.
 
				 
			
 
				 \fn const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid)
			
 
				 \ingroup API_CUDA_Extensions
			
 
				-This function returns a pointer to device properties for worker
			
 
				-\p workerid (assumed to be a CUDA worker).
			
 
				+Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker).
			
 
				 
			
 
				 \fn void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
			
 
				 \ingroup API_CUDA_Extensions
			
@@ -48,8 +47,8 @@ Calls starpu_cuda_report_error(), passing the current function, file and line po
 
				 \ingroup API_CUDA_Extensions
			
 
				 Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				 to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				-copy the data asynchronous (unless stream is <c>NULL</c>). If the
			
 
				-asynchronous copy fails or if stream is <c>NULL</c>, it copies the
			
 
				+copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
			
 
				+asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				 data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				 asynchronous launch was successful. It returns 0 if the synchronous
			
 
				 copy was successful, or fails otherwise.
			
--- a/doc/doxygen/chapters/api/data_interfaces.doxy
+++ b/doc/doxygen/chapters/api/data_interfaces.doxy
@@ -9,288 +9,303 @@
 
				 /*! \defgroup API_Data_Interfaces Data Interfaces
			
 
				 
			
 
				 \struct starpu_data_interface_ops
			
 
				-Per-interface data transfer methods.
			
 
				 \ingroup API_Data_Interfaces
			
 
				+Per-interface data transfer methods.
			
 
				 \var void (*starpu_data_interface_ops::register_data_handle)(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
			
 
				-Register an existing interface into a data handle.
			
 
				+    Register an existing interface into a data handle.
			
 
				 
			
 
				 \var starpu_ssize_t (*starpu_data_interface_ops::allocate_data_on_node)(void *data_interface, unsigned node)
			
 
				-Allocate data for the interface on a given node.
			
 
				+    Allocate data for the interface on a given node.
			
 
				 
			
 
				 \var void (*starpu_data_interface_ops::free_data_on_node)(void *data_interface, unsigned node)
			
 
				-Free data of the interface on a given node.
			
 
				+    Free data of the interface on a given node.
			
 
				 
			
 
				 \var const struct starpu_data_copy_methods *starpu_data_interface_ops::copy_methods
			
 
				-ram/cuda/opencl synchronous and asynchronous transfer methods.
			
 
				+    ram/cuda/opencl synchronous and asynchronous transfer methods.
			
 
				 
			
 
				 \var void *(*starpu_data_interface_ops::handle_to_pointer)(starpu_data_handle_t handle, unsigned node)
			
 
				-Return the current pointer (if any) for the handle on the given node.
			
 
				+    Return the current pointer (if any) for the handle on the given node.
			
 
				 
			
 
				 \var size_t (*starpu_data_interface_ops::get_size)(starpu_data_handle_t handle)
			
 
				-Return an estimation of the size of data, for performance models.
			
 
				+    Return an estimation of the size of data, for performance models.
			
 
				 
			
 
				 \var uint32_t (*starpu_data_interface_ops::footprint)(starpu_data_handle_t handle)
			
 
				-Return a 32bit footprint which characterizes the data size.
			
 
				+    Return a 32bit footprint which characterizes the data size.
			
 
				 
			
 
				 \var int (*starpu_data_interface_ops::compare)(void *data_interface_a, void *data_interface_b)
			
 
				-Compare the data size of two interfaces.
			
 
				+    Compare the data size of two interfaces.
			
 
				 
			
 
				 \var void (*starpu_data_interface_ops::display)(starpu_data_handle_t handle, FILE *f)
			
 
				-Dump the sizes of a handle to a file.
			
 
				+    Dump the sizes of a handle to a file.
			
 
				 
			
 
				 \var starpu_ssize_t (*starpu_data_interface_ops::describe)(void *data_interface, char *buf, size_t size)
			
 
				-Describe the data into a string.
			
 
				+    Describe the data into a string.
			
 
				 
			
 
				 \var enum starpu_data_interface_id starpu_data_interface_ops::interfaceid
			
 
				-An identifier that is unique to each interface.
			
 
				+    An identifier that is unique to each interface.
			
 
				 
			
 
				 \var size_t starpu_data_interface_ops::interface_size
			
 
				-The size of the interface data descriptor.
			
 
				+    The size of the interface data descriptor.
			
 
				 
			
 
				 \var char starpu_data_interface_ops::is_multiformat
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \var char starpu_data_interface_ops::dontcache
			
 
				-If set to non-zero, StarPU will never try to reuse an allocated buffer for a
			
 
				-different handle. This can be notably useful for application-defined interfaces
			
 
				-which have a dynamic size, and for which it thus does not make sense to reuse
			
 
				-the buffer since will probably not have the proper size.
			
 
				+    If set to non-zero, StarPU will never try to reuse an allocated
			
 
				+    buffer for a different handle. This can be notably useful for
			
 
				+    application-defined interfaces which have a dynamic size, and for
			
 
				+    which it thus does not make sense to reuse the buffer since it will
			
 
				+    probably not have the proper size.
			
 
				 
			
 
				 \var struct starpu_multiformat_data_interface_ops* (*starpu_data_interface_ops::get_mf_ops)(void *data_interface)
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \var int (*starpu_data_interface_ops::pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
			
 
				-Pack the data handle into a contiguous buffer at the address allocated with
			
 
				-<c>starpu_malloc_flags(ptr, size, 0)</c> (and thus returned in \p ptr) and
			
 
				-set the size of the newly created buffer in \p count. If \p ptr is <c>NULL</c>, the
			
 
				-function should not copy the data in the buffer but just set count to
			
 
				-the size of the buffer which would have been allocated. The special
			
 
				-value -1 indicates the size is yet unknown.
			
 
				+    Pack the data handle into a contiguous buffer at the address
			
 
				+    allocated with <c>starpu_malloc_flags(ptr, size, 0)</c> (and thus
			
 
				+    returned in \p ptr) and set the size of the newly created buffer
			
 
				+    in \p count. If \p ptr is <c>NULL</c>, the function should not
			
 
				+    copy the data in the buffer but just set \p count to the size of the
			
 
				+    buffer which would have been allocated. The special value -1
			
 
				+    indicates the size is yet unknown.
			
 
				 
			
 
				 \var int (*starpu_data_interface_ops::unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
			
 
				-Unpack the data handle from the contiguous buffer at the address \p ptr
			
 
				-of size \p count
			
 
				+    Unpack the data handle from the contiguous buffer at the address
			
 
				+    \p ptr of size \p count.
			
 
				 
			
 
				 \struct starpu_data_copy_methods
			
 
				-Defines the per-interface methods. If the any_to_any method is
			
 
				-provided, it will be used by default if no more specific method is
			
 
				-provided. It can still be useful to provide more specific method in
			
 
				-case of e.g. available particular CUDA or OpenCL support.
			
 
				 \ingroup API_Data_Interfaces
			
 
				+Defines the per-interface methods. If the
			
 
				+starpu_data_copy_methods::any_to_any method is provided, it will be
			
 
				+used by default if no specific method is provided. It can still be
			
 
				+useful to provide a more specific method in case of e.g. available
			
 
				+particular CUDA or OpenCL support.
			
 
				 \var int (*starpu_data_copy_methods::can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node)
			
 
				-If defined, allows the interface to declare whether it supports transferring
			
 
				-from \p src_interface on node \p src_node to \p dst_interface on node \p
			
 
				-dst_node, run from node \p handling_node. If not defined, it is assumed that the
			
 
				-interface supports all transfers.
			
 
				+    If defined, allows the interface to declare whether it supports
			
 
				+    transferring from \p src_interface on node \p src_node to \p
			
 
				+    dst_interface on node \p dst_node, run from node \p handling_node.
			
 
				+    If not defined, it is assumed that the interface supports all
			
 
				+    transfers.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the \p
			
 
				-src_node CPU node to the \p dst_interface interface on the \p dst_node
			
 
				-CPU  node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node CPU node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node CUDA node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node
			
 
				-OpenCL node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node OpenCL node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node MIC
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node MIC node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
			
 
				-CPU node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				+    dst_node CPU node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				+    dst_node CUDA node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::cuda_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CUDA node to the \p dst_interface interface on the \p dst_node
			
 
				-OpenCL node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				+    dst_node OpenCL node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
			
 
				-CPU node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				+    \p dst_node CPU node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
			
 
				-CUDA node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				+    \p dst_node CUDA node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
			
 
				-OpenCL node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				+    \p dst_node OpenCL node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mic_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MIC node to the \p dst_interface interface on the \p dst_node CPU
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MIC node to the \p dst_interface interface on the \p
			
 
				+    dst_node CPU node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::scc_src_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node node to the \p dst_interface interface on the \p dst_node node.
			
 
				-Must return 0 if the transfer was actually completed completely
			
 
				-synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
			
 
				-and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node node to the \p dst_interface interface on the \p
			
 
				+    dst_node node. Must return 0 if the transfer was actually
			
 
				+    completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+    some transfers are still ongoing and should be awaited for by the
			
 
				+    core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::scc_sink_to_src)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node node to the \p dst_interface interface on the \p dst_node node.
			
 
				-Must return 0 if the transfer was actually completed completely
			
 
				-synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
			
 
				-and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node node to the \p dst_interface interface on the \p
			
 
				+    dst_node node. Must return 0 if the transfer was actually
			
 
				+    completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+    some transfers are still ongoing and should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::scc_sink_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node node to the \p dst_interface interface on the \p dst_node node.
			
 
				-Must return 0 if the transfer was actually completed completely
			
 
				-synchronously, or <c>-EAGAIN</c> if at least some transfers are still ongoing
			
 
				-and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node node to the \p dst_interface interface on the \p
			
 
				+    dst_node node. Must return 0 if the transfer was actually
			
 
				+    completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+    some transfers are still ongoing and should be awaited for by the
			
 
				+    core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node MPI Slave
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node MPI Slave node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mpi_ms_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node CPU
			
 
				-node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+    the \p dst_node CPU node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mpi_ms_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node
			
 
				-MPI Slave node. Return 0 on success.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+    the \p dst_node MPI Slave node. Return 0 on success.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA
			
 
				-node, using the given stream. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node CUDA node, using the given stream. Must return 0 if the
			
 
				+    transfer was actually completed completely synchronously, or
			
 
				+    <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+    should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU
			
 
				-node, using the given stream. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				+    dst_node CPU node, using the given stream. Must return 0 if the
			
 
				+    transfer was actually completed completely synchronously, or
			
 
				+    <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+    should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA
			
 
				-node, using the given stream. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CUDA node to the \p dst_interface interface on the \p
			
 
				+    dst_node CUDA node, using the given stream. Must return 0 if the
			
 
				+    transfer was actually completed completely synchronously, or
			
 
				+    <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+    should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node
			
 
				-OpenCL node, by recording in \p event, a pointer to a <c>cl_event</c>, the event
			
 
				-of the last submitted transfer. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the
			
 
				-core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node OpenCL node, by recording in \p event, a pointer to a
			
 
				+    <c>cl_event</c>, the event of the last submitted transfer. Must
			
 
				+    return 0 if the transfer was actually completed completely
			
 
				+    synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				+    still ongoing and should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
			
 
				-CPU node, by recording in \p event, a pointer to a <c>cl_event</c>, the event of
			
 
				-the last submitted transfer. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the
			
 
				-core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				+    \p dst_node CPU node, by recording in \p event, a pointer to a
			
 
				+    <c>cl_event</c>, the event of the last submitted transfer. Must
			
 
				+    return 0 if the transfer was actually completed completely
			
 
				+    synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				+    still ongoing and should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node OpenCL node to the \p dst_interface interface on the \p dst_node
			
 
				-OpenCL node, by recording in \p event, a pointer to a <c>cl_event</c>, the event
			
 
				-of the last submitted transfer. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the
			
 
				-core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node OpenCL node to the \p dst_interface interface on the
			
 
				+    \p dst_node OpenCL node, by recording in \p event, a pointer to a
			
 
				+    <c>cl_event</c>, the event of the last submitted transfer. Must
			
 
				+    return 0 if the transfer was actually completed completely
			
 
				+    synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				+    still ongoing and should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node MPI Slave
			
 
				-node, with the given even. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node MPI Slave node, with the given event. Must return 0 if the
			
 
				+    transfer was actually completed completely synchronously, or
			
 
				+    <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+    should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node CPU
			
 
				-node, with the given event. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+    the \p dst_node CPU node, with the given event. Must return 0 if
			
 
				+    the transfer was actually completed completely synchronously, or
			
 
				+    <c>-EAGAIN</c> if at least some transfers are still ongoing and
			
 
				+    should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void * event)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MPI Slave node to the \p dst_interface interface on the \p dst_node MPI Slave 
			
 
				-node, using the given stream. Must return 0 if the transfer was
			
 
				-actually completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				-some transfers are still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MPI Slave node to the \p dst_interface interface on
			
 
				+    the \p dst_node MPI Slave node, with the given event. Must
			
 
				+    return 0 if the transfer was actually completed completely
			
 
				+    synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				+    still ongoing and should be awaited for by the core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::ram_to_mic_async)(void *src_intreface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node CPU node to the \p dst_interface interface on the \p dst_node
			
 
				-MIC node. Must return 0 if the transfer was actually completed
			
 
				-completely synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				-still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node CPU node to the \p dst_interface interface on the \p
			
 
				+    dst_node MIC node. Must return 0 if the transfer was actually
			
 
				+    completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+    some transfers are still ongoing and should be awaited by the
			
 
				+    core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::mic_to_ram_async)(void *src_intreface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node MIC node to the \p dst_interface interface on the \p dst_node
			
 
				-CPU node. Must return 0 if the transfer was actually completed
			
 
				-completely synchronously, or <c>-EAGAIN</c> if at least some transfers are
			
 
				-still ongoing and should be awaited for by the core.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node MIC node to the \p dst_interface interface on the \p
			
 
				+    dst_node CPU node. Must return 0 if the transfer was actually
			
 
				+    completed completely synchronously, or <c>-EAGAIN</c> if at least
			
 
				+    some transfers are still ongoing and should be awaited by the
			
 
				+    core.
			
 
				 
			
 
				 \var int (*starpu_data_copy_methods::any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
			
 
				-Define how to copy data from the \p src_interface interface on the
			
 
				-\p src_node node to the \p dst_interface interface on the \p dst_node node.
			
 
				-This is meant to be implemented through the starpu_interface_copy()
			
 
				-helper, to which async_data should be passed as such, and will be used
			
 
				-to manage asynchronicity. This must return <c>-EAGAIN</c> if any of the
			
 
				-starpu_interface_copy() calls has returned <c>-EAGAIN</c> (i.e. at least some
			
 
				-transfer is still ongoing), and return 0 otherwise.
			
 
				+    Define how to copy data from the \p src_interface interface on the
			
 
				+    \p src_node node to the \p dst_interface interface on the \p
			
 
				+    dst_node node. This is meant to be implemented through the
			
 
				+    starpu_interface_copy() helper, to which async_data should be
			
 
				+    passed as such, and will be used to manage asynchronicity. This
			
 
				+    must return <c>-EAGAIN</c> if any of the starpu_interface_copy()
			
 
				+    calls has returned <c>-EAGAIN</c> (i.e. at least some transfer is
			
 
				+    still ongoing), and return 0 otherwise.
			
 
				 
			
 
				 \enum starpu_data_interface_id
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Identifier for all predefined StarPU data interfaces
			
 
				 \var starpu_data_interface_id::STARPU_UNKNOWN_INTERFACE_ID
			
 
				-Unknown interface
			
 
				+    Unknown interface
			
 
				 \var starpu_data_interface_id::STARPU_MATRIX_INTERFACE_ID
			
 
				-Identifier for the matrix data interface
			
 
				+    Identifier for the matrix data interface
			
 
				 \var starpu_data_interface_id::STARPU_BLOCK_INTERFACE_ID
			
 
				-Identifier for block data interface
			
 
				+    Identifier for block data interface
			
 
				 \var starpu_data_interface_id::STARPU_VECTOR_INTERFACE_ID
			
 
				-Identifier for the vector data interface
			
 
				+    Identifier for the vector data interface
			
 
				 \var starpu_data_interface_id::STARPU_CSR_INTERFACE_ID
			
 
				-Identifier for the csr data interface
			
 
				+    Identifier for the csr data interface
			
 
				 \var starpu_data_interface_id::STARPU_BCSR_INTERFACE_ID
			
 
				-Identifier for the bcsr data interface
			
 
				+    Identifier for the bcsr data interface
			
 
				 \var starpu_data_interface_id::STARPU_VARIABLE_INTERFACE_ID
			
 
				-Identifier for the variable data interface
			
 
				+    Identifier for the variable data interface
			
 
				 \var starpu_data_interface_id::STARPU_VOID_INTERFACE_ID
			
 
				-Identifier for the void data interface
			
 
				+    Identifier for the void data interface
			
 
				 \var starpu_data_interface_id::STARPU_MULTIFORMAT_INTERFACE_ID
			
 
				-Identifier for the multiformat data interface
			
 
				+    Identifier for the multiformat data interface
			
 
				 \var starpu_data_interface_id::STARPU_COO_INTERFACE_ID
			
 
				-Identifier for the coo data interface
			
 
				+    Identifier for the coo data interface
			
 
				 \var starpu_data_interface_id::STARPU_MAX_INTERFACE_ID
			
 
				-Maximum number of data interfaces
			
 
				+    Maximum number of data interfaces
			
 
				 
			
 
				 @name Registering Data
			
 
				 \ingroup API_Data_Interfaces
			
@@ -306,8 +321,8 @@ Register a void interface. There is no data really associated
 
				 to that interface, but it may be used as a synchronization mechanism.
			
 
				 It also permits to express an abstract piece of data that is managed
			
 
				 by the application internally: this makes it possible to forbid the
			
 
				-concurrent execution of different tasks accessing the same <c>void</c> data
			
 
				-in read-write concurrently. 
			
 
				+concurrent execution of different tasks accessing the same <c>void</c>
			
 
				+data in read-write concurrently.
			
 
				 
			
 
				 \fn void starpu_variable_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, size_t size)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -329,7 +344,7 @@ buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
 
				 
			
 
				 \fn void starpu_vector_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize)
			
 
				 \ingroup API_Data_Interfaces
			
 
				-Register the \p nx elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it.
			
 
				+Register the \p nx \p elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it.
			
 
				 
			
 
				 Here is an example of how to use the function.
			
 
				 \code{.c}
			
@@ -395,7 +410,7 @@ Blocks have size \p r * \p c. \p nrow is the number of rows (in terms of
 
				 blocks), \p colind[i] is the block-column index for block i in \p nzval,
			
 
				 \p rowptr[i] is the block-index (in \p nzval) of the first block of row i.
			
 
				 \p firstentry is the index of the first entry of the given arrays
			
 
				-(usually 0 or 1). 
			
 
				+(usually 0 or 1).
			
 
				 
			
 
				 \fn void starpu_csr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -429,7 +444,7 @@ if handle’s interface does not support this operation or data for this
 
				 \fn void *starpu_data_get_local_ptr(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Return the local pointer associated with \p handle or <c>NULL</c> if
			
 
				-\p handle’s interface does not have data allocated locally 
			
 
				+\p handle’s interface does not have any data allocated locally.
			
 
				 
			
 
				 \fn enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -463,18 +478,18 @@ after calling the data unpacking operation.
 
				 \ingroup API_Data_Interfaces
			
 
				 
			
 
				 \struct starpu_variable_interface
			
 
				-Variable interface for a single data (not a vector, a matrix, a list, ...)
			
 
				 \ingroup API_Data_Interfaces
			
 
				+Variable interface for a single data (not a vector, a matrix, a list, ...)
			
 
				 \var enum starpu_data_interface_id starpu_variable_interface::id
			
 
				-Identifier of the interface
			
 
				+    Identifier of the interface
			
 
				 \var uintptr_t starpu_variable_interface::ptr
			
 
				-local pointer of the variable
			
 
				+    local pointer of the variable
			
 
				 \var uintptr_t starpu_variable_interface::dev_handle
			
 
				-device handle of the variable.
			
 
				+    device handle of the variable.
			
 
				 \var size_t starpu_variable_interface::offset
			
 
				-offset in the variable
			
 
				+    offset in the variable
			
 
				 \var size_t starpu_variable_interface::elemsize
			
 
				-size of the variable
			
 
				+    size of the variable
			
 
				 
			
 
				 \fn size_t starpu_variable_get_elemsize(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -495,7 +510,7 @@ Return the size of the variable designated by \p interface.
 
				 \def STARPU_VARIABLE_GET_DEV_HANDLE(interface)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Return a device handle for the variable designated by
			
 
				-\p interface, to be used on OpenCL. The offset documented below has to be
			
 
				+\p interface, to be used with OpenCL. The offset documented below has to be
			
 
				 used in addition to this.
			
 
				 
			
 
				 \def STARPU_VARIABLE_GET_OFFSET(interface)
			
@@ -510,19 +525,19 @@ be used with the device handle.
 
				 Vector interface
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_vector_interface::id
			
 
				-Identifier of the interface
			
 
				+    Identifier of the interface
			
 
				 \var uintptr_t starpu_vector_interface::ptr
			
 
				-local pointer of the vector
			
 
				+    local pointer of the vector
			
 
				 \var uintptr_t starpu_vector_interface::dev_handle
			
 
				-device handle of the vector.
			
 
				+    device handle of the vector.
			
 
				 \var size_t starpu_vector_interface::offset
			
 
				-offset in the vector
			
 
				+    offset in the vector
			
 
				 \var uint32_t starpu_vector_interface::nx
			
 
				-number of elements on the x-axis of the vector
			
 
				+    number of elements on the x-axis of the vector
			
 
				 \var size_t starpu_vector_interface::elemsize
			
 
				-size of the elements of the vector
			
 
				+    size of the elements of the vector
			
 
				 \var uint32_t starpu_vector_interface::slice_base
			
 
				-vector slice base, used by the StarPU OpenMP runtime support
			
 
				+    vector slice base, used by the StarPU OpenMP runtime support
			
 
				 
			
 
				 \fn uint32_t starpu_vector_get_nx(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -545,7 +560,7 @@ be used instead.
 
				 \def STARPU_VECTOR_GET_DEV_HANDLE(interface)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Return a device handle for the array designated by \p interface,
			
 
				-to be used on OpenCL. the offset documented below has to be used in
			
 
				+to be used with OpenCL. The offset documented below has to be used in
			
 
				 addition to this.
			
 
				 
			
 
				 \def STARPU_VECTOR_GET_OFFSET(interface)
			
@@ -577,22 +592,22 @@ Return the OpenMP slice base annotation of each element of the array designated
 
				 Matrix interface for dense matrices
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_matrix_interface::id
			
 
				-Identifier of the interface
			
 
				+    Identifier of the interface
			
 
				 \var uintptr_t starpu_matrix_interface::ptr
			
 
				-local pointer of the matrix
			
 
				+    local pointer of the matrix
			
 
				 \var uintptr_t starpu_matrix_interface::dev_handle
			
 
				-device handle of the matrix.
			
 
				+    device handle of the matrix.
			
 
				 \var size_t starpu_matrix_interface::offset
			
 
				-offset in the matrix
			
 
				+    offset in the matrix
			
 
				 \var uint32_t starpu_matrix_interface::nx
			
 
				-number of elements on the x-axis of the matrix
			
 
				+    number of elements on the x-axis of the matrix
			
 
				 \var uint32_t starpu_matrix_interface::ny
			
 
				-number of elements on the y-axis of the matrix
			
 
				+    number of elements on the y-axis of the matrix
			
 
				 \var uint32_t starpu_matrix_interface::ld
			
 
				-number of elements between each row of the matrix. Maybe be equal to
			
 
				-starpu_matrix_interface::nx when there is no padding.
			
 
				+    number of elements between each row of the matrix. May be equal
			
 
				+    to starpu_matrix_interface::nx when there is no padding.
			
 
				 \var size_t starpu_matrix_interface::elemsize
			
 
				-size of the elements of the matrix
			
 
				+    size of the elements of the matrix
			
 
				 
			
 
				 \fn uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -627,7 +642,7 @@ and offset need to be used instead.
 
				 \def STARPU_MATRIX_GET_DEV_HANDLE(interface)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Return a device handle for the matrix designated by \p interface,
			
 
				-to be used on OpenCL. The offset documented below has to be used in
			
 
				+to be used with OpenCL. The offset documented below has to be used in
			
 
				 addition to this.
			
 
				 
			
 
				 \def STARPU_MATRIX_GET_OFFSET(interface)
			
@@ -662,25 +677,25 @@ designated by \p interface.
 
				 Block interface for 3D dense blocks
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_block_interface::id
			
 
				-identifier of the interface
			
 
				+    identifier of the interface
			
 
				 \var uintptr_t starpu_block_interface::ptr
			
 
				-local pointer of the block
			
 
				+    local pointer of the block
			
 
				 \var uintptr_t starpu_block_interface::dev_handle
			
 
				-device handle of the block.
			
 
				+    device handle of the block.
			
 
				 \var size_t starpu_block_interface::offset
			
 
				-offset in the block.
			
 
				+    offset in the block.
			
 
				 \var uint32_t starpu_block_interface::nx
			
 
				-number of elements on the x-axis of the block.
			
 
				+    number of elements on the x-axis of the block.
			
 
				 \var uint32_t starpu_block_interface::ny
			
 
				-number of elements on the y-axis of the block.
			
 
				+    number of elements on the y-axis of the block.
			
 
				 \var uint32_t starpu_block_interface::nz
			
 
				-number of elements on the z-axis of the block.
			
 
				+    number of elements on the z-axis of the block.
			
 
				 \var uint32_t starpu_block_interface::ldy
			
 
				-number of elements between two lines
			
 
				+    number of elements between two lines
			
 
				 \var uint32_t starpu_block_interface::ldz
			
 
				-number of elements between two planes
			
 
				+    number of elements between two planes
			
 
				 \var size_t starpu_block_interface::elemsize
			
 
				-size of the elements of the block.
			
 
				+    size of the elements of the block.
			
 
				 
			
 
				 \fn uint32_t starpu_block_get_nx(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -770,25 +785,25 @@ BCSR interface for sparse matrices (blocked compressed sparse
 
				 row representation)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_bcsr_interface::id
			
 
				-Identifier of the interface
			
 
				+    Identifier of the interface
			
 
				 \var uint32_t starpu_bcsr_interface::nnz
			
 
				-number of non-zero BLOCKS
			
 
				+    number of non-zero BLOCKS
			
 
				 \var uint32_t starpu_bcsr_interface::nrow
			
 
				-number of rows (in terms of BLOCKS)
			
 
				+    number of rows (in terms of BLOCKS)
			
 
				 \var uintptr_t starpu_bcsr_interface::nzval
			
 
				-non-zero values
			
 
				+    non-zero values
			
 
				 \var uint32_t *starpu_bcsr_interface::colind
			
 
				-position of non-zero entried on the row
			
 
				+    position of non-zero entries on the row
			
 
				 \var uint32_t *starpu_bcsr_interface::rowptr
			
 
				-index (in nzval) of the first entry of the row
			
 
				+    index (in nzval) of the first entry of the row
			
 
				 \var starpu_bcsr_interface::firstentry
			
 
				-k for k-based indexing (0 or 1 usually). Also useful when partitionning the matrix.
			
 
				+    k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix.
			
 
				 \var uint32_t starpu_bcsr_interface::r
			
 
				-size of the blocks
			
 
				+    size of the blocks
			
 
				 \var uint32_t starpu_bcsr_interface::c
			
 
				-size of the blocks
			
 
				+    size of the blocks
			
 
				 \var size_t starpu_bcsr_interface::elemsize;
			
 
				-size of the elements of the matrix
			
 
				+    size of the elements of the matrix
			
 
				 
			
 
				 \fn uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -883,21 +898,21 @@ matrix designated by \p interface, to be used with the device handles.
 
				 CSR interface for sparse matrices (compressed sparse row representation)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_csr_interface::id
			
 
				-Identifier of the interface
			
 
				+    Identifier of the interface
			
 
				 \var uint32_t starpu_csr_interface::nnz
			
 
				-number of non-zero entries
			
 
				+    number of non-zero entries
			
 
				 \var uint32_t starpu_csr_interface::nrow
			
 
				-number of rows
			
 
				+    number of rows
			
 
				 \var uintptr_t starpu_csr_interface::nzval
			
 
				-non-zero values
			
 
				+    non-zero values
			
 
				 \var uint32_t *starpu_csr_interface::colind
			
 
				-position of non-zero entries on the row
			
 
				+    position of non-zero entries on the row
			
 
				 \var uint32_t *starpu_csr_interface::rowptr
			
 
				-index (in nzval) of the first entry of the row
			
 
				+    index (in nzval) of the first entry of the row
			
 
				 \var uint32_t starpu_csr_interface::firstentry
			
 
				-k for k-based indexing (0 or 1 usually). also useful when partitionning the matrix.
			
 
				+    k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix.
			
 
				 \var size_t starpu_csr_interface::elemsize
			
 
				-size of the elements of the matrix
			
 
				+    size of the elements of the matrix
			
 
				 
			
 
				 \fn uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -999,21 +1014,21 @@ designated by \p interface.
 
				 COO Matrices
			
 
				 \ingroup API_Data_Interfaces
			
 
				 \var enum starpu_data_interface_id starpu_coo_interface::id
			
 
				-identifier of the interface
			
 
				+    identifier of the interface
			
 
				 \var uint32_t  *starpu_coo_interface::columns
			
 
				-column array of the matrix
			
 
				+    column array of the matrix
			
 
				 \var uint32_t  *starpu_coo_interface::rows
			
 
				-row array of the matrix
			
 
				+    row array of the matrix
			
 
				 \var uintptr_t starpu_coo_interface::values
			
 
				-values of the matrix
			
 
				+    values of the matrix
			
 
				 \var uint32_t  starpu_coo_interface::nx
			
 
				-number of elements on the x-axis of the matrix
			
 
				+    number of elements on the x-axis of the matrix
			
 
				 \var uint32_t  starpu_coo_interface::ny
			
 
				-number of elements on the y-axis of the matrix
			
 
				+    number of elements on the y-axis of the matrix
			
 
				 \var uint32_t  starpu_coo_interface::n_values
			
 
				-number of values registered in the matrix
			
 
				+    number of values registered in the matrix
			
 
				 \var size_t starpu_coo_interface::elemsize
			
 
				-size of the elements of the matrix
			
 
				+    size of the elements of the matrix
			
 
				 
			
 
				 \def STARPU_COO_GET_COLUMNS(interface)
			
 
				 \ingroup API_Data_Interfaces
			
@@ -1023,7 +1038,7 @@ by \p interface.
 
				 \def STARPU_COO_GET_COLUMNS_DEV_HANDLE(interface)
			
 
				 \ingroup API_Data_Interfaces
			
 
				 Return a device handle for the column array of the matrix
			
 
				-designated by \p interface, to be used on OpenCL. The offset documented
			
 
				+designated by \p interface, to be used with OpenCL. The offset documented
			
 
				 below has to be used in addition to this.
			
 
				 
			
 
				 \def STARPU_COO_GET_ROWS(interface)
			
@@ -1103,7 +1118,7 @@ with starpu_malloc_on_node().
 
				 
			
 
				 \fn void starpu_malloc_on_node_set_default_flags(unsigned node, int flags)
			
 
				 \ingroup API_Data_Interfaces
			
 
				-Define the defaultflags for allocations performed by starpu_malloc_on_node() and
			
 
				+Define the default flags for allocations performed by starpu_malloc_on_node() and
			
 
				 starpu_free_on_node(). The default is \ref STARPU_MALLOC_PINNED | \ref STARPU_MALLOC_COUNT.
			
 
				 
			
 
				 \fn int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data)
			
@@ -1141,4 +1156,3 @@ Return the next available id for a newly created data interface
 
				 (\ref DefiningANewDataInterface).
			
 
				 
			
 
				 */
			
 
				-
			
--- a/doc/doxygen/chapters/api/data_management.doxy
+++ b/doc/doxygen/chapters/api/data_management.doxy
@@ -30,55 +30,48 @@ concurrent data accesses, see \ref ConcurrentDataAccess for the details.
 
				 \ingroup API_Data_Management
			
 
				 This datatype describes a data access mode.
			
 
				 \var starpu_data_access_mode::STARPU_NONE
			
 
				-\ingroup API_Data_Management
			
 
				-TODO
			
 
				+    TODO
			
 
				 \var starpu_data_access_mode::STARPU_R
			
 
				-\ingroup API_Data_Management
			
 
				-read-only mode.
			
 
				+    read-only mode.
			
 
				 \var starpu_data_access_mode::STARPU_W
			
 
				-\ingroup API_Data_Management
			
 
				-write-only mode.
			
 
				+    write-only mode.
			
 
				 \var starpu_data_access_mode::STARPU_RW
			
 
				-\ingroup API_Data_Management
			
 
				-read-write mode. This is equivalent to ::STARPU_R|::STARPU_W
			
 
				+    read-write mode. This is equivalent to ::STARPU_R|::STARPU_W
			
 
				 \var starpu_data_access_mode::STARPU_SCRATCH
			
 
				-\ingroup API_Data_Management
			
 
				-A temporary buffer is allocated for the task, but StarPU does not
			
 
				-enforce data consistency---i.e. each device has its own buffer,
			
 
				-independently from each other (even for CPUs), and no data transfer is
			
 
				-ever performed. This is useful for temporary variables to avoid
			
 
				-allocating/freeing buffers inside each task. Currently, no behavior is
			
 
				-defined concerning the relation with the ::STARPU_R and ::STARPU_W modes
			
 
				-and the value provided at registration --- i.e., the value of the
			
 
				-scratch buffer is undefined at entry of the codelet function.  It is
			
 
				-being considered for future extensions at least to define the initial
			
 
				-value.  For now, data to be used in ::STARPU_SCRATCH mode should be
			
 
				-registered with node <c>-1</c> and a <c>NULL</c> pointer, since the
			
 
				-value of the provided buffer is simply ignored for now.
			
 
				+    A temporary buffer is allocated for the task, but StarPU does not
			
 
				+    enforce data consistency---i.e. each device has its own buffer,
			
 
				+    independently from each other (even for CPUs), and no data
			
 
				+    transfer is ever performed. This is useful for temporary variables
			
 
				+    to avoid allocating/freeing buffers inside each task. Currently,
			
 
				+    no behavior is defined concerning the relation with the ::STARPU_R
			
 
				+    and ::STARPU_W modes and the value provided at registration ---
			
 
				+    i.e., the value of the scratch buffer is undefined at entry of the
			
 
				+    codelet function.  It is being considered for future extensions at
			
 
				+    least to define the initial value.  For now, data to be used in
			
 
				+    ::STARPU_SCRATCH mode should be registered with node -1 and
			
 
				+    a <c>NULL</c> pointer, since the value of the provided buffer is
			
 
				+    simply ignored for now.
			
 
				 \var starpu_data_access_mode::STARPU_REDUX
			
 
				-\ingroup API_Data_Management
			
 
				-todo
			
 
				+    todo
			
 
				 \var starpu_data_access_mode::STARPU_COMMUTE
			
 
				-\ingroup API_Data_Management
			
 
				-In addition to that, ::STARPU_COMMUTE can be passed along ::STARPU_W
			
 
				-or ::STARPU_RW to express that StarPU can let tasks commute, which is
			
 
				-useful e.g. when bringing a contribution into some data, which can be
			
 
				-done in any order (but still require sequential consistency against
			
 
				-reads or non-commutative writes).
			
 
				+    ::STARPU_COMMUTE can be passed along
			
 
				+    ::STARPU_W or ::STARPU_RW to express that StarPU can let tasks
			
 
				+    commute, which is useful e.g. when bringing a contribution into
			
 
				+    some data, which can be done in any order (but still requires
			
 
				+    sequential consistency against reads or non-commutative writes).
			
 
				 \var starpu_data_access_mode::STARPU_SSEND
			
 
				-\ingroup API_Data_Management
			
 
				-used in starpu_mpi_insert_task() to specify the data has to be sent
			
 
				-using a synchronous and non-blocking mode (see starpu_mpi_issend())
			
 
				+    used in starpu_mpi_insert_task() to specify the data has to be
			
 
				+    sent using a synchronous and non-blocking mode (see
			
 
				+    starpu_mpi_issend())
			
 
				 \var starpu_data_access_mode::STARPU_LOCALITY
			
 
				-\ingroup API_Data_Management
			
 
				-used to tell the scheduler which data is the most important for the task, and
			
 
				-should thus be used to try to group tasks on the same core or cache, etc. For
			
 
				-now only the ws and lws schedulers take this flag into account, and only when
			
 
				-rebuild with USE_LOCALITY flag defined in the
			
 
				-src/sched_policies/work_stealing_policy.c source code.
			
 
				+    used to tell the scheduler which data is the most important for
			
 
				+    the task, and should thus be used to try to group tasks on the
			
 
				+    same core or cache, etc. For now only the ws and lws schedulers
			
 
				+    take this flag into account, and only when rebuilt with the
			
 
				+    USE_LOCALITY flag defined in the
			
 
				+    src/sched_policies/work_stealing_policy.c source code.
			
 
				 \var starpu_data_access_mode::STARPU_ACCESS_MODE_MAX
			
 
				-\ingroup API_Data_Management
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 @name Basic Data Management API
			
 
				 \ingroup API_Data_Management
			
@@ -141,7 +134,7 @@ same interface as the handle \p handlesrc.
 
				 
			
 
				 \fn void starpu_data_unregister(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-This function unregisters a data handle from StarPU. If the
			
 
				+Unregister a data \p handle from StarPU. If the
			
 
				 data was automatically allocated by StarPU because the home node was
			
 
				 -1, all automatically allocated buffers are freed. Otherwise, a valid
			
 
				 copy of the data is put back into the home node in the buffer that was
			
@@ -158,61 +151,61 @@ buffer that was initially registered.
 
				 
			
 
				 \fn void starpu_data_unregister_submit(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-Destroy the data handle once it is not needed anymore by any
			
 
				+Destroy the data \p handle once it is not needed anymore by any
			
 
				 submitted task. No coherency is assumed.
			
 
				 
			
 
				 \fn void starpu_data_invalidate(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-Destroy all replicates of the data handle immediately. After
			
 
				-data invalidation, the first access to the handle must be performed in
			
 
				-write-only mode. Accessing an invalidated data in read-mode results in
			
 
				-undefined behaviour.
			
 
				+Destroy all replicates of the data \p handle immediately. After
			
 
				+data invalidation, the first access to \p handle must be performed in
			
 
				+::STARPU_W mode. Accessing an invalidated data in ::STARPU_R mode
			
 
				+results in undefined behaviour.
			
 
				 
			
 
				 \fn void starpu_data_invalidate_submit(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-Submits invalidation of the data handle after completion of
			
 
				+Submit invalidation of the data \p handle after completion of
			
 
				 previously submitted tasks.
			
 
				 
			
 
				 \fn void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask)
			
 
				 \ingroup API_Data_Management
			
 
				-This function sets the write-through mask of a given data (and
			
 
				+Set the write-through mask of the data \p handle (and
			
 
				 its children), i.e. a bitmask of nodes where the data should be always
			
 
				 replicated after modification. It also prevents the data from being
			
 
				 evicted from these nodes when memory gets scarce. When the data is
			
 
				-modified, it is automatically transfered into those memory node. For
			
 
				+modified, it is automatically transferred into those memory nodes. For
			
 
				 instance a <c>1<<0</c> write-through mask means that the CUDA workers
			
 
				 will commit their changes in main memory (node 0).
			
 
				 
			
 
				 \fn int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				 \ingroup API_Data_Management
			
 
				-Issue a fetch request for a given data to a given node, i.e.
			
 
				+Issue a fetch request for the data \p handle to \p node, i.e.
			
 
				 requests that the data be replicated to the given node as soon as possible, so that it is
			
 
				-available there for tasks. If the \p async parameter is 0, the call will
			
 
				+available there for tasks. If \p async is 0, the call will
			
 
				 block until the transfer is achieved, else the call will return immediately,
			
 
				 after having just queued the request. In the latter case, the request will
			
 
				 asynchronously wait for the completion of any task writing on the data.
			
 
				 
			
 
				 \fn int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				 \ingroup API_Data_Management
			
 
				-Issue a prefetch request for a given data to a given node, i.e.
			
 
				-requests that the data be replicated to the given node when there is room for it, so that it is
			
 
				-available there for tasks. If the \p async parameter is 0, the call will
			
 
				+Issue a prefetch request for the data \p handle to \p node, i.e.
			
 
				+requests that the data be replicated to \p node when there is room for it, so that it is
			
 
				+available there for tasks. If \p async is 0, the call will
			
 
				 block until the transfer is achieved, else the call will return immediately,
			
 
				 after having just queued the request. In the latter case, the request will
			
 
				 asynchronously wait for the completion of any task writing on the data.
			
 
				 
			
 
				 \fn int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				 \ingroup API_Data_Management
			
 
				-Issue an idle prefetch request for a given data to a given node, i.e.
			
 
				-requests that the data be replicated to the given node, so that it is
			
 
				-available there for tasks, but only when the bus is really idle. If the \p async parameter is 0, the call will
			
 
				+Issue an idle prefetch request for the data \p handle to \p node, i.e.
			
 
				+requests that the data be replicated to \p node, so that it is
			
 
				+available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
			
 
				 block until the transfer is achieved, else the call will return immediately,
			
 
				 after having just queued the request. In the latter case, the request will
			
 
				 asynchronously wait for the completion of any task writing on the data.
			
 
				 
			
 
				 \fn void starpu_data_wont_use(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-Advise StarPU that this handle will not be used in the close future, and is
			
 
				+Advise StarPU that \p handle will not be used in the near future, and is
			
 
				 thus a good candidate for eviction from GPUs. StarPU will thus write its value
			
 
				 back to its home node when the bus is idle, and select this data in priority
			
 
				 for eviction when memory gets low.
			
@@ -224,7 +217,7 @@ Return the handle corresponding to the data pointed to by the \p ptr host pointe
 
				 \fn int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node)
			
 
				 \ingroup API_Data_Management
			
 
				 Explicitly ask StarPU to allocate room for a piece of data on
			
 
				-the specified memory node.
			
 
				+the specified memory \p node.
			
 
				 
			
 
				 \fn void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested)
			
 
				 \ingroup API_Data_Management
			
@@ -232,13 +225,12 @@ Query the status of \p handle on the specified \p memory_node.
 
				 
			
 
				 \fn void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important)
			
 
				 \ingroup API_Data_Management
			
 
				-This function allows to specify that a piece of data can be
			
 
				-discarded without impacting the application.
			
 
				+Specify that the data \p handle can be discarded without impacting the application.
			
 
				 
			
 
				 \fn void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl)
			
 
				 \ingroup API_Data_Management
			
 
				-This sets the codelets to be used for \p handle when it is
			
 
				-accessed in the mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				+Set the codelets to be used for \p handle when it is accessed in the
			
 
				+mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				 the codelet \p init_cl, and reduction between per-worker buffers will be
			
 
				 done with the codelet \p redux_cl.
			
 
				 
			
@@ -248,14 +240,14 @@ todo
 
				 
			
 
				 \fn void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data)
			
 
				 \ingroup API_Data_Management
			
 
				-This sets the "user_data" field for the \p handle to \p user_data . It can
			
 
				-then be retrieved with starpu_data_get_user_data. \p user_data can be any
			
 
				+Set the field \c user_data for the \p handle to \p user_data . It can
			
 
				+then be retrieved with starpu_data_get_user_data(). \p user_data can be any
			
 
				 application-defined value, for instance a pointer to an object-oriented
			
 
				 container for the data.
			
 
				 
			
 
				 \fn void *starpu_data_get_user_data(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-This retrieves the "user_data" field previously set for the \p handle .
			
 
				+Retrieve the field \c user_data previously set for the \p handle.
			
 
				 
			
 
				 @name Access registered data from the application
			
 
				 \ingroup API_Data_Management
			
@@ -264,10 +256,10 @@ This retrieves the "user_data" field previously set for the \p handle .
 
				 \ingroup API_Data_Management
			
 
				 The application must call this function prior to accessing
			
 
				 registered data from main memory outside tasks. StarPU ensures that
			
 
				-the application will get an up-to-date copy of the data in main memory
			
 
				+the application will get an up-to-date copy of \p handle in main memory
			
 
				 located where the data was originally registered, and that all
			
 
				 concurrent accesses (e.g. from tasks) will be consistent with the
			
 
				-access mode specified in the mode argument. starpu_data_release() must
			
 
				+access mode specified with \p mode. starpu_data_release() must
			
 
				 be called once the application does not need to access the piece of
			
 
				 data anymore. Note that implicit data dependencies are also enforced
			
 
				 by starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
			
@@ -281,9 +273,9 @@ successful completion, this function returns 0.
 
				 \fn int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_Data_Management
			
 
				 Asynchronous equivalent of starpu_data_acquire(). When the data
			
 
				-specified in \p handle is available in the appropriate access
			
 
				-mode, the \p callback function is executed. The application may access
			
 
				-the requested data during the execution of this \p callback. The \p callback
			
 
				+specified in \p handle is available in the access \p mode, the \p
			
 
				+callback function is executed. The application may access
			
 
				+the requested data during the execution of \p callback. The \p callback
			
 
				 function must call starpu_data_release() once the application does not
			
 
				 need to access the piece of data anymore. Note that implicit data
			
 
				 dependencies are also enforced by starpu_data_acquire_cb() in case they
			
@@ -294,8 +286,8 @@ completion, this function returns 0.
 
				 \fn int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 \ingroup API_Data_Management
			
 
				 Equivalent of starpu_data_acquire_cb() with the possibility of enabling or disabling data dependencies.
			
 
				-When the data specified in \p handle is available in the appropriate access
			
 
				-mode, the \p callback function is executed. The application may access
			
 
				+When the data specified in \p handle is available in the access
			
 
				+\p mode, the \p callback function is executed. The application may access
			
 
				 the requested data during the execution of this \p callback. The \p callback
			
 
				 function must call starpu_data_release() once the application does not
			
 
				 need to access the piece of data anymore. Note that implicit data
			
@@ -320,16 +312,16 @@ This is mostly useful inside starpu only.
 
				 This is the same as starpu_data_acquire(), except that the data
			
 
				 will be available on the given memory node instead of main
			
 
				 memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				-explicit node number.
			
 
				+::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				+used instead of an explicit node number.
			
 
				 
			
 
				 \fn int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_acquire_cb(), except that the
			
 
				 data will be available on the given memory node instead of main
			
 
				 memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				-explicit node number.
			
 
				+::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				+used instead of an explicit node number.
			
 
				 
			
 
				 \fn int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 \ingroup API_Data_Management
			
@@ -350,7 +342,7 @@ called from task callbacks.
 
				 
			
 
				 \fn void starpu_data_release(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Management
			
 
				-This function releases the piece of data acquired by the
			
 
				+Release the piece of data acquired by the
			
 
				 application either by starpu_data_acquire() or by
			
 
				 starpu_data_acquire_cb().
			
 
				 
			
@@ -358,19 +350,20 @@ starpu_data_acquire_cb().
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_release(), except that the data
			
 
				 will be available on the given memory \p node instead of main memory.
			
 
				-The \p node parameter must be exactly the same as the corresponding starpu_data_acquire_on_node* call.
			
 
				+The \p node parameter must be exactly the same as the corresponding \c
			
 
				+starpu_data_acquire_on_node* call.
			
 
				 
			
 
				 \fn starpu_arbiter_t starpu_arbiter_create(void)
			
 
				 \ingroup API_Data_Management
			
 
				-This creates a data access arbiter, see \ref ConcurrentDataAccess for the details
			
 
				+Create a data access arbiter, see \ref ConcurrentDataAccess for the details
			
 
				 
			
 
				 \fn void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter)
			
 
				 \ingroup API_Data_Management
			
 
				-This makes accesses to \p handle managed by \p arbiter
			
 
				+Make access to \p handle managed by \p arbiter
			
 
				 
			
 
				 \fn void starpu_arbiter_destroy(starpu_arbiter_t arbiter)
			
 
				 \ingroup API_Data_Management
			
 
				-This destroys the \p arbiter . This must only be called after all data assigned
			
 
				-to it have been unregistered.
			
 
				+Destroy the \p arbiter . This must only be called after all data
			
 
				+assigned to it have been unregistered.
			
 
				 
			
 
				 */
			
--- a/doc/doxygen/chapters/api/data_partition.doxy
+++ b/doc/doxygen/chapters/api/data_partition.doxy
@@ -13,31 +13,32 @@ The filter structure describes a data partitioning operation, to be
 
				 given to the starpu_data_partition() function.
			
 
				 \ingroup API_Data_Partition
			
 
				 \var void (*starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)
			
 
				-This function fills the \p child_interface structure with interface
			
 
				-information for the \p id -th child of the parent \p father_interface (among
			
 
				-\p nparts).
			
 
				+    Fill the \p child_interface structure with interface information
			
 
				+    for the \p id -th child of the parent \p father_interface (among
			
 
				+    \p nparts).
			
 
				 \var unsigned starpu_data_filter::nchildren
			
 
				-This is the number of parts to partition the data into.
			
 
				+    Number of parts to partition the data into.
			
 
				 \var unsigned (*starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
			
 
				-This returns the number of children. This can be used instead of
			
 
				-starpu_data_filter::nchildren when the number of children depends on the actual data (e.g.
			
 
				-the number of blocks in a sparse matrix).
			
 
				+    Return the number of children. This can be used instead of
			
 
				+    starpu_data_filter::nchildren when the number of children depends
			
 
				+    on the actual data (e.g. the number of blocks in a sparse matrix).
			
 
				 \var struct starpu_data_interface_ops *(*starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id)
			
 
				-In case the resulting children use a different data interface, this
			
 
				-function returns which interface is used by child number \p id.
			
 
				+    In case the resulting children use a different data interface,
			
 
				+    this function returns which interface is used by child number \p
			
 
				+    id.
			
 
				 \var unsigned starpu_data_filter::filter_arg
			
 
				-Allow to define an additional parameter for the filter function.
			
 
				+    Allow to define an additional parameter for the filter function.
			
 
				 \var void *starpu_data_filter::filter_arg_ptr
			
 
				-Allow to define an additional pointer parameter for the filter
			
 
				-function, such as the sizes of the different parts.
			
 
				+    Allow to define an additional pointer parameter for the filter
			
 
				+    function, such as the sizes of the different parts.
			
 
				 
			
 
				 @name Basic API
			
 
				 \ingroup API_Data_Partition
			
 
				 
			
 
				 \fn void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
			
 
				 \ingroup API_Data_Partition
			
 
				-This requests partitioning one StarPU data \p initial_handle into
			
 
				-several subdata according to the filter \p f.
			
 
				+Request the partitioning of \p initial_handle into several subdata
			
 
				+according to the filter \p f.
			
 
				 
			
 
				 Here an example of how to use the function.
			
 
				 \code{.c}
			
@@ -50,10 +51,11 @@ starpu_data_partition(A_handle, &f);
 
				 
			
 
				 \fn void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node)
			
 
				 \ingroup API_Data_Partition
			
 
				-This unapplies one filter, thus unpartitioning the data. The
			
 
				-pieces of data are collected back into one big piece in the
			
 
				-\p gathering_node (usually ::STARPU_MAIN_RAM). Tasks working on the partitioned data must
			
 
				-be already finished when calling starpu_data_unpartition().
			
 
				+Unapply the filter which has been applied to \p root_data, thus
			
 
				+unpartitioning the data. The pieces of data are collected back into
			
 
				+one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				+Tasks working on the partitioned data must be already finished when
			
 
				+calling starpu_data_unpartition().
			
 
				 
			
 
				 Here an example of how to use the function.
			
 
				 \code{.c}
			
@@ -62,7 +64,7 @@ starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
 
				 
			
 
				 \fn int starpu_data_get_nb_children(starpu_data_handle_t handle)
			
 
				 \ingroup API_Data_Partition
			
 
				-This function returns the number of children.
			
 
				+Return the number of children \p handle has been partitioned into.
			
 
				 
			
 
				 \fn starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
			
 
				 \ingroup API_Data_Partition
			
@@ -91,13 +93,13 @@ va_list for the parameter list.
 
				 
			
 
				 \fn void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...)
			
 
				 \ingroup API_Data_Partition
			
 
				-Applies \p nfilters filters to the handle designated by
			
 
				+Apply \p nfilters filters to the handle designated by
			
 
				 \p root_handle recursively. \p nfilters pointers to variables of the type
			
 
				 starpu_data_filter should be given.
			
 
				 
			
 
				 \fn void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa)
			
 
				 \ingroup API_Data_Partition
			
 
				-Applies \p nfilters filters to the handle designated by
			
 
				+Apply \p nfilters filters to the handle designated by
			
 
				 \p root_handle recursively. It uses a va_list of pointers to variables of
			
 
				 the type starpu_data_filter.
			
 
				 
			
@@ -106,11 +108,12 @@ the type starpu_data_filter.
 
				 
			
 
				 \fn void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children)
			
 
				 \ingroup API_Data_Partition
			
 
				-This plans for partitioning one StarPU data handle \p initial_handle into
			
 
				-several subdata according to the filter \p f. The handles are returned into
			
 
				-the \p children array, which has to be the same size as the number of parts
			
 
				-described in \p f. These handles are not immediately usable,
			
 
				-starpu_data_partition_submit() has to be called to submit the actual partitioning.
			
 
				+Plan to partition \p initial_handle into several subdata according to
			
 
				+the filter \p f.
			
 
				+The handles are returned into the \p children array, which has to be
			
 
				+the same size as the number of parts described in \p f. These handles
			
 
				+are not immediately usable, starpu_data_partition_submit() has to be
			
 
				+called to submit the actual partitioning.
			
 
				 
			
 
				 Here is an example of how to use the function:
			
 
				 
			
@@ -125,8 +128,7 @@ starpu_data_partition_plan(A_handle, &f, children);
 
				 
			
 
				 \fn void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				-This submits the actual partitioning of \p initial_handle into the \p nparts
			
 
				+Submit the actual partitioning of \p initial_handle into the \p nparts
			
 
				 \p children handles. This call is asynchronous, it only submits that the
			
 
				 partitioning should be done, so that the \p children handles can now be used to
			
 
				 submit tasks, and \p initial_handle can not be used to submit tasks any more (to
			
@@ -140,7 +142,6 @@ starpu_data_partition_submit(A_handle, nslicesx, children);
 
				 
			
 
				 \fn void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				 This is the same as starpu_data_partition_submit(), but it does not invalidate \p
			
 
				 initial_handle. This allows to continue using it, but the application has to be
			
 
				 careful not to write to \p initial_handle or \p children handles, only read from
			
@@ -158,7 +159,6 @@ submitted.
 
				 
			
 
				 \fn void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				 This assumes that a partitioning of \p initial_handle has already been submited
			
 
				 in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
			
 
				 that partitioning into read-write mode for the \p children, by invalidating \p
			
@@ -166,7 +166,6 @@ initial_handle, and adding the necessary dependencies.
 
				 
			
 
				 \fn void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				 This assumes that \p initial_handle is partitioned into \p children, and submits
			
 
				 an unpartitionning of it, i.e. submitting a gathering of the pieces on the
			
 
				 requested \p gathering_node memory node, and submitting an invalidation of the
			
@@ -177,7 +176,6 @@ should be used to gather the pieces.
 
				 
			
 
				 \fn void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				 This assumes that \p initial_handle is partitioned into \p children, and submits
			
 
				 just a readonly unpartitionning of it, i.e. submitting a gathering of the pieces
			
 
				 on the requested \p gathering_node memory node. It does not invalidate the
			
@@ -189,7 +187,6 @@ should be used to gather the pieces.
 
				 
			
 
				 \fn void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
			
 
				 \ingroup API_Data_Partition
			
 
				-
			
 
				 This should be used to clear the partition planning established between \p
			
 
				 root_data and \p children with starpu_data_partition_plan(). This will notably
			
 
				 submit an unregister all the \p children, which can thus not be used any more
			
@@ -246,13 +243,13 @@ functions for matrix data. Examples on how to use them are shown in
 
				 
			
 
				 \fn void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a dense Matrix along the x dimension, thus
			
 
				+Partition a dense Matrix along the x dimension, thus
			
 
				 getting (x/\p nparts ,y) matrices. If \p nparts does not divide x, the
			
 
				 last submatrix contains the remainder.
			
 
				 
			
 
				 \fn void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a dense Matrix along the x dimension, with a
			
 
				+Partition a dense Matrix along the x dimension, with a
			
 
				 shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
			
 
				 nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
			
 
				 the last submatrix contains the remainder.
			
@@ -264,13 +261,13 @@ examples/filters/shadow2d.c
 
				 
			
 
				 \fn void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a dense Matrix along the y dimension, thus
			
 
				+Partition a dense Matrix along the y dimension, thus
			
 
				 getting (x,y/\p nparts) matrices. If \p nparts does not divide y, the
			
 
				 last submatrix contains the remainder.
			
 
				 
			
 
				 \fn void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a dense Matrix along the y dimension, with a
			
 
				+Partition a dense Matrix along the y dimension, with a
			
 
				 shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				 (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
			
 
				 divide y-2*shadow, the last submatrix contains the remainder.
			
@@ -290,13 +287,13 @@ examples/filters/shadow3d.c
 
				 
			
 
				 \fn void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the X dimension, thus getting
			
 
				+Partition a block along the X dimension, thus getting
			
 
				 (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				 submatrix contains the remainder.
			
 
				 
			
 
				 \fn void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the X dimension, with a
			
 
				+Partition a block along the X dimension, with a
			
 
				 shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				 ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
			
 
				 divide x, the last submatrix contains the remainder.
			
@@ -307,13 +304,13 @@ enforced for the shadowed parts.
 
				 
			
 
				 \fn void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the Y dimension, thus getting
			
 
				+Partition a block along the Y dimension, thus getting
			
 
				 (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
			
 
				 submatrix contains the remainder.
			
 
				 
			
 
				 \fn void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the Y dimension, with a
			
 
				+Partition a block along the Y dimension, with a
			
 
				 shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				 (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
			
 
				 divide y, the last submatrix contains the remainder.
			
@@ -324,13 +321,13 @@ enforced for the shadowed parts.
 
				 
			
 
				 \fn void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the Z dimension, thus getting
			
 
				+Partition a block along the Z dimension, thus getting
			
 
				 (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
			
 
				 submatrix contains the remainder.
			
 
				 
			
 
				 \fn void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block along the Z dimension, with a
			
 
				+Partition a block along the Z dimension, with a
			
 
				 shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				 (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
			
 
				 divide z, the last submatrix contains the remainder.
			
@@ -349,11 +346,10 @@ functions for BCSR data. Examples on how to use them are shown in
 
				 
			
 
				 \fn void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block-sparse matrix into dense matrices.
			
 
				+Partition a block-sparse matrix into dense matrices.
			
 
				 
			
 
				 \fn void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 \ingroup API_Data_Partition
			
 
				-This partitions a block-sparse matrix into vertical
			
 
				-block-sparse matrices.
			
 
				+Partition a block-sparse matrix into vertical block-sparse matrices.
			
 
				 
			
 
				 */
			
--- a/doc/doxygen/chapters/api/explicit_dependencies.doxy
+++ b/doc/doxygen/chapters/api/explicit_dependencies.doxy
@@ -24,7 +24,7 @@ redundancy in the task dependencies.
 
				 
			
 
				 \fn int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-Fills \p task_array with the list of tasks which are direct children of \p task.
			
 
				+Fill \p task_array with the list of tasks which are direct children of \p task.
			
 
				 \p ndeps is the size of \p task_array.  This function returns the number of
			
 
				 direct children. \p task_array can be set to <c>NULL</c> if \p ndeps is 0, which allows
			
 
				 to compute the number of children before allocating an array to store them.
			
@@ -34,13 +34,13 @@ dependency has been added in the meanwhile.
 
				 
			
 
				 \fn int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This behaves like starpu_task_get_task_succs(), except that it only reports
			
 
				+Behave like starpu_task_get_task_succs(), except that it only reports
			
 
				 tasks which will go through the scheduler, thus avoiding tasks with not codelet,
			
 
				 or with explicit placement.
			
 
				 
			
 
				 \typedef starpu_tag_t
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This type defines a task logical identifer. It is possible to
			
 
				+Define a task logical identifier. It is possible to
			
 
				 associate a task with a unique <em>tag</em> chosen by the application,
			
 
				 and to express dependencies between tasks by the means of those tags.
			
 
				 To do so, fill the field starpu_task::tag_id with a tag number (can be
			
@@ -74,7 +74,7 @@ starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)
 
				 
			
 
				 \fn void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function is similar to starpu_tag_declare_deps(), except
			
 
				+Similar to starpu_tag_declare_deps(), except
			
 
				 that its does not take a variable number of arguments but an \p array of
			
 
				 tags of size \p ndeps.
			
 
				 
			
@@ -86,7 +86,7 @@ starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
 
				 
			
 
				 \fn int starpu_tag_wait(starpu_tag_t id)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function blocks until the task associated to tag \p id has
			
 
				+Block until the task associated to tag \p id has
			
 
				 been executed. This is a blocking call which must therefore not be
			
 
				 called within tasks or callbacks, but only from the application
			
 
				 directly. It is possible to synchronize with the same tag multiple
			
@@ -97,27 +97,26 @@ starpu_task::destroy was enabled).
 
				 
			
 
				 \fn int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function is similar to starpu_tag_wait() except that it
			
 
				+Similar to starpu_tag_wait() except that it
			
 
				 blocks until all the \p ntags tags contained in the array \p id are
			
 
				 terminated.
			
 
				 
			
 
				 \fn void starpu_tag_restart(starpu_tag_t id)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function can be used to clear the <em>already
			
 
				-notified</em> status of a tag which is not associated with a task.
			
 
				+Clear the <em>already notified</em> status of a tag which is not associated with a task.
			
 
				 Before that, calling starpu_tag_notify_from_apps() again will not
			
 
				 notify the successors. After that, the next call to
			
 
				 starpu_tag_notify_from_apps() will notify the successors.
			
 
				 
			
 
				 \fn void starpu_tag_remove(starpu_tag_t id)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function releases the resources associated to tag \p id.
			
 
				+Release the resources associated to tag \p id.
			
 
				 It can be called once the corresponding task has been executed and
			
 
				 when there is no other tag that depend on this tag anymore.
			
 
				 
			
 
				 \fn void starpu_tag_notify_from_apps(starpu_tag_t id)
			
 
				 \ingroup API_Explicit_Dependencies
			
 
				-This function explicitly unlocks tag \p id. It may be useful in
			
 
				+Explicitly unlock tag \p id. It may be useful in
			
 
				 the case of applications which execute part of their computation
			
 
				 outside StarPU tasks (e.g. third-party libraries). It is also provided
			
 
				 as a convenient tool for the programmer, for instance to entirely
			
--- a/doc/doxygen/chapters/api/fft_support.doxy
+++ b/doc/doxygen/chapters/api/fft_support.doxy
@@ -18,7 +18,7 @@ todo
 
				 
			
 
				 \fn void * starpufft_malloc(size_t n)
			
 
				 \ingroup API_FFT_Support
			
 
				-Allocates memory for \p n bytes. This is preferred over \c malloc(),
			
 
				+Allocate memory for \p n bytes. This is preferred over \c malloc(),
			
 
				 since it allocates pinned memory, which allows overlapped transfers.
			
 
				 
			
 
				 \fn void * starpufft_free(void *p)
			
@@ -27,12 +27,12 @@ Release memory previously allocated.
 
				 
			
 
				 \fn struct starpufft_plan * starpufft_plan_dft_1d(int n, int sign, unsigned flags)
			
 
				 \ingroup API_FFT_Support
			
 
				-Initializes a plan for 1D FFT of size \p n. \p sign can be STARPUFFT_FORWARD
			
 
				+Initialize a plan for 1D FFT of size \p n. \p sign can be STARPUFFT_FORWARD
			
 
				 or STARPUFFT_INVERSE. \p flags must be 0.
			
 
				 
			
 
				 \fn struct starpufft_plan * starpufft_plan_dft_2d(int n, int m, int sign, unsigned flags)
			
 
				 \ingroup API_FFT_Support
			
 
				-Initializes a plan for 2D FFT of size (\p n, \p m). \p sign can be
			
 
				+Initialize a plan for 2D FFT of size (\p n, \p m). \p sign can be
			
 
				 STARPUFFT_FORWARD or STARPUFFT_INVERSE. flags must be \p 0.
			
 
				 
			
 
				 \fn struct starpu_task * starpufft_start(starpufft_plan p, void *in, void *out)
			
@@ -60,11 +60,11 @@ the expected types). This submits and waits for the task.
 
				 
			
 
				 \fn void starpufft_cleanup(starpufft_plan p)
			
 
				 \ingroup API_FFT_Support
			
 
				-Releases data for plan \p p, in the starpufft_start() case.
			
 
				+Release data for plan \p p, in the starpufft_start() case.
			
 
				 
			
 
				 \fn void starpufft_destroy_plan(starpufft_plan p)
			
 
				 \ingroup API_FFT_Support
			
 
				-Destroys plan \p p, i.e. release all CPU (fftw) and GPU (cufft)
			
 
				+Destroy plan \p p, i.e. release all CPU (fftw) and GPU (cufft)
			
 
				 resources.
			
 
				 
			
 
				 */
			
--- a/doc/doxygen/chapters/api/fxt_support.doxy
+++ b/doc/doxygen/chapters/api/fxt_support.doxy
@@ -12,47 +12,58 @@
 
				 todo
			
 
				 \ingroup API_FxT_Support
			
 
				 \var char starpu_fxt_codelet_event::symbol[256]
			
 
				-name of the codelet
			
 
				+    name of the codelet
			
 
				 \var int starpu_fxt_codelet_event::workerid
			
 
				+    todo
			
 
				 \var char starpu_fxt_codelet_event::perfmodel_archname[256]
			
 
				+    todo
			
 
				 \var uint32_t starpu_fxt_codelet_event::hash
			
 
				+    todo
			
 
				 \var size_t starpu_fxt_codelet_event::size
			
 
				+    todo
			
 
				 \var float starpu_fxt_codelet_event::time
			
 
				+    todo
			
 
				 
			
 
				 \struct starpu_fxt_options
			
 
				 todo
			
 
				 \ingroup API_FxT_Support
			
 
				 \var unsigned starpu_fxt_options::per_task_colour
			
 
				+    todo
			
 
				 \var unsigned starpu_fxt_options::no_counter
			
 
				+    todo
			
 
				 \var unsigned starpu_fxt_options::no_bus
			
 
				+    todo
			
 
				 \var unsigned starpu_fxt_options::ninputfiles
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::filenames[STARPU_FXT_MAX_FILES]
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::out_paje_path
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::distrib_time_path
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::activity_path
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::dag_path
			
 
				-
			
 
				+    todo
			
 
				 \var char *starpu_fxt_options::file_prefix
			
 
				-In case we are going to gather multiple traces (e.g in the case of MPI
			
 
				-processes), we may need to prefix the name of the containers.
			
 
				+    In case we are going to gather multiple traces (e.g. in the case of
			
 
				+    MPI processes), we may need to prefix the name of the containers.
			
 
				 \var uint64_t starpu_fxt_options::file_offset
			
 
				-In case we are going to gather multiple traces (e.g in the case of MPI
			
 
				-processes), we may need to prefix the name of the containers.
			
 
				+    In case we are going to gather multiple traces (e.g. in the case of
			
 
				+    MPI processes), we may need to prefix the name of the containers.
			
 
				 \var int starpu_fxt_options::file_rank
			
 
				-In case we are going to gather multiple traces (e.g in the case of MPI
			
 
				-processes), we may need to prefix the name of the containers.
			
 
				-
			
 
				+    In case we are going to gather multiple traces (e.g. in the case of
			
 
				+    MPI processes), we may need to prefix the name of the containers.
			
 
				 \var char starpu_fxt_options::worker_names[STARPU_NMAXWORKERS][256]
			
 
				-Output parameters
			
 
				+    Output parameters
			
 
				 \var struct starpu_perfmodel_arch starpu_fxt_options::worker_archtypes[STARPU_NMAXWORKERS]
			
 
				-Output parameters
			
 
				+    Output parameters
			
 
				 \var int starpu_fxt_options::nworkers
			
 
				-Output parameters
			
 
				-
			
 
				+    Output parameters
			
 
				 \var struct starpu_fxt_codelet_event **starpu_fxt_options::dumped_codelets
			
 
				-In case we want to dump the list of codelets to an external tool
			
 
				+    In case we want to dump the list of codelets to an external tool
			
 
				 \var long starpu_fxt_options::dumped_codelets_count
			
 
				-In case we want to dump the list of codelets to an external tool
			
 
				+    In case we want to dump the list of codelets to an external tool
			
 
				 
			
 
				 \fn void starpu_fxt_options_init(struct starpu_fxt_options *options)
			
 
				 \ingroup API_FxT_Support
			
@@ -79,7 +90,7 @@ start recording it again, etc.
 
				 
			
 
				 \fn void starpu_fxt_autostart_profiling(int autostart)
			
 
				 \ingroup API_FxT_Support
			
 
				-Determines whether profiling should be started by starpu_init(), or only when
			
 
				+Determine whether profiling should be started by starpu_init(), or only when
			
 
				 starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
			
 
				 prevent it.
			
 
				 
			
--- a/doc/doxygen/chapters/api/initialization.doxy
+++ b/doc/doxygen/chapters/api/initialization.doxy
@@ -17,220 +17,226 @@ number of processing units and takes the default scheduling policy.
 
				 The environment variables overwrite the equivalent parameters.
			
 
				 \var int starpu_conf::magic
			
 
				 \private
			
 
				-Will be initialized by starpu_conf_init(). Should not be set by hand.
			
 
				+    Will be initialized by starpu_conf_init(). Should not be set by
			
 
				+    hand.
			
 
				 
			
 
				 \var const char*starpu_conf::sched_policy_name
			
 
				-This is the name of the scheduling policy. This can also be specified
			
 
				-with the environment variable \ref STARPU_SCHED. (default = <c>NULL</c>).
			
 
				+    Name of the scheduling policy. This can also be specified with the
			
 
				+    environment variable \ref STARPU_SCHED. (default = <c>NULL</c>).
			
 
				 
			
 
				 \var struct starpu_sched_policy *starpu_conf::sched_policy
			
 
				-This is the definition of the scheduling policy. This field is ignored
			
 
				-if starpu_conf::sched_policy_name is set. (default = <c>NULL</c>)
			
 
				+    Definition of the scheduling policy. This field is ignored if
			
 
				+    starpu_conf::sched_policy_name is set. (default = <c>NULL</c>)
			
 
				 
			
 
				 \var void (*starpu_conf::sched_policy_init)(unsigned)
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \var int starpu_conf::ncpus
			
 
				-This is the number of CPU cores that StarPU can use. This can also be
			
 
				-specified with the environment variable \ref STARPU_NCPU . (default = -1)
			
 
				+    Number of CPU cores that StarPU can use. This can also be
			
 
				+    specified with the environment variable \ref STARPU_NCPU .
			
 
				+    (default = -1)
			
 
				 \var int starpu_conf::ncuda
			
 
				-This is the number of CUDA devices that StarPU can use. This can also
			
 
				-be specified with the environment variable \ref STARPU_NCUDA. (default =
			
 
				--1)
			
 
				+    Number of CUDA devices that StarPU can use. This can also be
			
 
				+    specified with the environment variable \ref STARPU_NCUDA.
			
 
				+    (default = -1)
			
 
				 \var int starpu_conf::nopencl
			
 
				-This is the number of OpenCL devices that StarPU can use. This can
			
 
				-also be specified with the environment variable \ref STARPU_NOPENCL.
			
 
				-(default = -1)
			
 
				+    Number of OpenCL devices that StarPU can use. This can also be
			
 
				+    specified with the environment variable \ref STARPU_NOPENCL.
			
 
				+    (default = -1)
			
 
				 \var int starpu_conf::nmic
			
 
				-This is the number of MIC devices that StarPU can use. This can also
			
 
				-be specified with the environment variable \ref STARPU_NMIC.
			
 
				-(default = -1)
			
 
				+    Number of MIC devices that StarPU can use. This can also be
			
 
				+    specified with the environment variable \ref STARPU_NMIC.
			
 
				+    (default = -1)
			
 
				 \var int starpu_conf::nscc
			
 
				-This is the number of SCC devices that StarPU can use. This can also
			
 
				-be specified with the environment variable \ref STARPU_NSCC.
			
 
				-(default = -1)
			
 
				+    Number of SCC devices that StarPU can use. This can also be
			
 
				+    specified with the environment variable \ref STARPU_NSCC.
			
 
				+    (default = -1)
			
 
				 \var int starpu_conf::nmpi_ms
			
 
				-This is the number of MPI Master Slave devices that StarPU can use. This can also
			
 
				-be specified with the environment variable \ref STARPU_NMPI_MS.
			
 
				-(default = -1)
			
 
				+    Number of MPI Master Slave devices that StarPU can use. This can
			
 
				+    also be specified with the environment variable \ref
			
 
				+    STARPU_NMPI_MS. (default = -1)
			
 
				 
			
 
				 \var unsigned starpu_conf::use_explicit_workers_bindid
			
 
				-If this flag is set, the starpu_conf::workers_bindid array indicates
			
 
				-where the different workers are bound, otherwise StarPU automatically
			
 
				-selects where to bind the different workers. This can also be
			
 
				-specified with the environment variable \ref STARPU_WORKERS_CPUID. (default = 0)
			
 
				+    If this flag is set, the starpu_conf::workers_bindid array
			
 
				+    indicates where the different workers are bound, otherwise StarPU
			
 
				+    automatically selects where to bind the different workers. This
			
 
				+    can also be specified with the environment variable \ref
			
 
				+    STARPU_WORKERS_CPUID. (default = 0)
			
 
				 \var unsigned starpu_conf::workers_bindid[STARPU_NMAXWORKERS]
			
 
				-If the starpu_conf::use_explicit_workers_bindid flag is set, this
			
 
				-array indicates where to bind the different workers. The i-th entry of
			
 
				-the starpu_conf::workers_bindid indicates the logical identifier of
			
 
				-the processor which should execute the i-th worker. Note that the
			
 
				-logical ordering of the CPUs is either determined by the OS, or
			
 
				-provided by the hwloc library in case it is available.
			
 
				+    If the starpu_conf::use_explicit_workers_bindid flag is set, this
			
 
				+    array indicates where to bind the different workers. The i-th
			
 
				+    entry of the starpu_conf::workers_bindid indicates the logical
			
 
				+    identifier of the processor which should execute the i-th worker.
			
 
				+    Note that the logical ordering of the CPUs is either determined by
			
 
				+    the OS, or provided by the hwloc library in case it is available.
			
 
				 \var unsigned starpu_conf::use_explicit_workers_cuda_gpuid
			
 
				-If this flag is set, the CUDA workers will be attached to the CUDA
			
 
				-devices specified in the starpu_conf::workers_cuda_gpuid array.
			
 
				-Otherwise, StarPU affects the CUDA devices in a round-robin fashion.
			
 
				-This can also be specified with the environment variable
			
 
				-\ref STARPU_WORKERS_CUDAID. (default = 0)
			
 
				+    If this flag is set, the CUDA workers will be attached to the CUDA
			
 
				+    devices specified in the starpu_conf::workers_cuda_gpuid array.
			
 
				+    Otherwise, StarPU assigns the CUDA devices in a round-robin
			
 
				+    fashion. This can also be specified with the environment variable
			
 
				+    \ref STARPU_WORKERS_CUDAID. (default = 0)
			
 
				 \var unsigned starpu_conf::workers_cuda_gpuid[STARPU_NMAXWORKERS]
			
 
				-If the starpu_conf::use_explicit_workers_cuda_gpuid flag is set, this
			
 
				-array contains the logical identifiers of the CUDA devices (as used by
			
 
				-\c cudaGetDevice()).
			
 
				+    If the starpu_conf::use_explicit_workers_cuda_gpuid flag is set,
			
 
				+    this array contains the logical identifiers of the CUDA devices
			
 
				+    (as used by \c cudaGetDevice()).
			
 
				 \var unsigned starpu_conf::use_explicit_workers_opencl_gpuid
			
 
				-If this flag is set, the OpenCL workers will be attached to the OpenCL
			
 
				-devices specified in the starpu_conf::workers_opencl_gpuid array.
			
 
				-Otherwise, StarPU affects the OpenCL devices in a round-robin fashion.
			
 
				-This can also be specified with the environment variable
			
 
				-\ref STARPU_WORKERS_OPENCLID. (default = 0)
			
 
				+    If this flag is set, the OpenCL workers will be attached to the
			
 
				+    OpenCL devices specified in the starpu_conf::workers_opencl_gpuid
			
 
				+    array. Otherwise, StarPU assigns the OpenCL devices in a
			
 
				+    round-robin fashion. This can also be specified with the
			
 
				+    environment variable \ref STARPU_WORKERS_OPENCLID. (default = 0)
			
 
				 \var unsigned starpu_conf::workers_opencl_gpuid[STARPU_NMAXWORKERS]
			
 
				-If the starpu_conf::use_explicit_workers_opencl_gpuid flag is set,
			
 
				-this array contains the logical identifiers of the OpenCL devices to
			
 
				-be used.
			
 
				+    If the starpu_conf::use_explicit_workers_opencl_gpuid flag is set,
			
 
				+    this array contains the logical identifiers of the OpenCL devices
			
 
				+    to be used.
			
 
				 \var unsigned starpu_conf::use_explicit_workers_mic_deviceid
			
 
				-If this flag is set, the MIC workers will be attached to the MIC
			
 
				-devices specified in the array starpu_conf::workers_mic_deviceid.
			
 
				-Otherwise, StarPU affects the MIC devices in a round-robin fashion.
			
 
				-This can also be specified with the environment variable
			
 
				-\ref STARPU_WORKERS_MICID.
			
 
				-(default = 0)
			
 
				+    If this flag is set, the MIC workers will be attached to the MIC
			
 
				+    devices specified in the array starpu_conf::workers_mic_deviceid.
			
 
				+    Otherwise, StarPU assigns the MIC devices in a round-robin
			
 
				+    fashion. This can also be specified with the environment variable
			
 
				+    \ref STARPU_WORKERS_MICID. (default = 0)
			
 
				 \var unsigned starpu_conf::workers_mic_deviceid[STARPU_NMAXWORKERS]
			
 
				-If the flag starpu_conf::use_explicit_workers_mic_deviceid is set, the
			
 
				-array contains the logical identifiers of the MIC devices to be used.
			
 
				+    If the flag starpu_conf::use_explicit_workers_mic_deviceid is set,
			
 
				+    the array contains the logical identifiers of the MIC devices to
			
 
				+    be used.
			
 
				 \var unsigned starpu_conf::use_explicit_workers_scc_deviceid
			
 
				-If this flag is set, the SCC workers will be attached to the SCC
			
 
				-devices specified in the array starpu_conf::workers_scc_deviceid.
			
 
				-(default = 0)
			
 
				+    If this flag is set, the SCC workers will be attached to the SCC
			
 
				+    devices specified in the array starpu_conf::workers_scc_deviceid.
			
 
				+    (default = 0)
			
 
				 \var unsigned starpu_conf::workers_scc_deviceid[STARPU_NMAXWORKERS]
			
 
				-If the flag starpu_conf::use_explicit_workers_scc_deviceid is set, the
			
 
				-array contains the logical identifiers of the SCC devices to be used.
			
 
				-Otherwise, StarPU affects the SCC devices in a round-robin fashion.
			
 
				-This can also be specified with the environment variable
			
 
				-\ref STARPU_WORKERS_SCCID.
			
 
				+    If the flag starpu_conf::use_explicit_workers_scc_deviceid is set,
			
 
				+    the array contains the logical identifiers of the SCC devices to
			
 
				+    be used. Otherwise, StarPU assigns the SCC devices in a
			
 
				+    round-robin fashion. This can also be specified with the
			
 
				+    environment variable \ref STARPU_WORKERS_SCCID.
			
 
				 \var unsigned starpu_conf::use_explicit_workers_mpi_ms_deviceid
			
 
				-If this flag is set, the MPI Master Slave workers will be attached to the MPI Master Slave
			
 
				-devices specified in the array starpu_conf::workers_mpi_ms_deviceid.
			
 
				-Otherwise, StarPU affects the MPI Master Slave devices in a round-robin fashion.
			
 
				-(default = 0)
			
 
				+    If this flag is set, the MPI Master Slave workers will be attached
			
 
				+    to the MPI Master Slave devices specified in the array
			
 
				+    starpu_conf::workers_mpi_ms_deviceid. Otherwise, StarPU assigns
			
 
				+    the MPI Master Slave devices in a round-robin fashion. (default =
			
 
				+    0)
			
 
				 \var unsigned starpu_conf::workers_mpi_ms_deviceid[STARPU_NMAXWORKERS]
			
 
				-If the flag starpu_conf::use_explicit_workers_mpi_ms_deviceid is set, the
			
 
				-array contains the logical identifiers of the MPI Master Slave devices to be used.
			
 
				+    If the flag starpu_conf::use_explicit_workers_mpi_ms_deviceid is
			
 
				+    set, the array contains the logical identifiers of the MPI Master
			
 
				+    Slave devices to be used.
			
 
				 
			
 
				 \var int starpu_conf::bus_calibrate
			
 
				-If this flag is set, StarPU will recalibrate the bus.  If this value
			
 
				-is equal to <c>-1</c>, the default value is used.  This can also be
			
 
				-specified with the environment variable \ref STARPU_BUS_CALIBRATE. (default
			
 
				-= 0)
			
 
				+    If this flag is set, StarPU will recalibrate the bus.  If this
			
 
				+    value is equal to -1, the default value is used. This can
			
 
				+    also be specified with the environment variable \ref
			
 
				+    STARPU_BUS_CALIBRATE. (default = 0)
			
 
				 \var int starpu_conf::calibrate
			
 
				-If this flag is set, StarPU will calibrate the performance models when
			
 
				-executing tasks. If this value is equal to <c>-1</c>, the default
			
 
				-value is used. If the value is equal to <c>1</c>, it will force
			
 
				-continuing calibration. If the value is equal to <c>2</c>, the
			
 
				-existing performance models will be overwritten. This can also be
			
 
				-specified with the environment variable \ref STARPU_CALIBRATE. (default =
			
 
				-0)
			
 
				+    If this flag is set, StarPU will calibrate the performance models
			
 
				+    when executing tasks. If this value is equal to -1, the
			
 
				+    default value is used. If the value is equal to 1, it will
			
 
				+    force continuing calibration. If the value is equal to 2,
			
 
				+    the existing performance models will be overwritten. This can also
			
 
				+    be specified with the environment variable \ref STARPU_CALIBRATE.
			
 
				+    (default = 0)
			
 
				 \var int starpu_conf::single_combined_worker
			
 
				-By default, StarPU executes parallel tasks
			
 
				-concurrently. Some parallel libraries (e.g. most OpenMP
			
 
				-implementations) however do not support concurrent calls to
			
 
				-parallel code. In such case, setting this flag makes StarPU
			
 
				-only start one parallel task at a time (but other CPU and
			
 
				-GPU tasks are not affected and can be run concurrently).
			
 
				-The parallel task scheduler will however
			
 
				-still try varying combined worker sizes to look for the
			
 
				-most efficient ones. This can also be specified with the environment
			
 
				-variable \ref STARPU_SINGLE_COMBINED_WORKER.
			
 
				-(default = 0)
			
 
				+    By default, StarPU executes parallel tasks concurrently. Some
			
 
				+    parallel libraries (e.g. most OpenMP implementations) however do
			
 
				+    not support concurrent calls to parallel code. In such a case,
			
 
				+    setting this flag makes StarPU only start one parallel task at a
			
 
				+    time (but other CPU and GPU tasks are not affected and can be run
			
 
				+    concurrently). The parallel task scheduler will however still try
			
 
				+    varying combined worker sizes to look for the most efficient ones.
			
 
				+    This can also be specified with the environment variable \ref
			
 
				+    STARPU_SINGLE_COMBINED_WORKER. (default = 0)
			
 
				 
			
 
				 \var char *starpu_conf::mic_sink_program_path
			
 
				-Path to the kernel to execute on the MIC device, compiled for MIC
			
 
				-architecture. When set to <c>NULL</c>, StarPU automatically looks next to the
			
 
				-host program location.
			
 
				-(default = <c>NULL</c>)
			
 
				+    Path to the kernel to execute on the MIC device, compiled for MIC
			
 
				+    architecture. When set to <c>NULL</c>, StarPU automatically looks
			
 
				+    next to the host program location. (default = <c>NULL</c>)
			
 
				 
			
 
				 \var int starpu_conf::disable_asynchronous_copy
			
 
				-This flag should be set to 1 to disable
			
 
				-asynchronous copies between CPUs and all accelerators. This
			
 
				-can also be specified with the environment variable
			
 
				-\ref STARPU_DISABLE_ASYNCHRONOUS_COPY. The
			
 
				-AMD implementation of OpenCL is known to fail when copying
			
 
				-data asynchronously. When using this implementation, it is
			
 
				-therefore necessary to disable asynchronous data transfers.
			
 
				-This can also be specified at compilation time by giving to
			
 
				-the configure script the option
			
 
				-\ref disable-asynchronous-copy "--disable-asynchronous-copy". (default = 0)
			
 
				+    This flag should be set to 1 to disable asynchronous copies
			
 
				+    between CPUs and all accelerators. This can also be specified with
			
 
				+    the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_COPY.
			
 
				+    The AMD implementation of OpenCL is known to fail when copying
			
 
				+    data asynchronously. When using this implementation, it is
			
 
				+    therefore necessary to disable asynchronous data transfers. This
			
 
				+    can also be specified at compilation time by giving to the
			
 
				+    configure script the option
			
 
				+    \ref disable-asynchronous-copy "--disable-asynchronous-copy".
			
 
				+    (default = 0)
			
 
				 \var int starpu_conf::disable_asynchronous_cuda_copy
			
 
				-This flag should be set to 1 to disable
			
 
				-asynchronous copies between CPUs and CUDA accelerators.
			
 
				-This can also be specified with the environment variable
			
 
				-\ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
			
 
				-This can also be specified at compilation time by giving to
			
 
				-the configure script the option
			
 
				-\ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy". (default = 0)
			
 
				+    This flag should be set to 1 to disable asynchronous copies
			
 
				+    between CPUs and CUDA accelerators. This can also be specified
			
 
				+    with the environment variable \ref
			
 
				+    STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
			
 
				+    This can also be specified at compilation time by giving to the
			
 
				+    configure script the option
			
 
				+    \ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy".
			
 
				+    (default = 0)
			
 
				 \var int starpu_conf::disable_asynchronous_opencl_copy
			
 
				-This flag should be set to 1 to disable
			
 
				-asynchronous copies between CPUs and OpenCL accelerators.
			
 
				-This can also be specified with the environment
			
 
				-variable \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. The AMD
			
 
				-implementation of OpenCL is known to fail
			
 
				-when copying data asynchronously. When using this
			
 
				-implementation, it is therefore necessary to disable
			
 
				-asynchronous data transfers. This can also be specified at
			
 
				-compilation time by giving to the configure script the
			
 
				-option \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy".
			
 
				-(default = 0)
			
 
				+    This flag should be set to 1 to disable asynchronous copies
			
 
				+    between CPUs and OpenCL accelerators. This can also be specified
			
 
				+    with the environment variable \ref
			
 
				+    STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. The AMD implementation of
			
 
				+    OpenCL is known to fail when copying data asynchronously. When
			
 
				+    using this implementation, it is therefore necessary to disable
			
 
				+    asynchronous data transfers. This can also be specified at
			
 
				+    compilation time by giving to the configure script the
			
 
				+    option
			
 
				+    \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy".
			
 
				+    (default = 0)
			
 
				 \var int starpu_conf::disable_asynchronous_mic_copy
			
 
				-This flag should be set to 1 to disable asynchronous copies between
			
 
				-CPUs and MIC accelerators. This can also be specified with the
			
 
				-environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
			
 
				-This can also be specified at compilation time by giving to the
			
 
				-configure script the option \ref disable-asynchronous-mic-copy "--disable-asynchronous-mic-copy".
			
 
				-(default = 0).
			
 
				+    This flag should be set to 1 to disable asynchronous copies
			
 
				+    between CPUs and MIC accelerators. This can also be specified with
			
 
				+    the environment variable \ref
			
 
				+    STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY. This can also be specified
			
 
				+    at compilation time by giving to the configure script the option
			
 
				+    \ref disable-asynchronous-mic-copy "--disable-asynchronous-mic-copy".
			
 
				+    (default = 0).
			
 
				 \var int starpu_conf::disable_asynchronous_mpi_ms_copy
			
 
				-This flag should be set to 1 to disable asynchronous copies between
			
 
				-CPUs and MPI Master Slave devices. This can also be specified with the
			
 
				-environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY.
			
 
				-This can also be specified at compilation time by giving to the
			
 
				-configure script the option \ref disable-asynchronous-mpi-master-slave-copy "--disable-asynchronous-mpi-master-slave-copy".
			
 
				-(default = 0).
			
 
				+    This flag should be set to 1 to disable asynchronous copies
			
 
				+    between CPUs and MPI Master Slave devices. This can also be
			
 
				+    specified with the environment variable \ref
			
 
				+    STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY. This can also be
			
 
				+    specified at compilation time by giving to the configure script
			
 
				+    the option
			
 
				+    \ref disable-asynchronous-mpi-master-slave-copy "--disable-asynchronous-mpi-master-slave-copy".
			
 
				+    (default = 0).
			
 
				 
			
 
				 \var unsigned *starpu_conf::cuda_opengl_interoperability
			
 
				-Enable CUDA/OpenGL interoperation on these CUDA
			
 
				-devices. This can be set to an array of CUDA device
			
 
				-identifiers for which cudaGLSetGLDevice() should be called
			
 
				-instead of \c cudaSetDevice(). Its size is specified by the
			
 
				-starpu_conf::n_cuda_opengl_interoperability field below
			
 
				-(default = <c>NULL</c>)
			
 
				+    Enable CUDA/OpenGL interoperation on these CUDA devices. This can
			
 
				+    be set to an array of CUDA device identifiers for which
			
 
				+    \c cudaGLSetGLDevice() should be called instead of
			
 
				+    \c cudaSetDevice(). Its size is specified by the
			
 
				+    starpu_conf::n_cuda_opengl_interoperability field below
			
 
				+    (default = <c>NULL</c>)
			
 
				 \var unsigned starpu_conf::n_cuda_opengl_interoperability
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \var struct starpu_driver *starpu_conf::not_launched_drivers
			
 
				-Array of drivers that should not be launched by
			
 
				-StarPU. The application will run in one of its own
			
 
				-threads. (default = <c>NULL</c>)
			
 
				+    Array of drivers that should not be launched by StarPU. The
			
 
				+    application will run in one of its own threads. (default =
			
 
				+    <c>NULL</c>)
			
 
				 \var unsigned starpu_conf::n_not_launched_drivers
			
 
				-The number of StarPU drivers that should not be
			
 
				-launched by StarPU. (default = 0)
			
 
				+    The number of StarPU drivers that should not be launched by
			
 
				+    StarPU. (default = 0)
			
 
				+
			
 
				 \var starpu_conf::trace_buffer_size
			
 
				-Specifies the buffer size used for FxT tracing.
			
 
				-Starting from FxT version 0.2.12, the buffer will
			
 
				-automatically be flushed when it fills in, but it may still
			
 
				-be interesting to specify a bigger value to avoid any
			
 
				-flushing (which would disturb the trace).
			
 
				+    Specify the buffer size used for FxT tracing. Starting from FxT
			
 
				+    version 0.2.12, the buffer will automatically be flushed when it
			
 
				+    fills in, but it may still be interesting to specify a bigger
			
 
				+    value to avoid any flushing (which would disturb the trace).
			
 
				 
			
 
				 \var starpu_conf::global_sched_ctx_min_priority
			
 
				-todo
			
 
				+    todo
			
 
				 \var starpu_conf::global_sched_ctx_max_priority
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \fn int starpu_init(struct starpu_conf *conf)
			
 
				 \ingroup API_Initialization_and_Termination
			
 
				 This is the StarPU initialization method, which must be called prior to
			
 
				 any other StarPU call. It is possible to specify StarPU’s
			
 
				 configuration (e.g. scheduling policy, number of cores, ...) by
			
 
				-passing a non-<c>NULL</c> argument. Default configuration is used if the
			
 
				-passed argument is <c>NULL</c>. Upon successful completion, this function
			
 
				-returns 0. Otherwise, <c>-ENODEV</c> indicates that no worker was available
			
 
				-(so that StarPU was not initialized).
			
 
				+passing a non-<c>NULL</c> \p conf. Default configuration is used if \p
			
 
				+conf is <c>NULL</c>. Upon successful completion, this function
			
 
				+returns 0. Otherwise, <c>-ENODEV</c> indicates that no worker was
			
 
				+available (and thus StarPU was not initialized).
			
 
				 
			
 
				 \fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
			
 
				 \ingroup API_Initialization_and_Termination
			
@@ -242,12 +248,12 @@ same program.
 
				 
			
 
				 \fn int starpu_conf_init(struct starpu_conf *conf)
			
 
				 \ingroup API_Initialization_and_Termination
			
 
				-This function initializes the conf structure passed as argument with
			
 
				-the default values. In case some configuration parameters are already
			
 
				+Initialize the \p conf structure with the default values. In case some
			
 
				+configuration parameters are already
			
 
				 specified through environment variables, starpu_conf_init() initializes
			
 
				-the fields of the structure according to the environment variables.
			
 
				+the fields of \p conf according to the environment variables.
			
 
				 For instance if \ref STARPU_CALIBRATE is set, its value is put in the
			
 
				-field starpu_conf::calibrate of the structure passed as argument. Upon successful
			
 
				+field starpu_conf::calibrate of \p conf. Upon successful
			
 
				 completion, this function returns 0. Otherwise, <c>-EINVAL</c> indicates that
			
 
				 the argument was <c>NULL</c>.
			
 
				 
			
@@ -259,7 +265,7 @@ are not guaranteed to be available until this method has been called.
 
				 
			
 
				 \fn void starpu_pause(void)
			
 
				 \ingroup API_Initialization_and_Termination
			
 
				-This call is used to suspend the processing of new tasks by
			
 
				+Suspend the processing of new tasks by
			
 
				 workers. It can be used in a program where StarPU is used during only
			
 
				 a part of the execution. Without this call, the workers continue to
			
 
				 poll for new tasks in a tight loop, wasting CPU time. The symmetric
			
@@ -297,7 +303,6 @@ devices are disabled.
 
				 
			
 
				 \fn void starpu_topology_print(FILE *f)
			
 
				 \ingroup API_Initialization_and_Termination
			
 
				-Prints a description of the topology on f.
			
 
				+Print a description of the topology on \p f.
			
 
				 
			
 
				 */
			
 
				-
			
--- a/doc/doxygen/chapters/api/insert_task.doxy
+++ b/doc/doxygen/chapters/api/insert_task.doxy
@@ -10,12 +10,12 @@
 
				 
			
 
				 \fn int starpu_insert_task(struct starpu_codelet *cl, ...)
			
 
				 \ingroup API_Insert_Task
			
 
				-This function does the same as the function starpu_task_insert(). It has been kept to avoid breaking old codes.
			
 
				+Similar to starpu_task_insert(). Kept to avoid breaking old codes.
			
 
				 
			
 
				 \fn int starpu_task_insert(struct starpu_codelet *cl, ...)
			
 
				 \ingroup API_Insert_Task
			
 
				 Create and submit a task corresponding to \p cl with the
			
 
				-following arguments. The argument list must be zero-terminated.
			
 
				+following given arguments. The argument list must be zero-terminated.
			
 
				 
			
 
				 The arguments following the codelet can be of the following types:
			
 
				 <ul>
			
@@ -47,13 +47,13 @@ implementation to retrieve them.
 
				 
			
 
				 \def STARPU_VALUE
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by a pointer to a constant value and the size of the
			
 
				 constant
			
 
				 
			
 
				 \def STARPU_CL_ARGS
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by a memory buffer containing the arguments to be given to
			
 
				 the task, and by the size of the arguments. The memory buffer should
			
 
				 be the result of a previous call to starpu_codelet_pack_args(), and will be
			
@@ -61,12 +61,12 @@ freed (i.e. starpu_task::cl_arg_free will be set to 1)
 
				 
			
 
				 \def STARPU_CALLBACK
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by a pointer to a callback function
			
 
				 
			
 
				 \def STARPU_CALLBACK_WITH_ARG
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by two pointers: one to a callback function, and the other
			
 
				 to be given as an argument to the callback function; this is
			
 
				 equivalent to using both ::STARPU_CALLBACK and
			
@@ -74,13 +74,13 @@ equivalent to using both ::STARPU_CALLBACK and
 
				 
			
 
				 \def STARPU_CALLBACK_ARG
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by a pointer to be given as an argument to the callback
			
 
				 function
			
 
				 
			
 
				 \def STARPU_PRIORITY
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by a integer defining a priority level
			
 
				 
			
 
				 \def STARPU_DATA_ARRAY
			
@@ -93,42 +93,42 @@ TODO
 
				 
			
 
				 \def STARPU_EXECUTE_ON_WORKER
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must be
			
 
				+Used when calling starpu_task_insert(), must be
			
 
				 followed by an integer value specifying the worker on which to execute
			
 
				 the task (as specified by starpu_task::execute_on_a_specific_worker)
			
 
				 
			
 
				 \def STARPU_WORKER_ORDER
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must be
			
 
				+Used when calling starpu_task_insert(), must be
			
 
				 followed by an integer value specifying the worker order in which to execute
			
 
				 the tasks (as specified by starpu_task::workerorder)
			
 
				 
			
 
				 \def STARPU_TAG
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must be followed by a tag.
			
 
				+Used when calling starpu_task_insert(), must be followed by a tag.
			
 
				 
			
 
				 \def STARPU_TAG_ONLY
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must be followed by a tag.
			
 
				-It sets starpu_task::tag_id, but leaves starpu_task::use_tag as 0.
			
 
				+Used when calling starpu_task_insert(), must be followed by a tag
			
 
				+stored in starpu_task::tag_id. Leave starpu_task::use_tag as 0.
			
 
				 
			
 
				 \def STARPU_NAME
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must be followed by a char *.
			
 
				-It sets starpu_task::name to it.
			
 
				+Used when calling starpu_task_insert(), must be followed by a char *
			
 
				+stored in starpu_task::name.
			
 
				 
			
 
				 \def STARPU_FLOPS
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				+Used when calling starpu_task_insert(), must
			
 
				 be followed by an amount of floating point operations, as a double.
			
 
				 Users <b>MUST</b> explicitly cast into double, otherwise parameter
			
 
				 passing will not work.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX
			
 
				 \ingroup API_Insert_Task
			
 
				-this macro is used when calling starpu_task_insert(), and must
			
 
				-be followed by the id of the scheduling context to which we want to
			
 
				-submit the task.
			
 
				+Used when calling starpu_task_insert(), must
			
 
				+be followed by the id of the scheduling context to which to submit the
			
 
				+task.
			
 
				 
			
 
				 \fn void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
			
 
				 \ingroup API_Insert_Task
			
@@ -146,7 +146,7 @@ parameters.
 
				 \fn void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...)
			
 
				 \ingroup API_Insert_Task
			
 
				 Similar to starpu_codelet_unpack_args(), but if any parameter is
			
 
				-0, copy the part of cl_arg that has not been read in buffer which
			
 
				+0, copy the part of \p cl_arg that has not been read in \p buffer which
			
 
				 can then be used in a later call to one of the unpack functions.
			
 
				 
			
 
				 \fn struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...)
			
--- a/doc/doxygen/chapters/api/lower_bound.doxy
+++ b/doc/doxygen/chapters/api/lower_bound.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
@@ -22,7 +22,7 @@ Stop recording tasks
 
				 
			
 
				 \fn void starpu_bound_print_dot(FILE *output)
			
 
				 \ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Print the DAG that was recorded
			
 
				+Emit the DAG that was recorded on \p output.
			
 
				 
			
 
				 \fn void starpu_bound_compute(double *res, double *integer_res, int integer)
			
 
				 \ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
@@ -42,7 +42,7 @@ tasks, in the mps format
 
				 
			
 
				 \fn void starpu_bound_print(FILE *output, int integer)
			
 
				 \ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Emit statistics of actual execution vs theoretical upper bound.
			
 
				+Emit on \p output the statistics of actual execution vs theoretical upper bound.
			
 
				 \p integer permits to choose between integer solving (which takes a
			
 
				 long time but is correct), and relaxed solving (which provides an
			
 
				 approximate solution).
			
--- a/doc/doxygen/chapters/api/mic_extensions.doxy
+++ b/doc/doxygen/chapters/api/mic_extensions.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
@@ -10,13 +10,12 @@
 
				 
			
 
				 \def STARPU_USE_MIC
			
 
				 \ingroup API_MIC_Extensions
			
 
				-This macro is defined when StarPU has been installed with MIC support.
			
 
				+Defined when StarPU has been installed with MIC support.
			
 
				 It should be used in your code to detect the availability of MIC.
			
 
				 
			
 
				 \def STARPU_MAXMICDEVS
			
 
				 \ingroup API_MIC_Extensions
			
 
				-This macro defines the maximum number of MIC devices that are
			
 
				-supported by StarPU.
			
 
				+Define the maximum number of MIC devices that are supported by StarPU.
			
 
				 
			
 
				 \typedef starpu_mic_func_symbol_t
			
 
				 \ingroup API_MIC_Extensions
			
@@ -24,13 +23,13 @@ Type for MIC function symbols
 
				 
			
 
				 \fn int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
			
 
				 \ingroup API_MIC_Extensions
			
 
				-Initiate a lookup on each MIC device to find the adress of the
			
 
				-function named \p func_name, store them in the global array kernels
			
 
				+Initiate a lookup on each MIC device to find the address of the
			
 
				+function named \p func_name, store it in the global array kernels
			
 
				 and return the index in the array through \p symbol.
			
 
				 
			
 
				 \fn starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
			
 
				 \ingroup API_MIC_Extensions
			
 
				-If success, return the pointer to the function defined by \p symbol on
			
 
				+If successful, return the pointer to the function defined by \p symbol on
			
 
				 the device linked to the called device. This can for instance be used
			
 
				 in a starpu_mic_func_t implementation.
			
 
				 
			
--- a/doc/doxygen/chapters/api/misc_helpers.doxy
+++ b/doc/doxygen/chapters/api/misc_helpers.doxy
@@ -20,16 +20,17 @@ the handle has been copied, and it is given the pointer \p callback_arg as argum
 
				 
			
 
				 \fn void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where)
			
 
				 \ingroup API_Miscellaneous_Helpers
			
 
				-This function executes the given function on a subset of workers. When
			
 
				+Execute the given function \p func on a subset of workers. When
			
 
				 calling this method, the offloaded function \p func is executed by
			
 
				-every StarPU worker that may execute the function. The argument \p arg
			
 
				+every StarPU worker that is eligible to execute the function.
			
 
				+The argument \p arg
			
 
				 is passed to the offloaded function. The argument \p where specifies
			
 
				 on which types of processing units the function should be executed.
			
 
				 Similarly to the field starpu_codelet::where, it is possible to
			
 
				 specify that the function should be executed on every CUDA device and
			
 
				 every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function blocks
			
 
				-until the function has been executed on every appropriate processing
			
 
				-units, so that it may not be called from a callback function for
			
 
				+until \p func has been executed on every appropriate processing
			
 
				+unit, and thus may not be called from a callback function for
			
 
				 instance.
			
 
				 
			
 
				 \fn void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name)
			
--- a/doc/doxygen/chapters/api/modularized_scheduler.doxy
+++ b/doc/doxygen/chapters/api/modularized_scheduler.doxy
@@ -13,18 +13,16 @@
 
				 \ingroup API_Modularized_Scheduler
			
 
				 flags for starpu_sched_component::properties
			
 
				 \var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_HOMOGENEOUS
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				      indicate that all workers have the same starpu_worker_archtype
			
 
				 \var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				      indicate that all workers have the same memory component
			
 
				 
			
 
				 \def STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS
			
 
				 \ingroup API_Modularized_Scheduler
			
 
				-     indicate if component is homogeneous
			
 
				+indicate if component is homogeneous
			
 
				 \def STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE
			
 
				 \ingroup API_Modularized_Scheduler
			
 
				-     indicate if all workers have the same memory component
			
 
				+indicate if all workers have the same memory component
			
 
				 
			
 
				 \struct starpu_sched_component
			
 
				 \ingroup API_Modularized_Scheduler
			
@@ -42,7 +40,7 @@ like <c>component->push_task(component,task)</c>
 
				 \var starpu_sched_component::workers_in_ctx
			
 
				      this member contain the subset of starpu_sched_component::workers that is currently available in the context
			
 
				      The push method should take this member into account.
			
 
				-     this member is set with :	
			
 
				+     this member is set with :
			
 
				      component->workers UNION tree->workers UNION
			
 
				      component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component
			
 
				 \var void *starpu_sched_component::data
			
--- a/doc/doxygen/chapters/api/mpi.doxy
+++ b/doc/doxygen/chapters/api/mpi.doxy
@@ -13,13 +13,12 @@
 
				 
			
 
				 \def STARPU_USE_MPI
			
 
				 \ingroup API_MPI_Support
			
 
				-This macro is defined when StarPU has been installed with MPI
			
 
				-support. It should be used in your code to detect the availability of
			
 
				-MPI.
			
 
				+Defined when StarPU has been installed with MPI support. It should be
			
 
				+used in your code to detect the availability of MPI.
			
 
				 
			
 
				 \fn int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Initializes the starpumpi library with the given communicator.
			
 
				+Initialize the starpumpi library with the given communicator \p comm.
			
 
				 \p initialize_mpi indicates if MPI should be initialized or not by StarPU.
			
 
				 If the value is not 0, MPI will be initialized by calling
			
 
				 <c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED, ...)</c>.
			
@@ -46,8 +45,8 @@ calling <c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED,
 
				 
			
 
				 \fn int starpu_mpi_shutdown(void)
			
 
				 \ingroup API_MPI_Support
			
 
				-Cleans the starpumpi library. This must be called between calling
			
 
				-starpu_mpi functions and starpu_shutdown(). \c MPI_Finalize() will be
			
 
				+Clean the starpumpi library. This must be called between calling
			
 
				+\c starpu_mpi functions and starpu_shutdown(). \c MPI_Finalize() will be
			
 
				 called if StarPU-MPI has been initialized by starpu_mpi_init().
			
 
				 
			
 
				 \fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
			
@@ -79,33 +78,33 @@ Return the size of the communicator \c MPI_COMM_WORLD
 
				 
			
 
				 \fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Performs a standard-mode, blocking send of \p data_handle to the node
			
 
				+Perform a standard-mode, blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm.
			
 
				 
			
 
				 \fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
			
 
				 \ingroup API_MPI_Support
			
 
				-Performs a standard-mode, blocking receive in \p data_handle from the
			
 
				+Perform a standard-mode, blocking receive in \p data_handle from the
			
 
				 node \p source using the message tag \p mpi_tag within the
			
 
				 communicator \p comm.
			
 
				 
			
 
				 \fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a standard-mode, non blocking send of \p data_handle to the node
			
 
				+Post a standard-mode, non blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm. After the call, the pointer to the request \p req can be used to
			
 
				 test or to wait for the completion of the communication.
			
 
				 
			
 
				 \fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a nonblocking receive in \p data_handle from the node \p source
			
 
				+Post a nonblocking receive in \p data_handle from the node \p source
			
 
				 using the message tag \p mpi_tag within the communicator \p comm.
			
 
				 After the call, the pointer to the request \p req can be used to test
			
 
				 or to wait for the completion of the communication.
			
 
				 
			
 
				 \fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a standard-mode, non blocking send of \p data_handle to the node
			
 
				+Post a standard-mode, non blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm. On completion, the \p callback function is called with the
			
 
				 argument \p arg.
			
@@ -116,7 +115,7 @@ of the request.
 
				 
			
 
				 \fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a nonblocking receive in \p data_handle from the node \p source
			
 
				+Post a nonblocking receive in \p data_handle from the node \p source
			
 
				 using the message tag \p mpi_tag within the communicator \p comm. On
			
 
				 completion, the \p callback function is called with the argument \p
			
 
				 arg.
			
@@ -127,7 +126,7 @@ of the request.
 
				 
			
 
				 \fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a nonblocking receive in \p data_handle from the node \p source
			
 
				+Post a nonblocking receive in \p data_handle from the node \p source
			
 
				 using the message tag \p mpi_tag within the communicator \p comm. On
			
 
				 completion, the \p callback function is called with the argument \p
			
 
				 arg.
			
@@ -143,13 +142,13 @@ of the request.
 
				 
			
 
				 \fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Performs a synchronous-mode, non-blocking send of \p data_handle to the node
			
 
				+Perform a synchronous-mode, non-blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm.
			
 
				 
			
 
				 \fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_MPI_Support
			
 
				-Performs a synchronous-mode, non-blocking send of \p data_handle to the node
			
 
				+Perform a synchronous-mode, non-blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm. On completion, the \p callback function is called with the argument \p
			
 
				 arg.
			
@@ -160,7 +159,7 @@ of the request.
 
				 
			
 
				 \fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
			
 
				 \ingroup API_MPI_Support
			
 
				-Returns when the operation identified by request \p req is complete.
			
 
				+Return when the operation identified by request \p req is complete.
			
 
				 
			
 
				 \fn int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status)
			
 
				 \ingroup API_MPI_Support
			
@@ -170,7 +169,7 @@ operation.
 
				 
			
 
				 \fn int starpu_mpi_barrier(MPI_Comm comm)
			
 
				 \ingroup API_MPI_Support
			
 
				-Blocks the caller until all group members of the communicator \p comm
			
 
				+Block the caller until all group members of the communicator \p comm
			
 
				 have called it.
			
 
				 
			
 
				 \fn int starpu_mpi_wait_for_all(MPI_Comm comm)
			
@@ -179,19 +178,19 @@ Wait until all StarPU tasks and communications for the given communicator are co
 
				 
			
 
				 \fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a standard-mode, non blocking send of \p data_handle to the node
			
 
				+Post a standard-mode, non blocking send of \p data_handle to the node
			
 
				 \p dest using the message tag \p mpi_tag within the communicator \p
			
 
				 comm. On completion, \p tag is unlocked.
			
 
				 
			
 
				 \fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts a nonblocking receive in \p data_handle from the node \p source
			
 
				+Post a nonblocking receive in \p data_handle from the node \p source
			
 
				 using the message tag \p mpi_tag within the communicator \p comm. On
			
 
				 completion, \p tag is unlocked.
			
 
				 
			
 
				 \fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts \p array_size standard-mode, non blocking send. Each post sends
			
 
				+Post \p array_size standard-mode, non blocking send. Each post sends
			
 
				 the n-th data of the array \p data_handle to the n-th node of the
			
 
				 array \p dest using the n-th message tag of the array \p mpi_tag
			
 
				 within the n-th communicator of the array \p comm. On completion of
			
@@ -199,7 +198,7 @@ the all the requests, \p tag is unlocked.
 
				 
			
 
				 \fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
			
 
				 \ingroup API_MPI_Support
			
 
				-Posts \p array_size nonblocking receive. Each post receives in the n-th
			
 
				+Post \p array_size nonblocking receive. Each post receives in the n-th
			
 
				 data of the array \p data_handle from the n-th node of the array \p
			
 
				 source using the n-th message tag of the array \p mpi_tag within the
			
 
				 n-th communicator of the array \p comm. On completion of the all the
			
@@ -327,25 +326,25 @@ flushes the cache for this data to avoid incoherencies.
 
				 
			
 
				 \def STARPU_EXECUTE_ON_NODE
			
 
				 \ingroup API_MPI_Support
			
 
				-this macro is used when calling starpu_mpi_task_insert(), and must be
			
 
				+Used when calling starpu_mpi_task_insert(), must be
			
 
				 followed by a integer value which specified the node on which to
			
 
				 execute the codelet.
			
 
				 
			
 
				 \def STARPU_EXECUTE_ON_DATA
			
 
				 \ingroup API_MPI_Support
			
 
				-this macro is used when calling starpu_mpi_task_insert(), and must be
			
 
				+Used when calling starpu_mpi_task_insert(), must be
			
 
				 followed by a data handle to specify that the node owning the given
			
 
				 data will execute the codelet.
			
 
				 
			
 
				 \def STARPU_NODE_SELECTION_POLICY
			
 
				 \ingroup API_MPI_Support
			
 
				-this macro is used when calling starpu_mpi_task_insert(), and must be
			
 
				+Used when calling starpu_mpi_task_insert(), must be
			
 
				 followed by a identifier to a node selection policy. This is needed when several
			
 
				 nodes own data in ::STARPU_W mode.
			
 
				 
			
 
				 \fn int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
			
 
				 \ingroup API_MPI_Support
			
 
				-This function does the same as the function starpu_mpi_task_insert(). It has been kept to avoid breaking old codes.
			
 
				+Call starpu_mpi_task_insert(). Symbol kept for backward compatibility.
			
 
				 
			
 
				 \fn int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
			
 
				 \ingroup API_MPI_Support
			
@@ -390,7 +389,7 @@ has been modified. The cache can be disabled (see \ref STARPU_MPI_CACHE).
 
				 
			
 
				 \fn struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
			
 
				 \ingroup API_MPI_Support
			
 
				-Create a task corresponding to codelet with the following arguments.
			
 
				+Create a task corresponding to \p codelet with the following given arguments.
			
 
				 The argument list must be zero-terminated. The function performs the
			
 
				 first two steps of the function starpu_mpi_task_insert(). Only the MPI
			
 
				 node selected in the first step of the algorithm will return a valid
			
@@ -400,9 +399,9 @@ the task on the node which creates it, with the SAME list of arguments.
 
				 
			
 
				 \fn int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
			
 
				 \ingroup API_MPI_Support
			
 
				-This function MUST be called after a call to starpu_mpi_task_build(),
			
 
				-with the SAME list of arguments. It performs the fourth -- last -- step of the algorithm described in
			
 
				-starpu_mpi_task_insert().
			
 
				+MUST be called after a call to starpu_mpi_task_build(),
			
 
				+with the SAME list of arguments. Perform the fourth -- last -- step of
			
 
				+the algorithm described in starpu_mpi_task_insert().
			
 
				 
			
 
				 \fn void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
			
 
				 \ingroup API_MPI_Support
			
@@ -442,7 +441,7 @@ data to be transfered.
 
				 
			
 
				 \fn int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func)
			
 
				 \ingroup API_MPI_Support
			
 
				-Register a new policy which can then be used when there is several nodes owning data in W mode.
			
 
				+Register a new policy which can then be used when there are several nodes owning data in ::STARPU_W mode.
			
 
				 Here an example of function defining a node selection policy.
			
 
				 The codelet will be executed on the node owing the first data with a size bigger than 1M, or on the node
			
 
				 0 if no data fits the given size.
			
@@ -479,7 +478,7 @@ Unregister a previously registered policy.
 
				 
			
 
				 \fn void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
			
 
				 \ingroup API_MPI_Support
			
 
				-Perform a reduction on the given data. All nodes send the data to its
			
 
				+Perform a reduction on the given data \p data_handle. All nodes send the data to its
			
 
				 owner node which will perform a reduction.
			
 
				 
			
 
				 \fn int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
			
@@ -509,7 +508,7 @@ function is called with the argument \p sarg on any other process.
 
				 
			
 
				 \def STARPU_USE_MPI_MASTER_SLAVE
			
 
				 \ingroup API_MPI_Support
			
 
				-This macro is defined when StarPU has been installed with MPI Master Slave
			
 
				+Defined when StarPU has been installed with MPI Master Slave
			
 
				 support. It should be used in your code to detect the availability of
			
 
				 MPI Master Slave.
			
 
				 
			
--- a/doc/doxygen/chapters/api/multiformat_data_interface.doxy
+++ b/doc/doxygen/chapters/api/multiformat_data_interface.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
@@ -36,12 +36,19 @@ The different fields are:
 
				 todo
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				 \var enum starpu_data_interface_id starpu_multiformat_interface::id
			
 
				+    todo
			
 
				 \var void *starpu_multiformat_interface::cpu_ptr
			
 
				+    todo
			
 
				 \var void *starpu_multiformat_interface::cuda_ptr
			
 
				+    todo
			
 
				 \var void *starpu_multiformat_interface::opencl_ptr
			
 
				+    todo
			
 
				 \var void *starpu_multiformat_interface::mic_ptr
			
 
				+    todo
			
 
				 \var uint32_t starpu_multiformat_interface::nx
			
 
				+    todo
			
 
				 \var struct starpu_multiformat_data_interface_ops *starpu_multiformat_interface::ops
			
 
				+    todo
			
 
				 
			
 
				 \fn void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
@@ -54,22 +61,22 @@ describes the format.
 
				 
			
 
				 \def STARPU_MULTIFORMAT_GET_CPU_PTR(interface)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				-returns the local pointer to the data with CPU format.
			
 
				+Return the local pointer to the data with CPU format.
			
 
				 
			
 
				 \def STARPU_MULTIFORMAT_GET_CUDA_PTR(interface)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				-returns the local pointer to the data with CUDA format.
			
 
				+Return the local pointer to the data with CUDA format.
			
 
				 
			
 
				 \def STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				-returns the local pointer to the data with OpenCL format.
			
 
				+Return the local pointer to the data with OpenCL format.
			
 
				 
			
 
				 \def STARPU_MULTIFORMAT_GET_MIC_PTR(interface)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				-returns the local pointer to the data with MIC format.
			
 
				+Return the local pointer to the data with MIC format.
			
 
				 
			
 
				 \def STARPU_MULTIFORMAT_GET_NX(interface)
			
 
				 \ingroup API_Multiformat_Data_Interface
			
 
				-returns the number of elements in the data.
			
 
				+Return the number of elements in the data.
			
 
				 
			
 
				 */
			
--- a/doc/doxygen/chapters/api/opencl_extensions.doxy
+++ b/doc/doxygen/chapters/api/opencl_extensions.doxy
@@ -10,42 +10,42 @@
 
				 
			
 
				 \def STARPU_USE_OPENCL
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This macro is defined when StarPU has been installed with
			
 
				+Defined when StarPU has been installed with
			
 
				 OpenCL support. It should be used in your code to detect the
			
 
				 availability of OpenCL as shown in \ref FullSourceCodeVectorScal.
			
 
				 
			
 
				 \def STARPU_MAXOPENCLDEVS
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This macro defines the maximum number of OpenCL devices that are
			
 
				+Define the maximum number of OpenCL devices that are
			
 
				 supported by StarPU.
			
 
				 
			
 
				 \def STARPU_OPENCL_DATADIR
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This macro defines the directory in which the OpenCL codelets of the
			
 
				+Define the directory in which the OpenCL codelets of the
			
 
				 applications provided with StarPU have been installed.
			
 
				 
			
 
				 \struct starpu_opencl_program
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Stores the OpenCL programs as compiled for the different OpenCL
			
 
				+Store the OpenCL programs as compiled for the different OpenCL
			
 
				 devices.
			
 
				 \var cl_program starpu_opencl_program::programs[STARPU_MAXOPENCLDEVS]
			
 
				-Stores each program for each OpenCL device.
			
 
				+    Store each program for each OpenCL device.
			
 
				 
			
 
				 @name Writing OpenCL kernels
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				 
			
 
				 \fn void starpu_opencl_get_context(int devid, cl_context *context)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Places the OpenCL context of the device designated by \p devid
			
 
				-into \p context.
			
 
				+Return the OpenCL context of the device designated by \p devid
			
 
				+in \p context.
			
 
				 
			
 
				 \fn void starpu_opencl_get_device(int devid, cl_device_id *device)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Places the cl_device_id corresponding to \p devid in \p device.
			
 
				+Return the cl_device_id corresponding to \p devid in \p device.
			
 
				 
			
 
				 \fn void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Places the command queue of the device designated by \p devid
			
 
				+Return the command queue of the device designated by \p devid
			
 
				 into \p queue.
			
 
				 
			
 
				 \fn void starpu_opencl_get_current_context(cl_context *context)
			
@@ -59,12 +59,12 @@ worker.
 
				 
			
 
				 \fn int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Sets the arguments of a given kernel. The list of arguments
			
 
				+Set the arguments of a given kernel. The list of arguments
			
 
				 must be given as <c>(size_t size_of_the_argument, cl_mem *
			
 
				-pointer_to_the_argument)</c>. The last argument must be 0. Returns the
			
 
				+pointer_to_the_argument)</c>. The last argument must be 0. Return the
			
 
				 number of arguments that were successfully set. In case of failure,
			
 
				-returns the id of the argument that could not be set and err is set to
			
 
				-the error returned by OpenCL. Otherwise, returns the number of
			
 
				+return the id of the argument that could not be set and \p err is set to
			
 
				+the error returned by OpenCL. Otherwise, return the number of
			
 
				 arguments that were set.
			
 
				 
			
 
				 Here an example:
			
@@ -94,15 +94,15 @@ purpose for instance).
 
				 
			
 
				 \fn int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This function compiles an OpenCL source code stored in a file.
			
 
				+Compile an OpenCL source code stored in a file.
			
 
				 
			
 
				 \fn int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This function compiles an OpenCL source code stored in a string.
			
 
				+Compile an OpenCL source code stored in a string.
			
 
				 
			
 
				 \fn int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This function unloads an OpenCL compiled code.
			
 
				+Unload an OpenCL compiled code.
			
 
				 
			
 
				 \fn void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source)
			
 
				 \ingroup API_OpenCL_Extensions
			
@@ -119,12 +119,12 @@ string.
 
				 
			
 
				 \fn void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Similar to function starpu_opencl_load_program_source() but it allocates the buffers located_file_name, located_dir_name and opencl_program_source.
			
 
				+Similar to function starpu_opencl_load_program_source() but allocate the buffers \p located_file_name, \p located_dir_name and \p opencl_program_source.
			
 
				 
			
 
				 \fn int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				 Compile the OpenCL kernel stored in the file \p source_file_name
			
 
				-with the given options \p build_options and stores the result in the
			
 
				+with the given options \p build_options and store the result in the
			
 
				 directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
			
 
				 \p source_file_name. The compilation is done for every OpenCL device,
			
 
				 and the filename is suffixed with the vendor id and the device id of
			
@@ -133,7 +133,7 @@ the OpenCL device.
 
				 \fn int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				 Compile the OpenCL kernel in the string \p opencl_program_source
			
 
				-with the given options \p build_options and stores the result in the
			
 
				+with the given options \p build_options and store the result in the
			
 
				 directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
			
 
				 file_name. The compilation is done for every OpenCL device, and the
			
 
				 filename is suffixed with the vendor id and the device id of the
			
@@ -163,9 +163,9 @@ Release the given \p kernel, to be called after kernel execution.
 
				 
			
 
				 \fn int starpu_opencl_collect_stats(cl_event event)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-This function allows to collect statistics on a kernel execution.
			
 
				+Collect statistics on a kernel execution.
			
 
				 After termination of the kernels, the OpenCL codelet should call this
			
 
				-function to pass it the even returned by \c clEnqueueNDRangeKernel(), to
			
 
				+function with the event returned by \c clEnqueueNDRangeKernel(), to
			
 
				 let StarPU collect statistics about the kernel execution (used cycles,
			
 
				 consumed energy).
			
 
				 
			
@@ -179,13 +179,13 @@ error code.
 
				 
			
 
				 \fn void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Given a valid error status, prints the corresponding error message on
			
 
				-stdout, along with the given function name \p func, the given filename
			
 
				-\p file, the given line number \p line and the given message \p msg.
			
 
				+Given a valid error status, print the corresponding error message on
			
 
				+\c stdout, along with the function name \p func, the filename
			
 
				+\p file, the line number \p line and the message \p msg.
			
 
				 
			
 
				 \def STARPU_OPENCL_DISPLAY_ERROR(status)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_display_error() with the given error
			
 
				+Call the function starpu_opencl_display_error() with the error
			
 
				 \p status, the current function name, current file and line number,
			
 
				 and a empty message.
			
 
				 
			
@@ -195,15 +195,14 @@ Call the function starpu_opencl_display_error() and abort.
 
				 
			
 
				 \def STARPU_OPENCL_REPORT_ERROR(status)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_report_error() with the given error \p
			
 
				-status, with the current function name, current file and line number,
			
 
				+Call the function starpu_opencl_report_error() with the error \p
			
 
				+status, the current function name, current file and line number,
			
 
				 and a empty message.
			
 
				 
			
 
				 \def STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status)
			
 
				 \ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_report_error() with the given \p msg
			
 
				-and the given error \p status, with the current function name, current
			
 
				-file and line number.
			
 
				+Call the function starpu_opencl_report_error() with \p msg
			
 
				+and \p status, the current function name, current file and line number.
			
 
				 
			
 
				 \fn cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags)
			
 
				 \ingroup API_OpenCL_Extensions
			
--- a/doc/doxygen/chapters/api/performance_model.doxy
+++ b/doc/doxygen/chapters/api/performance_model.doxy
@@ -12,46 +12,44 @@
 
				 \ingroup API_Performance_Model
			
 
				 TODO
			
 
				 \var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
			
 
				-todo
			
 
				+    todo
			
 
				 \var starpu_perfmodel_type::STARPU_PER_ARCH
			
 
				-\ingroup API_Performance_Model
			
 
				-Application-provided per-arch cost model function
			
 
				+    Application-provided per-arch cost model function
			
 
				 \var starpu_perfmodel_type::STARPU_COMMON
			
 
				-\ingroup API_Performance_Model
			
 
				-Application-provided common cost model function, with per-arch factor
			
 
				+    Application-provided common cost model function, with per-arch
			
 
				+    factor
			
 
				 \var starpu_perfmodel_type::STARPU_HISTORY_BASED
			
 
				-\ingroup API_Performance_Model
			
 
				-Automatic history-based cost model
			
 
				+    Automatic history-based cost model
			
 
				 \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
			
 
				-\ingroup API_Performance_Model
			
 
				-Automatic linear regression-based cost model  (alpha * size ^ beta)
			
 
				+    Automatic linear regression-based cost model  (alpha * size ^
			
 
				+    beta)
			
 
				 \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
			
 
				-\ingroup API_Performance_Model
			
 
				-Automatic non-linear regression-based cost model (a * size ^ b + c)
			
 
				+    Automatic non-linear regression-based cost model (a * size ^ b +
			
 
				+    c)
			
 
				 \var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
			
 
				-\ingroup API_Performance_Model
			
 
				-Automatic multiple linear regression-based cost model. Application provides parameters, their combinations and exponents
			
 
				+    Automatic multiple linear regression-based cost model. Application
			
 
				+    provides parameters, their combinations and exponents.
			
 
				 
			
 
				 \struct starpu_perfmodel_device
			
 
				 todo
			
 
				 \ingroup API_Performance_Model
			
 
				 \var enum starpu_worker_archtype starpu_perfmodel_device::type
			
 
				-is the type of the device
			
 
				+    type of the device
			
 
				 \var int starpu_perfmodel_device::devid
			
 
				-is the identifier of the precise device
			
 
				+    identifier of the precise device
			
 
				 \var int starpu_perfmodel_device::ncore
			
 
				-is the number of execution in parallel, minus 1
			
 
				+    number of executions in parallel, minus 1
			
 
				 
			
 
				 \struct starpu_perfmodel_arch
			
 
				 todo
			
 
				 \ingroup API_Performance_Model
			
 
				 \var int starpu_perfmodel_arch::ndevices
			
 
				-is the number of the devices for the given arch
			
 
				+    number of the devices for the given arch
			
 
				 \var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
			
 
				-is the list of the devices for the given arch
			
 
				+    list of the devices for the given arch
			
 
				 
			
 
				 \struct starpu_perfmodel
			
 
				-Contains all information about a performance model. At least the
			
 
				+Contain all information about a performance model. At least the
			
 
				 type and symbol fields have to be filled when defining a performance
			
 
				 model for a codelet. For compatibility, make sure to initialize the
			
 
				 whole structure to zero, either by using explicit memset, or by
			
@@ -59,161 +57,176 @@ letting the compiler implicitly do it in e.g. static storage case. If
 
				 not provided, other fields have to be zero.
			
 
				 \ingroup API_Performance_Model
			
 
				 \var enum starpu_perfmodel_type starpu_perfmodel::type
			
 
				-is the type of performance model
			
 
				-<ul>
			
 
				-<li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
			
 
				-::STARPU_NL_REGRESSION_BASED: No other fields needs to be provided,
			
 
				-this is purely history-based.
			
 
				-</li>
			
 
				-<li> ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields starpu_perfmodel::nparameters (number of different parameters),  starpu_perfmodel::ncombinations (number of parameters combinations-tuples) and table starpu_perfmodel::combinations which defines exponents of the equation. Function cl_perf_func also needs to define how to extract parameters from the task.
			
 
				-</li>
			
 
				-<li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
			
 
				-filled with a function that returns the cost in micro-seconds on the arch given
			
 
				-as parameter, or field starpu_perfmodel::per_arch has to be
			
 
				-filled with functions which return the cost in micro-seconds.
			
 
				-</li>
			
 
				-<li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
 
				-filled with a function that returns the cost in micro-seconds on a
			
 
				-CPU, timing on other archs will be determined by multiplying by an
			
 
				-arch-specific factor.
			
 
				-</li>
			
 
				-</ul>
			
 
				+    type of performance model
			
 
				+    <ul>
			
 
				+    <li>
			
 
				+    ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
			
 
				+    ::STARPU_NL_REGRESSION_BASED: No other field needs to be
			
 
				+    provided, this is purely history-based.
			
 
				+    </li>
			
 
				+    <li>
			
 
				+    ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
			
 
				+    starpu_perfmodel::nparameters (number of different parameters),
			
 
				+    starpu_perfmodel::ncombinations (number of parameters
			
 
				+    combinations-tuples) and table starpu_perfmodel::combinations
			
 
				+    which defines exponents of the equation. Function cl_perf_func
			
 
				+    also needs to define how to extract parameters from the task. 
			
 
				+    </li>
			
 
				+    <li>
			
 
				+    ::STARPU_PER_ARCH: either field
			
 
				+    starpu_perfmodel::arch_cost_function has to be filled with a
			
 
				+    function that returns the cost in micro-seconds on the arch given
			
 
				+    as parameter, or field starpu_perfmodel::per_arch has to be filled
			
 
				+    with functions which return the cost in micro-seconds.
			
 
				+    </li>
			
 
				+    <li>
			
 
				+    ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
 
				+    filled with a function that returns the cost in micro-seconds on a
			
 
				+    CPU, timing on other archs will be determined by multiplying by an
			
 
				+    arch-specific factor.
			
 
				+    </li>
			
 
				+    </ul>
			
 
				 \var const char *starpu_perfmodel::symbol
			
 
				-is the symbol name for the performance model, which will be used as
			
 
				-file name to store the model. It must be set otherwise the model will
			
 
				-be ignored.
			
 
				+    symbol name for the performance model, which will be used as file
			
 
				+    name to store the model. It must be set otherwise the model will
			
 
				+    be ignored.
			
 
				 \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
			
 
				-Used by ::STARPU_COMMON takes a task and implementation number, and
			
 
				-must return a task duration estimation in micro-seconds.
			
 
				+    Used by ::STARPU_COMMON. Take a task and implementation number,
			
 
				+    and must return a task duration estimation in micro-seconds.
			
 
				 \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
			
 
				-Used by ::STARPU_COMMON takes a task, an arch and implementation number, and
			
 
				-must return a task duration estimation in micro-seconds on that arch.
			
 
				+    Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
			
 
				+    number, and must return a task duration estimation in
			
 
				+    micro-seconds on that arch.
			
 
				 \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
			
 
				-Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				-::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, takes a task and
			
 
				-implementation number, and returns the size to be used as index to distinguish
			
 
				-histories and as a base for regressions.
			
 
				+    Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				+    ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
			
 
				+    implementation number, and return the size to be used as index to
			
 
				+    distinguish histories and as a base for regressions.
			
 
				 \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
			
 
				-Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, takes a task and returns the
			
 
				-footprint to be used as index to distinguish histories. The default is to use
			
 
				-the starpu_task_data_footprint() function.
			
 
				+    Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
			
 
				+    and return the footprint to be used as index to distinguish
			
 
				+    histories. The default is to use the starpu_task_data_footprint()
			
 
				+    function.
			
 
				 \var unsigned starpu_perfmodel::is_loaded
			
 
				 \private
			
 
				-Whether the performance model is already loaded from the disk.
			
 
				+    Whether the performance model is already loaded from the disk.
			
 
				 \var unsigned starpu_perfmodel::benchmarking
			
 
				 \private
			
 
				-todo
			
 
				+    todo
			
 
				 \var unsigned starpu_perfmodel::is_init
			
 
				-todo
			
 
				+    todo
			
 
				 \var starpu_perfmodel_state_t starpu_perfmodel::state
			
 
				 \private
			
 
				-todo
			
 
				+    todo
			
 
				 \var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
			
 
				-todo
			
 
				+    todo
			
 
				 \var const char ** starpu_perfmodel::parameters_names
			
 
				 \private
			
 
				-Names of parameters used for multiple linear regression models (M, N, K)
			
 
				+    Names of parameters used for multiple linear regression models (M,
			
 
				+    N, K)
			
 
				 \var unsigned starpu_perfmodel::nparameters
			
 
				 \private
			
 
				-Number of parameters used for multiple linear regression models
			
 
				+    Number of parameters used for multiple linear regression models
			
 
				 \var unsigned ** starpu_perfmodel::combinations
			
 
				 \private
			
 
				-Table of combinations of parameters (and the exponents) used for multiple linear regression models
			
 
				+    Table of combinations of parameters (and the exponents) used for
			
 
				+    multiple linear regression models
			
 
				 \var unsigned starpu_perfmodel::ncombinations
			
 
				 \private
			
 
				-Number of combination of parameters used for multiple linear regression models
			
 
				-
			
 
				+    Number of combinations of parameters used for multiple linear
			
 
				+    regression models
			
 
				 
			
 
				 \struct starpu_perfmodel_regression_model
			
 
				-...
			
 
				+todo
			
 
				 \ingroup API_Performance_Model
			
 
				 \var double starpu_perfmodel_regression_model::sumlny
			
 
				-sum of ln(measured)
			
 
				+    sum of ln(measured)
			
 
				 \var double starpu_perfmodel_regression_model::sumlnx
			
 
				-sum of ln(size)
			
 
				+    sum of ln(size)
			
 
				 \var double starpu_perfmodel_regression_model::sumlnx2
			
 
				-sum of ln(size)^2
			
 
				+    sum of ln(size)^2
			
 
				 \var unsigned long starpu_perfmodel_regression_model::minx
			
 
				-minimum size
			
 
				+    minimum size
			
 
				 \var unsigned long starpu_perfmodel_regression_model::maxx
			
 
				-maximum size
			
 
				+    maximum size
			
 
				 \var double starpu_perfmodel_regression_model::sumlnxlny
			
 
				-sum of ln(size)*ln(measured)
			
 
				+    sum of ln(size)*ln(measured)
			
 
				 \var double starpu_perfmodel_regression_model::alpha
			
 
				-estimated = alpha * size ^ beta
			
 
				+    estimated = alpha * size ^ beta
			
 
				 \var double starpu_perfmodel_regression_model::beta
			
 
				-estimated = alpha * size ^ beta
			
 
				+    estimated = alpha * size ^ beta
			
 
				 \var unsigned starpu_perfmodel_regression_model::valid
			
 
				-whether the linear regression model is valid (i.e. enough measures)
			
 
				+    whether the linear regression model is valid (i.e. enough measures)
			
 
				 \var double starpu_perfmodel_regression_model::a
			
 
				-estimated = a size ^b + c
			
 
				+    estimated = a size ^b + c
			
 
				 \var double starpu_perfmodel_regression_model::b
			
 
				-estimated = a size ^b + c
			
 
				+    estimated = a size ^b + c
			
 
				 \var double starpu_perfmodel_regression_model::c
			
 
				-estimated = a size ^b + c
			
 
				+    estimated = a size ^b + c
			
 
				 \var unsigned starpu_perfmodel_regression_model::nl_valid
			
 
				-whether the non-linear regression model is valid (i.e. enough measures)
			
 
				+    whether the non-linear regression model is valid (i.e. enough measures)
			
 
				 \var unsigned starpu_perfmodel_regression_model::nsample
			
 
				-number of sample values for non-linear regression
			
 
				+    number of sample values for non-linear regression
			
 
				 \var double starpu_perfmodel_regression_model::coeff[]
			
 
				-list of computed coefficients for multiple linear regression model
			
 
				+    list of computed coefficients for multiple linear regression model
			
 
				 \var double starpu_perfmodel_regression_model::ncoeff
			
 
				-number of coefficients for multiple linear regression model
			
 
				+    number of coefficients for multiple linear regression model
			
 
				 \var double starpu_perfmodel_regression_model::multi_valid
			
 
				-whether the multiple linear regression model is valid
			
 
				+    whether the multiple linear regression model is valid
			
 
				 
			
 
				 \struct starpu_perfmodel_per_arch
			
 
				 contains information about the performance model of a given
			
 
				 arch.
			
 
				 \ingroup API_Performance_Model
			
 
				 \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
			
 
				-Used by ::STARPU_PER_ARCH, must point to functions which take a task,
			
 
				-the target arch and implementation number (as mere conveniency, since
			
 
				-the array is already indexed by these), and must return a task
			
 
				-duration estimation in micro-seconds.
			
 
				+    Used by ::STARPU_PER_ARCH, must point to functions which take a
			
 
				+    task, the target arch and implementation number (as mere
			
 
				+    convenience, since the array is already indexed by these), and
			
 
				+    must return a task duration estimation in micro-seconds.
			
 
				 \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
			
 
				-Same as in structure starpu_perfmodel, but per-arch, in case it
			
 
				-depends on the architecture-specific implementation.
			
 
				+    Same as in structure starpu_perfmodel, but per-arch, in case it
			
 
				+    depends on the architecture-specific implementation.
			
 
				 \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
			
 
				 \private
			
 
				-The history of performance measurements.
			
 
				+    The history of performance measurements.
			
 
				 \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
			
 
				 \private
			
 
				-Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED,
			
 
				-records all execution history measures.
			
 
				+    Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
			
 
				+    ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
			
 
				+    measures.
			
 
				 \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
			
 
				 \private
			
 
				-Used by ::STARPU_REGRESSION_BASED, 
			
 
				-::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the
			
 
				-regression.
			
 
				+    Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
			
 
				+    and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
			
 
				+    factors of the regression.
			
 
				 
			
 
				 \struct starpu_perfmodel_history_list
			
 
				 todo
			
 
				 \ingroup API_Performance_Model
			
 
				 \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
			
 
				-todo
			
 
				+    todo
			
 
				 \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
			
 
				-todo
			
 
				+    todo
			
 
				 
			
 
				 \struct starpu_perfmodel_history_entry
			
 
				 todo
			
 
				 \ingroup API_Performance_Model
			
 
				 \var double starpu_perfmodel_history_entry::mean
			
 
				-mean_n = 1/n sum
			
 
				+    mean_n = 1/n sum
			
 
				 \var double starpu_perfmodel_history_entry::deviation
			
 
				-n dev_n = sum2 - 1/n (sum)^2
			
 
				+    n dev_n = sum2 - 1/n (sum)^2
			
 
				 \var double starpu_perfmodel_history_entry::sum
			
 
				-sum of samples (in µs)
			
 
				+    sum of samples (in µs)
			
 
				 \var double starpu_perfmodel_history_entry::sum2
			
 
				-sum of samples^2
			
 
				+    sum of samples^2
			
 
				 \var unsigned starpu_perfmodel_history_entry::nsample
			
 
				-number of samples
			
 
				+    number of samples
			
 
				 \var uint32_t starpu_perfmodel_history_entry::footprint
			
 
				-data footprint
			
 
				+    data footprint
			
 
				 \var size_t starpu_perfmodel_history_entry::size
			
 
				-in bytes
			
 
				+    in bytes
			
 
				 \var double starpu_perfmodel_history_entry::flops
			
 
				-Provided by the application
			
 
				+    Provided by the application
			
 
				 
			
 
				 \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
			
 
				 \ingroup API_Performance_Model
			
@@ -221,31 +234,31 @@ todo
 
				 
			
 
				 \fn void starpu_perfmodel_free_sampling_directories(void)
			
 
				 \ingroup API_Performance_Model
			
 
				-this function frees internal memory used for sampling directory
			
 
				+Free internal memory used for sampling directory
			
 
				 management. It should only be called by an application which is not
			
 
				 calling starpu_shutdown() as this function already calls it. See for
			
 
				 example <c>tools/starpu_perfmodel_display.c</c>.
			
 
				 
			
 
				 \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
			
 
				 \ingroup API_Performance_Model
			
 
				-loads the performance model found in the given file. The model structure has to be
			
 
				+Load the performance model found in the file named \p filename. \p model has to be
			
 
				 completely zero, and will be filled with the information stored in the given file.
			
 
				 
			
 
				 \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
			
 
				 \ingroup API_Performance_Model
			
 
				-loads a given performance model. The model structure has to be
			
 
				-completely zero, and will be filled with the information saved in
			
 
				+Load a given performance model. \p model has to be
			
 
				+completely zero, and will be filled with the information stored in
			
 
				 <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
			
 
				-external tools that should read the performance model files.
			
 
				+external tools that want to read the performance model files.
			
 
				 
			
 
				 \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
			
 
				 \ingroup API_Performance_Model
			
 
				-unloads the given model which has been previously loaded
			
 
				+Unload \p model which has been previously loaded
			
 
				 through the function starpu_perfmodel_load_symbol()
			
 
				 
			
 
				 \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
			
 
				 \ingroup API_Performance_Model
			
 
				-returns the path to the debugging information for the performance model.
			
 
				+Return the path to the debugging information for the performance model.
			
 
				 
			
 
				 \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
			
 
				 \ingroup API_Performance_Model
			
@@ -253,19 +266,19 @@ todo
 
				 
			
 
				 \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
			
 
				 \ingroup API_Performance_Model
			
 
				-returns the architecture name for \p arch
			
 
				+Return the architecture name for \p arch
			
 
				 
			
 
				 \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
			
 
				 \ingroup API_Performance_Model
			
 
				-returns the architecture type of a given worker.
			
 
				+Return the architecture type of the worker \p workerid.
			
 
				 
			
 
				 \fn int starpu_perfmodel_list(FILE *output)
			
 
				 \ingroup API_Performance_Model
			
 
				-prints a list of all performance models on \p output
			
 
				+Print a list of all performance models on \p output
			
 
				 
			
 
				 \fn void starpu_perfmodel_directory(FILE *output)
			
 
				 \ingroup API_Performance_Model
			
 
				-prints the directory name storing performance models on \p output
			
 
				+Print the directory name storing performance models on \p output
			
 
				 
			
 
				 \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
			
 
				 \ingroup API_Performance_Model
			
@@ -281,23 +294,23 @@ todo
 
				 
			
 
				 \fn void starpu_bus_print_bandwidth(FILE *f)
			
 
				 \ingroup API_Performance_Model
			
 
				-prints a matrix of bus bandwidths on \p f.
			
 
				+Print a matrix of bus bandwidths on \p f.
			
 
				 
			
 
				 \fn void starpu_bus_print_affinity(FILE *f)
			
 
				 \ingroup API_Performance_Model
			
 
				-prints the affinity devices on \p f.
			
 
				+Print the affinity devices on \p f.
			
 
				 
			
 
				 \fn void starpu_bus_print_filenames(FILE *f)
			
 
				 \ingroup API_Performance_Model
			
 
				-prints on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
			
 
				+Print on \p f the names of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
			
 
				 
			
 
				 \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
			
 
				 \ingroup API_Performance_Model
			
 
				-This feeds the performance model model with an explicit
			
 
				+Feed the performance model \p model with an explicit
			
 
				 measurement measured (in µs), in addition to measurements done by StarPU
			
 
				 itself. This can be useful when the application already has an
			
 
				 existing set of measurements done in good conditions, that StarPU
			
 
				-could benefit from instead of doing on-line measurements. And example
			
 
				+could benefit from instead of doing on-line measurements. An example
			
 
				 of use can be seen in \ref PerformanceModelExample.
			
 
				 
			
 
				 \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
			
--- a/doc/doxygen/chapters/api/profiling.doxy
+++ b/doc/doxygen/chapters/api/profiling.doxy
@@ -14,52 +14,52 @@ This structure contains information about the execution of a
 
				 task. It is accessible from the field starpu_task::profiling_info if
			
 
				 profiling was enabled.
			
 
				 \var struct timespec starpu_profiling_task_info::submit_time
			
 
				-Date of task submission (relative to the initialization of StarPU).
			
 
				+    Date of task submission (relative to the initialization of StarPU).
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::push_start_time
			
 
				-Time when the task was submitted to the scheduler.
			
 
				+    Time when the task was submitted to the scheduler.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::push_end_time
			
 
				-Time when the scheduler finished with the task submission.
			
 
				+    Time when the scheduler finished with the task submission.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::pop_start_time
			
 
				-Time when the scheduler started to be requested for a task, and eventually gave that task.
			
 
				+    Time when the scheduler started to be requested for a task, and eventually gave that task.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::pop_end_time
			
 
				-Time when the scheduler finished providing the task for execution.
			
 
				+    Time when the scheduler finished providing the task for execution.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::acquire_data_start_time
			
 
				-Time when the worker started fetching input data.
			
 
				+    Time when the worker started fetching input data.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::acquire_data_end_time
			
 
				-Time when the worker finished fetching input data.
			
 
				+    Time when the worker finished fetching input data.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::start_time
			
 
				-Date of task execution beginning (relative to the initialization of StarPU).
			
 
				+    Date of task execution beginning (relative to the initialization of StarPU).
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::end_time
			
 
				-Date of task execution termination (relative to the initialization of StarPU).
			
 
				+    Date of task execution termination (relative to the initialization of StarPU).
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::release_data_start_time
			
 
				-Time when the worker started releasing data.
			
 
				+    Time when the worker started releasing data.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::release_data_end_time
			
 
				-Time when the worker finished releasing data.
			
 
				+    Time when the worker finished releasing data.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::callback_start_time
			
 
				-Time when the worker started the application callback for the task.
			
 
				+    Time when the worker started the application callback for the task.
			
 
				 
			
 
				 \var struct timespec starpu_profiling_task_info::callback_end_time
			
 
				-Time when the worker finished the application callback for the task.
			
 
				+    Time when the worker finished the application callback for the task.
			
 
				 
			
 
				 \var int starpu_profiling_task_info::workerid
			
 
				-Identifier of the worker which has executed the task.
			
 
				+    Identifier of the worker which has executed the task.
			
 
				 
			
 
				 \var uint64_t starpu_profiling_task_info::used_cycles
			
 
				-Number of cycles used by the task, only available in the MoviSim
			
 
				+    Number of cycles used by the task, only available in the MoviSim
			
 
				 
			
 
				 \var uint64_t starpu_profiling_task_info::stall_cycles
			
 
				-Number of cycles stalled within the task, only available in the MoviSim
			
 
				+    Number of cycles stalled within the task, only available in the MoviSim
			
 
				 
			
 
				 \var double starpu_profiling_task_info::energy_consumed
			
 
				 Energy consumed by the task, only available in the MoviSim
			
@@ -100,18 +100,16 @@ todo
 
				 
			
 
				 \typedef STARPU_PROFILING_DISABLE
			
 
				 \ingroup API_Profiling
			
 
				-This value is used when calling the function
			
 
				-starpu_profiling_status_set() to disable profiling.
			
 
				+Used when calling the function starpu_profiling_status_set() to disable profiling.
			
 
				 
			
 
				 \typedef STARPU_PROFILING_ENABLE
			
 
				 \ingroup API_Profiling
			
 
				-This value is used when calling the function
			
 
				-starpu_profiling_status_set() to enable profiling.
			
 
				+Used when calling the function starpu_profiling_status_set() to enable profiling.
			
 
				 
			
 
				 \fn int starpu_profiling_status_set(int status)
			
 
				 \ingroup API_Profiling
			
 
				-This function sets the profiling status. Profiling is activated
			
 
				-by passing \ref STARPU_PROFILING_ENABLE in status. Passing
			
 
				+Set the profiling status. Profiling is activated
			
 
				+by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
			
 
				 \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
			
 
				 resets all profiling measurements. When profiling is enabled, the
			
 
				 field starpu_task::profiling_info points to a valid structure
			
@@ -126,13 +124,12 @@ there was an error.
 
				 
			
 
				 \fn void starpu_profiling_init(void)
			
 
				 \ingroup API_Profiling
			
 
				-This function resets performance counters and enable profiling if the
			
 
				+Reset performance counters and enable profiling if the
			
 
				 environment variable \ref STARPU_PROFILING is set to a positive value.
			
 
				 
			
 
				 \fn void starpu_profiling_set_id(int new_id)
			
 
				 \ingroup API_Profiling
			
 
				-This function sets the ID used for profiling trace filename. It
			
 
				-needs to be called before starpu_init().
			
 
				+Set the ID used for profiling trace filename. It has to be called before starpu_init().
			
 
				 
			
 
				 \fn int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info)
			
 
				 \ingroup API_Profiling
			
@@ -164,21 +161,21 @@ Return the destination point of bus \p busid
 
				 
			
 
				 \fn double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
			
 
				 \ingroup API_Profiling
			
 
				-Returns the time elapsed between \p start and \p end in microseconds.
			
 
				+Return the time elapsed between \p start and \p end in microseconds.
			
 
				 
			
 
				 \fn double starpu_timing_timespec_to_us(struct timespec *ts)
			
 
				 \ingroup API_Profiling
			
 
				-Converts the given timespec \p ts into microseconds
			
 
				+Convert the given timespec \p ts into microseconds.
			
 
				 
			
 
				 \fn void starpu_profiling_bus_helper_display_summary(void)
			
 
				 \ingroup API_Profiling
			
 
				-Displays statistics about the bus on stderr. if the environment
			
 
				+Display statistics about the bus on \c stderr if the environment
			
 
				 variable \ref STARPU_BUS_STATS is defined. The function is called
			
 
				 automatically by starpu_shutdown().
			
 
				 
			
 
				 \fn void starpu_profiling_worker_helper_display_summary(void)
			
 
				 \ingroup API_Profiling
			
 
				-Displays statistics about the workers on stderr if the
			
 
				+Display statistics about the workers on \c stderr if the
			
 
				 environment variable \ref STARPU_WORKER_STATS is defined. The function is
			
 
				 called automatically by starpu_shutdown().
			
 
				 
			
--- a/doc/doxygen/chapters/api/running_driver.doxy
+++ b/doc/doxygen/chapters/api/running_driver.doxy
@@ -12,10 +12,10 @@
 
				 structure for a driver
			
 
				 \ingroup API_Running_Drivers
			
 
				 \var enum starpu_worker_archtype starpu_driver::type
			
 
				-The type of the driver. Only ::STARPU_CPU_WORKER,
			
 
				-::STARPU_CUDA_WORKER and ::STARPU_OPENCL_WORKER are currently supported.
			
 
				+    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
			
 
				+    and ::STARPU_OPENCL_WORKER are currently supported.
			
 
				 \var union starpu_driver::id
			
 
				-The identifier of the driver.
			
 
				+    Identifier of the driver.
			
 
				 
			
 
				 \fn int starpu_driver_run(struct starpu_driver *d)
			
 
				 \ingroup API_Running_Drivers
			
@@ -30,8 +30,8 @@ and eventually starpu_driver_deinit().
 
				 
			
 
				 \fn int starpu_driver_init(struct starpu_driver *d)
			
 
				 \ingroup API_Running_Drivers
			
 
				-Initialize the given driver. Returns 0 on success, <c>-EINVAL</c> if
			
 
				-starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
			
 
				+if starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				 
			
 
				 \fn int starpu_driver_run_once(struct starpu_driver *d)
			
 
				 \ingroup API_Running_Drivers
			
--- a/doc/doxygen/chapters/api/scc_extensions.doxy
+++ b/doc/doxygen/chapters/api/scc_extensions.doxy
@@ -10,12 +10,12 @@
 
				 
			
 
				 \def STARPU_USE_SCC
			
 
				 \ingroup API_SCC_Extensions
			
 
				-This macro is defined when StarPU has been installed with SCC support.
			
 
				+Defined when StarPU has been installed with SCC support.
			
 
				 It should be used in your code to detect the availability of SCC.
			
 
				 
			
 
				 \def STARPU_MAXSCCDEVS
			
 
				 \ingroup API_SCC_Extensions
			
 
				-This macro defines the maximum number of SCC devices that are
			
 
				+Define the maximum number of SCC devices that are
			
 
				 supported by StarPU.
			
 
				 
			
 
				 \typedef starpu_scc_func_symbol_t
			
--- a/doc/doxygen/chapters/api/scheduling_contexts.doxy
+++ b/doc/doxygen/chapters/api/scheduling_contexts.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012 INRIA
			
 
				  * Copyright (C) 2016 Uppsala University
			
 
				  * See the file version.doxy for copying conditions.
			
@@ -22,17 +22,17 @@ Performance counters used by the starpu to indicate the
 
				 hypervisor how the application and the resources are executing.
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time)
			
 
				-        Informs the hypervisor for how long a worker has been idle in the specified context
			
 
				+        Inform the hypervisor for how long a worker has been idle in the specified context
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_pushed_task)(unsigned sched_ctx_id, int worker)
			
 
				-        Notifies the hypervisor that a task has been scheduled on the queue of the worker corresponding to the specified context
			
 
				+        Notify the hypervisor that a task has been scheduled on the queue of the worker corresponding to the specified context
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_poped_task)(unsigned sched_ctx_id, int worker)
			
 
				-        Informs the hypervisor that a task executing a specified number of instructions has been poped from the worker
			
 
				+        Inform the hypervisor that a task executing a specified number of instructions has been popped from the worker
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops)
			
 
				-        Notifies the hypervisor that a task has just been executed
			
 
				+        Notify the hypervisor that a task has just been executed
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size)
			
 
				-        Notifies the hypervisor that a task has just been submitted
			
 
				+        Notify the hypervisor that a task has just been submitted
			
 
				 \var void (*starpu_sched_ctx_performance_counters::notify_delete_context)(unsigned sched_ctx)
			
 
				-        Notifies the hypervisor that the context was deleted
			
 
				+        Notify the hypervisor that the context was deleted
			
 
				 
			
 
				 
			
 
				 @name Scheduling Contexts Basic API
			
@@ -45,7 +45,7 @@ modified at configure by using the option \ref enable-max-sched-ctxs "--enable-m
 
				 
			
 
				 \fn unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This function creates a scheduling context with the given parameters
			
 
				+Create a scheduling context with the given parameters
			
 
				 (see below) and assigns the workers in \p workerids_ctx to execute the
			
 
				 tasks submitted to it. The return value represents the identifier of
			
 
				 the context that has just been created. It will be further used to
			
@@ -79,47 +79,47 @@ to a custom user data structure, to be retrieved by \ref starpu_sched_ctx_get_us
 
				 
			
 
				 \def STARPU_SCHED_CTX_POLICY_NAME
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 name for a scheduling policy
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_POLICY_STRUCT
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 pointer to a scheduling policy
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_POLICY_MIN_PRIO
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 minimum scheduler priority value.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_POLICY_MAX_PRIO
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 maximum scheduler priority value.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_AWAKE_WORKERS
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 pointer to a scheduling policy
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_POLICY_INIT
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 function pointer allowing to initialize the scheduling policy.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_USER_DATA
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify a
			
 
				+Used when calling starpu_sched_ctx_create() to specify a
			
 
				 pointer to some user data related to the context being created.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_SUB_CTXS
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() to specify 
			
 
				+Used when calling starpu_sched_ctx_create() to specify
			
 
				 a list of sub-contexts of the current context.
			
 
				 
			
 
				 \def STARPU_SCHED_CTX_CUDA_NSMS
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This macro is used when calling starpu_sched_ctx_create() in order
			
 
				+Used when calling starpu_sched_ctx_create() in order
			
 
				 to create a context on the NVIDIA GPU to specify the number of SMs
			
 
				 the context should have
			
 
				 
			
@@ -129,24 +129,24 @@ Create a context indicating an approximate interval of resources
 
				 
			
 
				 \fn void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Execute the callback whenever the last task of the context finished executing, it is called with the pramaters: sched_ctx and any other paramter needed
			
 
				-by the application (packed in a void*)
			
 
				+Execute the callback whenever the last task of the context finished executing. It is called with the parameters \p sched_ctx_id and any other parameter needed
			
 
				+by the application (packed in \p args)
			
 
				 
			
 
				 \fn void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This function adds dynamically the workers in \p workerids_ctx to the
			
 
				+Dynamically add the workers in \p workerids_ctx to the
			
 
				 context \p sched_ctx_id. The last argument cannot be greater than
			
 
				 \ref STARPU_NMAX_SCHED_CTXS.
			
 
				 
			
 
				 \fn void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This function removes the workers in \p workerids_ctx from the context
			
 
				+Remove the workers in \p workerids_ctx from the context
			
 
				 \p sched_ctx_id. The last argument cannot be greater than
			
 
				 \ref STARPU_NMAX_SCHED_CTXS.
			
 
				 
			
 
				 \fn void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-This function prints on the file \p f the worker names belonging to the context \p sched_ctx_id
			
 
				+Print on the file \p f the worker names belonging to the context \p sched_ctx_id
			
 
				 
			
 
				 \fn void starpu_sched_ctx_delete(unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
@@ -181,13 +181,13 @@ possible.
 
				 
			
 
				 \fn unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Returns the list of workers in the array \p workerids, the returned value is the 
			
 
				+Return the list of workers in the array \p workerids, the returned value is the
			
 
				 number of workers. The user should free the \p workerids table after finishing
			
 
				 using it (it is allocated inside the function with the proper size)
			
 
				 
			
 
				 \fn unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Returns the list of workers in the array \p workerids, the returned value is the 
			
 
				+Return the list of workers in the array \p workerids, the returned value is the
			
 
				 number of workers. This list is provided in raw order, i.e. not sorted by tree or list order,
			
 
				 and the user should not free the \p workerids table.
			
 
				 This function is thus much less costly than starpu_sched_ctx_get_workers_list.
			
@@ -200,7 +200,7 @@ blocked)
 
				 
			
 
				 \fn unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-    Return the number of workers shared by two contexts.
			
 
				+Return the number of workers shared by two contexts.
			
 
				 
			
 
				 \fn unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
@@ -234,7 +234,7 @@ statically allocate tasks with a default priority.
 
				 
			
 
				 \fn int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Defines the minimum task priority level supported by the scheduling
			
 
				+Define the minimum task priority level supported by the scheduling
			
 
				 policy of the given scheduler context. The default minimum priority
			
 
				 level is the same as the default priority level which is 0 by
			
 
				 convention. The application may access that value by calling the function
			
@@ -244,7 +244,7 @@ should not be used directly from the application.
 
				 
			
 
				 \fn int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Defines the maximum priority level supported by the scheduling policy
			
 
				+Define the maximum priority level supported by the scheduling policy
			
 
				 of the given scheduler context. The default maximum priority level is
			
 
				 1. The application may access that value by calling the
			
 
				 starpu_sched_ctx_get_max_priority function. This function should only
			
@@ -253,12 +253,12 @@ should not be used directly from the application.
 
				 
			
 
				 \fn int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Returns the current minimum priority level supported by the scheduling
			
 
				+Return the current minimum priority level supported by the scheduling
			
 
				 policy of the given scheduler context.
			
 
				 
			
 
				 \fn int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Returns the current maximum priority level supported by the scheduling
			
 
				+Return the current maximum priority level supported by the scheduling
			
 
				 policy of the given scheduler context.
			
 
				 
			
 
				 \fn int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id)
			
@@ -294,7 +294,7 @@ Return the worker collection managed by the indicated context
 
				 
			
 
				 \fn void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void *perf_counters)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Indicates to starpu the pointer to the performance counter
			
 
				+Indicate to StarPU the pointer to the performance counter
			
 
				 
			
 
				 \fn void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
@@ -316,12 +316,12 @@ additional condition variables) the context
 
				 
			
 
				 \fn void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-Return the scheduling policy data (private information of the scheduler) of the contexts previously 
			
 
				+Return the scheduling policy data (private information of the scheduler) of the contexts previously
			
 
				 assigned to.
			
 
				 
			
 
				 \fn void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
 
				-execute any parallel code on the workers of the sched_ctx (workers are blocked)
			
 
				+Execute any parallel code on the workers of the sched_ctx (workers are blocked)
			
 
				 
			
 
				 \fn int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id)
			
 
				 \ingroup API_Scheduling_Contexts
			
--- a/doc/doxygen/chapters/api/scheduling_policy.doxy
+++ b/doc/doxygen/chapters/api/scheduling_policy.doxy
@@ -20,7 +20,7 @@ configure by using the option \ref enable-maximplementations "--enable-maximplem
 
				 
			
 
				 \struct starpu_sched_policy
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-This structure contains all the methods that implement a
			
 
				+Contain all the methods that implement a
			
 
				 scheduling policy. An application may specify which scheduling
			
 
				 strategy in the field starpu_conf::sched_policy passed to the function
			
 
				 starpu_init().
			
@@ -30,9 +30,9 @@ For each task going through the scheduler, the following methods get called in t
 
				 <ul>
			
 
				 <li>starpu_sched_policy::submit_hook when the task is submitted</li>
			
 
				 <li>starpu_sched_policy::push_task when the task becomes ready. The scheduler is here <b>given</b> the task</li>
			
 
				-<li>starpu_sched_policy::pop_task when a worker is idle. The scheduler here <b>gives</b> back the task to the core</li>
			
 
				+<li>starpu_sched_policy::pop_task when a worker is idle. The scheduler here <b>gives</b> back the task to the core</li>
			
 
				 <li>starpu_sched_policy::pre_exec_hook right before the worker actually starts the task computation (after transferring any missing data).</li>
			
 
				-<li>starpu_sched_policy::post_exec_hook right after the worker actually completed the task computation.</li>
			
 
				+<li>starpu_sched_policy::post_exec_hook right after the worker actually completes the task computation.</li>
			
 
				 </ul>
			
 
				 
			
 
				 For each task not going through the scheduler (because starpu_task::execute_on_a_specific_worker was set), these get called:
			
@@ -41,7 +41,7 @@ For each task not going through the scheduler (because starpu_task::execute_on_a
 
				 <li>starpu_sched_policy::submit_hook when the task is submitted</li>
			
 
				 <li>starpu_sched_policy::push_task_notify when the task becomes ready. This is just a notification, the scheduler does not have to do anything about the task.</li>
			
 
				 <li>starpu_sched_policy::pre_exec_hook right before the worker actually starts the task computation (after transferring any missing data).</li>
			
 
				-<li>starpu_sched_policy::post_exec_hook right after the worker actually completed the task computation.</li>
			
 
				+<li>starpu_sched_policy::post_exec_hook right after the worker actually completes the task computation.</li>
			
 
				 </ul>
			
 
				 
			
 
				 
			
@@ -110,7 +110,7 @@ block and wake up all workers.
 
				 \fn int starpu_sched_set_min_priority(int min_prio)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				 TODO: check if this is correct
			
 
				-Defines the minimum task priority level supported by the scheduling
			
 
				+Define the minimum task priority level supported by the scheduling
			
 
				 policy. The default minimum priority level is the same as the default
			
 
				 priority level which is 0 by convention. The application may access
			
 
				 that value by calling the function starpu_sched_get_min_priority().
			
@@ -121,7 +121,7 @@ application.
 
				 \fn int starpu_sched_set_max_priority(int max_prio)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				 TODO: check if this is correct
			
 
				-Defines the maximum priority level supported by the scheduling policy.
			
 
				+Define the maximum priority level supported by the scheduling policy.
			
 
				 The default maximum priority level is 1. The application may access
			
 
				 that value by calling the function starpu_sched_get_max_priority().
			
 
				 This function should only be called from the initialization method of
			
@@ -131,13 +131,13 @@ application.
 
				 \fn int starpu_sched_get_min_priority(void)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				 TODO: check if this is correct
			
 
				-Returns the current minimum priority level supported by the scheduling
			
 
				+Return the current minimum priority level supported by the scheduling
			
 
				 policy
			
 
				 
			
 
				 \fn int starpu_sched_get_max_priority(void)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				 TODO: check if this is correct
			
 
				-Returns the current maximum priority level supported by the scheduling
			
 
				+Return the current maximum priority level supported by the scheduling
			
 
				 policy
			
 
				 
			
 
				 \fn int starpu_push_local_task(int workerid, struct starpu_task *task, int back)
			
@@ -150,7 +150,7 @@ Setting \p back to 0 therefore ensures a FIFO ordering.
 
				 
			
 
				 \fn int starpu_push_task_end(struct starpu_task *task)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-This function must be called by a scheduler to notify that the given
			
 
				+Must be called by a scheduler to notify that the given
			
 
				 task has just been pushed.
			
 
				 
			
 
				 \fn int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
			
@@ -181,24 +181,24 @@ check for at least one implementation without determining which.
 
				 
			
 
				 \fn uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns the footprint for a given task, taking into account user-provided
			
 
				+Return the footprint for a given task, taking into account user-provided
			
 
				 perfmodel footprint or size_base functions.
			
 
				 
			
 
				 \fn uint32_t starpu_task_data_footprint(struct starpu_task *task)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns the raw footprint for the data of a given task (without taking into account user-provided functions).
			
 
				+Return the raw footprint for the data of a given task (without taking into account user-provided functions).
			
 
				 
			
 
				 \fn double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns expected task duration in micro-seconds.
			
 
				+Return expected task duration in micro-seconds.
			
 
				 
			
 
				 \fn double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns an estimated speedup factor relative to CPU speed
			
 
				+Return an estimated speedup factor relative to CPU speed
			
 
				 
			
 
				 \fn double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns expected data transfer time in micro-seconds.
			
 
				+Return expected data transfer time in micro-seconds.
			
 
				 
			
 
				 \fn double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode)
			
 
				 \ingroup API_Scheduling_Policy
			
@@ -206,11 +206,11 @@ Predict the transfer time (in micro-seconds) to move \p handle to a memory node
 
				 
			
 
				 \fn double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns expected energy consumption in J
			
 
				+Return expected energy consumption in J
			
 
				 
			
 
				 \fn double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
			
 
				 \ingroup API_Scheduling_Policy
			
 
				-Returns expected conversion time in ms (multiformat interface only)
			
 
				+Return expected conversion time in ms (multiformat interface only)
			
 
				 
			
 
				 \fn int starpu_get_prefetch_flag(void)
			
 
				 \ingroup API_Scheduling_Policy
			
--- a/doc/doxygen/chapters/api/standard_memory_library.doxy
+++ b/doc/doxygen/chapters/api/standard_memory_library.doxy
@@ -60,20 +60,20 @@ and write to normally, but get bogus values.
 
				 
			
 
				 \fn int starpu_malloc_flags(void **A, size_t dim, int flags)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-Performs a memory allocation based on the constraints defined
			
 
				+Perform a memory allocation based on the constraints defined
			
 
				 by the given flag.
			
 
				 
			
 
				 \fn void starpu_malloc_set_align(size_t align)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function sets an alignment constraints for starpu_malloc()
			
 
				+Set an alignment constraint for starpu_malloc()
			
 
				 allocations. \p align must be a power of two. This is for instance called
			
 
				 automatically by the OpenCL driver to specify its own alignment
			
 
				 constraints.
			
 
				 
			
 
				 \fn int starpu_malloc(void **A, size_t dim)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function allocates data of the given size \p dim in main memory, and
			
 
				-returns the pointer to the allocated data through \p A.
			
 
				+Allocate data of the given size \p dim in main memory, and
			
 
				+return the pointer to the allocated data through \p A.
			
 
				 It will also try to pin it in CUDA or OpenCL, so that data transfers
			
 
				 from this buffer can be asynchronous, and thus permit data transfer
			
 
				 and computation overlapping. The allocated buffer must be freed thanks
			
@@ -81,24 +81,23 @@ to the starpu_free() function.
 
				 
			
 
				 \fn int starpu_free(void *A)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function frees memory which has previously been allocated
			
 
				-with starpu_malloc().
			
 
				+Free memory which has previously been allocated with starpu_malloc().
			
 
				 
			
 
				 \fn int starpu_free_flags(void *A, size_t dim, int flags)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function frees memory by specifying its size. The given
			
 
				+Free memory by specifying its size. The given
			
 
				 flags should be consistent with the ones given to starpu_malloc_flags()
			
 
				 when allocating the memory.
			
 
				 
			
 
				 \fn int starpu_memory_pin(void *addr, size_t size)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function pins the given memory area, so that CPU-GPU transfers can be done
			
 
				+Pin the given memory area, so that CPU-GPU transfers can be done
			
 
				 asynchronously with DMAs. The memory must be unpinned with
			
 
				 starpu_memory_unpin() before being freed. Returns 0 on success, -1 on error.
			
 
				 
			
 
				 \fn int starpu_memory_unpin(void *addr, size_t size)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-This function unpins the given memory area previously pinned with
			
 
				+Unpin the given memory area previously pinned with
			
 
				 starpu_memory_pin(). Returns 0 on success, -1 on error.
			
 
				 
			
 
				 \fn ssize_t starpu_memory_get_total(unsigned node)
			
@@ -109,7 +108,7 @@ on the node. Otherwise return -1.
 
				 
			
 
				 \fn ssize_t starpu_memory_get_total_all_nodes()
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-return the amount of total memory on all memory nodes for whose a memory limit
			
 
				+Return the amount of total memory on all memory nodes for which a memory limit
			
 
				 is defined (see Section \ref HowToLimitMemoryPerNode).
			
 
				 
			
 
				 \fn ssize_t starpu_memory_get_available(unsigned node)
			
@@ -120,7 +119,7 @@ on the node. Otherwise return -1.
 
				 
			
 
				 \fn ssize_t starpu_memory_get_available_all_nodes()
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				-return the amount of available memory on all memory nodes for whose a memory limit
			
 
				+Return the amount of available memory on all memory nodes for which a memory limit
			
 
				 is defined (see Section \ref HowToLimitMemoryPerNode).
			
 
				 
			
 
				 \fn int starpu_memory_allocate(unsigned node, size_t size, int flags)
			
--- a/doc/doxygen/chapters/api/task_lists.doxy
+++ b/doc/doxygen/chapters/api/task_lists.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
@@ -12,9 +12,9 @@
 
				 Stores a double-chained list of tasks
			
 
				 \ingroup API_Task_Lists
			
 
				 \var struct starpu_task *starpu_task_list::head
			
 
				-head of the list
			
 
				+    head of the list
			
 
				 \var struct starpu_task *starpu_task_list::tail
			
 
				-tail of the list
			
 
				+    tail of the list
			
 
				 
			
 
				 \fn void starpu_task_list_init(struct starpu_task_list *list)
			
 
				 \ingroup API_Task_Lists
			
--- a/doc/doxygen/chapters/api/threads.doxy
+++ b/doc/doxygen/chapters/api/threads.doxy
@@ -15,190 +15,171 @@ mode is enabled (\ref SimGridSupport).
 
				 
			
 
				 \def STARPU_PTHREAD_CREATE_ON
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_create_on() and aborts on error.
			
 
				+Call starpu_pthread_create_on() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_CREATE
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_create() and aborts on error.
			
 
				+Call starpu_pthread_create() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_MUTEX_INIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_mutex_init() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_mutex_init() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_MUTEX_DESTROY
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_mutex_destroy() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_mutex_destroy() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_MUTEX_LOCK
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_mutex_lock() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_mutex_lock() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_MUTEX_UNLOCK
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_mutex_unlock() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_mutex_unlock() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_KEY_CREATE
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_key_create() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_key_create() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_KEY_DELETE
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_key_delete() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_key_delete() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_SETSPECIFIC
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_setspecific() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_setspecific() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_GETSPECIFIC
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_getspecific() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_getspecific() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_RWLOCK_INIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_rwlock_init() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_rwlock_init() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_RWLOCK_RDLOCK
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_rwlock_rdlock() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_rwlock_rdlock() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_RWLOCK_WRLOCK
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_rwlock_wrlock() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_rwlock_wrlock() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_RWLOCK_UNLOCK
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_rwlock_unlock() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_rwlock_unlock() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_RWLOCK_DESTROY
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_rwlock_destroy() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_rwlock_destroy() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_COND_INIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_cond_init() and aborts on error.
			
 
				+Call starpu_pthread_cond_init() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_COND_DESTROY
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_cond_destroy() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_cond_destroy() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_COND_SIGNAL
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_cond_signal() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_cond_signal() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_COND_BROADCAST
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_cond_broadcast() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_cond_broadcast() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_COND_WAIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_cond_wait() and aborts on error.
			
 
				+Call starpu_pthread_cond_wait() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_BARRIER_INIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_barrier_init() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_barrier_init() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_BARRIER_DESTROY
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_barrier_destroy() and
			
 
				-aborts on error.
			
 
				+Call starpu_pthread_barrier_destroy() and abort on error.
			
 
				 
			
 
				 \def STARPU_PTHREAD_BARRIER_WAIT
			
 
				 \ingroup API_Threads
			
 
				-This macro calls the function starpu_pthread_barrier_wait() and aborts
			
 
				-on error.
			
 
				+Call starpu_pthread_barrier_wait() and abort on error.
			
 
				 
			
 
				 \fn int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, int where)
			
 
				 \ingroup API_Threads
			
 
				 
			
 
				 \fn int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg)
			
 
				 \ingroup API_Threads
			
 
				-This function starts a new thread in the calling process.  The new
			
 
				+Start a new thread in the calling process. The new
			
 
				 thread starts execution by invoking \p start_routine; \p arg is passed
			
 
				 as the sole argument of \p start_routine.
			
 
				 
			
 
				 \fn int starpu_pthread_join(starpu_pthread_t thread, void **retval)
			
 
				 \ingroup API_Threads
			
 
				-This function waits for the thread specified by \p thread to
			
 
				+Wait for the thread specified by \p thread to
			
 
				 terminate.  If that thread has already terminated, then the function
			
 
				 returns immediately. The thread specified by \p thread must be
			
 
				 joinable.
			
 
				 
			
 
				 \fn int starpu_pthread_exit(void *retval)
			
 
				 \ingroup API_Threads
			
 
				-This function terminates the calling thread and returns a value via
			
 
				+Terminate the calling thread and return a value via
			
 
				 \p retval that (if the thread is joinable) is available to another thread
			
 
				 in the same process that calls starpu_pthread_join().
			
 
				 
			
 
				 \fn int starpu_pthread_attr_init(starpu_pthread_attr_t *attr)
			
 
				 \ingroup API_Threads
			
 
				-This function initializes the thread attributes object pointed to by
			
 
				+Initialize the thread attributes object pointed to by
			
 
				 \p attr with default attribute values.
			
 
				 
			
 
				-It does not do anything when the simulated performance mode is enabled
			
 
				+Do nothing when the simulated performance mode is enabled
			
 
				 (\ref SimGridSupport).
			
 
				 
			
 
				 \fn int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr)
			
 
				 \ingroup API_Threads
			
 
				-This function destroys a thread attributes object which is no longer
			
 
				+Destroy a thread attributes object which is no longer
			
 
				 required. Destroying a thread attributes object has no effect on
			
 
				 threads that were created using that object.
			
 
				 
			
 
				-It does not do anything when the simulated performance mode is enabled
			
 
				+Do nothing when the simulated performance mode is enabled
			
 
				 (\ref SimGridSupport).
			
 
				 
			
 
				 \fn int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate)
			
 
				 \ingroup API_Threads
			
 
				-This function sets the detach state attribute of the thread attributes
			
 
				+Set the detach state attribute of the thread attributes
			
 
				 object referred to by \p attr to the value specified in \p
			
 
				 detachstate.  The detach state attribute determines whether a thread
			
 
				 created using the thread attributes object \p attr will be created in
			
 
				 a joinable or a detached state.
			
 
				 
			
 
				-It does not do anything when the simulated performance mode is enabled
			
 
				+Do nothing when the simulated performance mode is enabled
			
 
				 (\ref SimGridSupport).
			
 
				 
			
 
				 \fn int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr)
			
 
				 \ingroup API_Threads
			
 
				-This function initializes the mutex object pointed to by \p mutex
			
 
				+Initialize the mutex object pointed to by \p mutex
			
 
				 according to the mutex attributes specified in \p mutexattr.  If \p
			
 
				 mutexattr is <c>NULL</c>, default attributes are used instead.
			
 
				 
			
 
				 \fn int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex)
			
 
				 \ingroup API_Threads
			
 
				-This function destroys a mutex object, freeing the resources it might
			
 
				+Destroy a mutex object, and free the resources it might
			
 
				 hold. The mutex must be unlocked on entrance.
			
 
				 
			
 
				 \fn int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex)
			
 
				 \ingroup API_Threads
			
 
				-This function locks the given mutex. If the mutex is currently
			
 
				+Lock the given \p mutex. If \p mutex is currently
			
 
				 unlocked, it becomes locked and owned by the calling thread, and the
			
 
				-function returns immediately. If the mutex is already locked by
			
 
				-another thread, the function suspends the calling thread until the
			
 
				-mutex is unlocked.
			
 
				+function returns immediately. If \p mutex is already locked by
			
 
				+another thread, the function suspends the calling thread until
			
 
				+\p mutex is unlocked.
			
 
				 
			
 
				 This function also produces trace when the configure option
			
 
				 \ref enable-fxt-lock "--enable-fxt-lock" is enabled.
			
 
				 
			
 
				 \fn int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex)
			
 
				 \ingroup API_Threads
			
 
				-This function unlocks the given mutex. The mutex is assumed to be
			
 
				+Unlock the given \p mutex. The mutex is assumed to be
			
 
				 locked and owned by the calling thread on entrance to
			
 
				 starpu_pthread_mutex_unlock().
			
 
				 
			
@@ -207,7 +188,7 @@ This function also produces trace when the configure option
 
				 
			
 
				 \fn int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex)
			
 
				 \ingroup API_Threads
			
 
				-This function behaves identically to starpu_pthread_mutex_lock(),
			
 
				+Behave identically to starpu_pthread_mutex_lock(),
			
 
				 except that it does not block the calling thread if the mutex is
			
 
				 already locked by another thread (or by the calling thread in the case
			
 
				 of a ``fast''  mutex). Instead, the function returns immediately with
			
@@ -218,7 +199,7 @@ This function also produces trace when the configure option
 
				 
			
 
				 \typedef STARPU_PTHREAD_MUTEX_INITIALIZER
			
 
				 \ingroup API_Threads
			
 
				-This macro initializes the mutex given in parameter.
			
 
				+Initialize the mutex given in parameter.
			
 
				 
			
 
				 \fn int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type)
			
 
				 \ingroup API_Threads
			
@@ -238,52 +219,52 @@ todo
 
				 
			
 
				 \fn int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *))
			
 
				 \ingroup API_Threads
			
 
				-This function allocates a new TSD key. The key is stored in the
			
 
				+Allocate a new TSD key. The key is stored in the
			
 
				 location pointed to by \p key.
			
 
				 
			
 
				 \fn int starpu_pthread_key_delete(starpu_pthread_key_t key)
			
 
				 \ingroup API_Threads
			
 
				-This function deallocates a TSD key. It does not check whether
			
 
				+Deallocate a TSD key. Do not check whether
			
 
				 non-<c>NULL</c> values are associated with that key in the currently
			
 
				 executing threads, nor call the destructor function associated with
			
 
				 the key.
			
 
				 
			
 
				 \fn int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
			
 
				 \ingroup API_Threads
			
 
				-This function changes the value associated with \p key in the calling
			
 
				+Change the value associated with \p key in the calling
			
 
				 thread, storing the given \p pointer instead.
			
 
				 
			
 
				 \fn void *starpu_pthread_getspecific(starpu_pthread_key_t key)
			
 
				 \ingroup API_Threads
			
 
				-This function returns the value associated with \p key on success, and
			
 
				+Return the value associated with \p key on success, and
			
 
				 <c>NULL</c> on error.
			
 
				 
			
 
				 \typedef STARPU_PTHREAD_COND_INITIALIZER
			
 
				 \ingroup API_Threads
			
 
				-This macro initializes the condition variable given in parameter.
			
 
				+Initialize the condition variable given in parameter.
			
 
				 
			
 
				 \fn int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr)
			
 
				 \ingroup API_Threads
			
 
				-This function initializes the condition variable \p cond, using the
			
 
				+Initialize the condition variable \p cond, using the
			
 
				 condition attributes specified in \p cond_attr, or default attributes
			
 
				 if \p cond_attr is <c>NULL</c>.
			
 
				 
			
 
				 \fn int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond)
			
 
				 \ingroup API_Threads
			
 
				-This function restarts one of the threads that are waiting on the
			
 
				+Restart one of the threads that are waiting on the
			
 
				 condition variable \p cond. If no threads are waiting on \p cond,
			
 
				 nothing happens. If several threads are waiting on \p cond, exactly
			
 
				-one is restarted, but it not specified which.
			
 
				+one is restarted, but it is not specified which.
			
 
				 
			
 
				 \fn int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond)
			
 
				 \ingroup API_Threads
			
 
				-This function restarts all the threads that are waiting on the
			
 
				+Restart all the threads that are waiting on the
			
 
				 condition variable \p cond. Nothing happens if no threads are waiting on \p cond.
			
 
				 
			
 
				 \fn int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
			
 
				 \ingroup API_Threads
			
 
				-This function atomically unlocks the mutex (as per
			
 
				-starpu_pthread_mutex_unlock()) and waits for the condition variable \p cond
			
 
				+Atomically unlock \p mutex (as per
			
 
				+starpu_pthread_mutex_unlock()) and wait for the condition variable \p cond
			
 
				 to be signaled. The thread execution is suspended and does not consume
			
 
				 any CPU time until the condition variable is signaled. The mutex must
			
 
				 be locked by the calling thread on entrance to
			
@@ -295,27 +276,27 @@ This function also produces trace when the configure option
 
				 
			
 
				 \fn int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime)
			
 
				 \ingroup API_Threads
			
 
				-This function atomically unlocks \p mutex and waits on \p cond, as
			
 
				-starpu_pthread_cond_wait() does, but it also bounds the duration of
			
 
				-the wait.
			
 
				+Atomically unlock \p mutex and wait on \p cond, as
			
 
				+starpu_pthread_cond_wait() does, but also bound the duration of
			
 
				+the wait with \p abstime.
			
 
				 
			
 
				 \fn int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond)
			
 
				 \ingroup API_Threads
			
 
				-This function destroys a condition variable, freeing the resources it
			
 
				+Destroy a condition variable, freeing the resources it
			
 
				 might hold. No threads must be waiting on the condition variable on
			
 
				 entrance to the function.
			
 
				 
			
 
				 \fn int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr)
			
 
				 \ingroup API_Threads
			
 
				-This function is the same as starpu_pthread_mutex_init().
			
 
				+Similar to starpu_pthread_mutex_init().
			
 
				 
			
 
				 \fn int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
 
				-This function is the same as starpu_pthread_mutex_destroy().
			
 
				+Similar to starpu_pthread_mutex_destroy().
			
 
				 
			
 
				 \fn int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
 
				-This function is the same as starpu_pthread_mutex_lock().
			
 
				+Similar to starpu_pthread_mutex_lock().
			
 
				 
			
 
				 \fn int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
@@ -323,7 +304,7 @@ todo
 
				 
			
 
				 \fn int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
 
				-This function is the same as starpu_pthread_mutex_lock().
			
 
				+Similar to starpu_pthread_mutex_lock().
			
 
				 
			
 
				 \fn int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
@@ -331,7 +312,7 @@ todo
 
				 
			
 
				 \fn int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock)
			
 
				 \ingroup API_Threads
			
 
				-This function is the same as starpu_pthread_mutex_unlock().
			
 
				+Similar to starpu_pthread_mutex_unlock().
			
 
				 
			
 
				 \fn int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count)
			
 
				 \ingroup API_Threads
			
@@ -367,7 +348,7 @@ todo
 
				 
			
 
				 \fn void starpu_sleep(float nb_sec)
			
 
				 \ingroup API_Threads
			
 
				-This is the same as calling Unix' sleep function, except that it takes a float
			
 
				+Similar to calling Unix' \c sleep function, except that it takes a float
			
 
				 to allow sub-second sleeping, and when StarPU is compiled in simgrid mode it
			
 
				 does not really sleep but just makes simgrid record that the thread has taken
			
 
				 some time to sleep.
			
--- a/doc/doxygen/chapters/api/toolbox.doxy
+++ b/doc/doxygen/chapters/api/toolbox.doxy
@@ -17,52 +17,51 @@ Return true (non-zero) if GCC version \p maj.\p min or later is being used (macr
 
				 
			
 
				 \def STARPU_UNLIKELY
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro allows programmers to mark an expression as unlikely.
			
 
				+When building with a GNU C Compiler, allow programmers to mark an expression as unlikely.
			
 
				 
			
 
				 \def STARPU_LIKELY
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro allows programmers to mark an expression as likely.
			
 
				+When building with a GNU C Compiler, allow programmers to mark an expression as likely.
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_UNUSED
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to __attribute__((unused))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((unused))
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_INTERNAL
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to __attribute__((visibility ("internal")))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((visibility ("internal")))
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_MALLOC
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to __attribute__((malloc))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((malloc))
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_WARN_UNUSED_RESULT
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to __attribute__((warn_unused_result))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((warn_unused_result))
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_PURE
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to __attribute__((pure))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((pure))
			
 
				 
			
 
				 \def STARPU_ATTRIBUTE_ALIGNED
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to__attribute__((aligned(size)))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((aligned(size)))
			
 
				 
			
 
				 \def STARPU_WARN_UNUSED_RESULT
			
 
				 \ingroup API_Toolbox
			
 
				-When building with a GNU C Compiler, this macro is defined to__attribute__((__warn_unused_result__))
			
 
				+When building with a GNU C Compiler, defined to __attribute__((__warn_unused_result__))
			
 
				 
			
 
				 \def STARPU_POISON_PTR
			
 
				 \ingroup API_Toolbox
			
 
				-This macro defines a value which can be used to mark pointers as
			
 
				-invalid values.
			
 
				+Define a value which can be used to mark pointers as invalid values.
			
 
				 
			
 
				 \def STARPU_MIN
			
 
				 \ingroup API_Toolbox
			
 
				-This macro returns the min of the two parameters.
			
 
				+Return the min of the two parameters.
			
 
				 
			
 
				 \def STARPU_MAX
			
 
				 \ingroup API_Toolbox
			
 
				-This macro returns the max of the two parameters.
			
 
				+Return the max of the two parameters.
			
 
				 
			
 
				 \def STARPU_ASSERT
			
 
				 \ingroup API_Toolbox
			
@@ -77,21 +76,19 @@ given message will be displayed.
 
				 
			
 
				 \def STARPU_ABORT
			
 
				 \ingroup API_Toolbox
			
 
				-This macro aborts the program.
			
 
				+Abort the program.
			
 
				 
			
 
				 \def STARPU_ABORT_MSG
			
 
				 \ingroup API_Toolbox
			
 
				-This macro aborts the program, and displays the given message.
			
 
				+Abort the program, and display the given message.
			
 
				 
			
 
				 \def STARPU_CHECK_RETURN_VALUE
			
 
				 \ingroup API_Toolbox
			
 
				-If \p err has a value which is not 0, the given message is displayed
			
 
				-before aborting.
			
 
				+Abort the program (after displaying \p message) if \p err has a value which is not 0.
			
 
				 
			
 
				 \def STARPU_CHECK_RETURN_VALUE_IS
			
 
				 \ingroup API_Toolbox
			
 
				-If \p err has a value which is not \p value, the given message is displayed
			
 
				-before aborting.
			
 
				+Abort the program (after displaying \p message) if \p err is different from \p value.
			
 
				 
			
 
				 \def STARPU_RMB
			
 
				 \ingroup API_Toolbox
			
@@ -103,9 +100,9 @@ This macro can be used to do a synchronization.
 
				 
			
 
				 \fn int starpu_get_env_number(const char *str)
			
 
				 \ingroup API_Toolbox
			
 
				-If \p str is the name of a existing environment variable which is
			
 
				-defined to an integer, the function returns the value of the integer.
			
 
				-It returns 0 otherwise.
			
 
				+Return the integer value of the environment variable named \p str.
			
 
				+Return 0 otherwise (the variable does not exist or has a non-integer
			
 
				+value).
			
 
				 
			
 
				 */