6 years ago · af93366acd
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -105,22 +105,12 @@ chapters =	\
 
				 	chapters/code/disk_compute.c \
			
 
				 	chapters/code/nf_initexit.f90 \
			
 
				 	chapters/api/codelet_and_tasks.doxy \
			
 
				-	chapters/api/cuda_extensions.doxy \
			
 
				-	chapters/api/data_interfaces.doxy \
			
 
				-	chapters/api/data_management.doxy \
			
 
				-	chapters/api/data_partition.doxy \
			
 
				-	chapters/api/data_out_of_core.doxy \
			
 
				-	chapters/api/expert_mode.doxy \
			
 
				 	chapters/api/explicit_dependencies.doxy \
			
 
				 	chapters/api/fft_support.doxy \
			
 
				-	chapters/api/fxt_support.doxy \
			
 
				-	chapters/api/implicit_dependencies.doxy \
			
 
				 	chapters/api/initialization.doxy \
			
 
				 	chapters/api/insert_task.doxy \
			
 
				-	chapters/api/lower_bound.doxy \
			
 
				 	chapters/api/misc_helpers.doxy \
			
 
				 	chapters/api/mpi.doxy \
			
 
				-	chapters/api/multiformat_data_interface.doxy \
			
 
				 	chapters/api/opencl_extensions.doxy \
			
 
				 	chapters/api/openmp_runtime_support.doxy \
			
 
				 	chapters/api/mic_extensions.doxy \
			
@@ -128,7 +118,6 @@ chapters =	\
 
				 	chapters/api/parallel_tasks.doxy \
			
 
				 	chapters/api/performance_model.doxy \
			
 
				 	chapters/api/profiling.doxy \
			
 
				-	chapters/api/running_driver.doxy \
			
 
				 	chapters/api/scheduling_contexts.doxy \
			
 
				 	chapters/api/scheduling_policy.doxy \
			
 
				 	chapters/api/standard_memory_library.doxy \
			
@@ -138,13 +127,11 @@ chapters =	\
 
				 	chapters/api/versioning.doxy \
			
 
				 	chapters/api/workers.doxy \
			
 
				 	chapters/api/threads.doxy \
			
 
				-	chapters/api/bitmap.doxy \
			
 
				 	chapters/api/tree.doxy \
			
 
				 	chapters/api/toolbox.doxy \
			
 
				 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
			
 
				 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
			
 
				 	chapters/api/modularized_scheduler.doxy \
			
 
				-	chapters/api/clustering_machine.doxy \
			
 
				 	chapters/api/interoperability.doxy
			
 
				 
			
 
				 images = 	\
			
--- a/doc/doxygen/chapters/api/bitmap.doxy
+++ b/doc/doxygen/chapters/api/bitmap.doxy
@@ -1,81 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2014,2015,2017                           CNRS
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Bitmap  Bitmap
			
 
				-
			
 
				-\brief This section describes the bitmap facilities provided by StarPU.
			
 
				-
			
 
				-\struct starpu_bitmap
			
 
				-\ingroup API_Bitmap
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_bitmap *starpu_bitmap_create(void)
			
 
				-\ingroup API_Bitmap
			
 
				-create an empty starpu_bitmap
			
 
				-
			
 
				-\fn void starpu_bitmap_destroy(struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-free \p b
			
 
				-
			
 
				-\fn void starpu_bitmap_set(struct starpu_bitmap *b, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-set bit \p e in \p b
			
 
				-
			
 
				-\fn void starpu_bitmap_unset(struct starpu_bitmap *b, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-unset bit \p e in \p b
			
 
				-
			
 
				-\fn void starpu_bitmap_unset_all(struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-unset all bits in \p b
			
 
				-
			
 
				-\fn int starpu_bitmap_get(struct starpu_bitmap *b, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-return true iff bit \p e is set in \p b
			
 
				-
			
 
				-\fn void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c)
			
 
				-\ingroup API_Bitmap
			
 
				-Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c;
			
 
				-
			
 
				-\fn void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-Basically compute \p a |= \p b
			
 
				-
			
 
				-\fn int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-return 1 iff \p e is set in \p b1 AND \p e is set in \p b2
			
 
				-
			
 
				-\fn int starpu_bitmap_cardinal(struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-return the number of set bits in \p b
			
 
				-
			
 
				-\fn int starpu_bitmap_first(struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-return the index of the first set bit of \p b, -1 if none
			
 
				-
			
 
				-\fn int starpu_bitmap_last(struct starpu_bitmap *b)
			
 
				-\ingroup API_Bitmap
			
 
				-return the position of the last set bit of \p b, -1 if none
			
 
				-
			
 
				-\fn int starpu_bitmap_next(struct starpu_bitmap *b, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-return the position of set bit right after \p e in \p b, -1 if none
			
 
				-
			
 
				-\fn int starpu_bitmap_has_next(struct starpu_bitmap *b, int e)
			
 
				-\ingroup API_Bitmap
			
 
				-todo
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/clustering_machine.doxy
+++ b/doc/doxygen/chapters/api/clustering_machine.doxy
@@ -1,103 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2017, 2019                                     CNRS
			
 
				- * Copyright (C) 2017                                     Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Clustering_Machine Clustering Machine
			
 
				-
			
 
				-\def STARPU_CLUSTER_MIN_NB
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_MAX_NB
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_NB
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_POLICY_NAME
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_POLICY_STRUCT
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_KEEP_HOMOGENEOUS
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_PREFERE_MIN
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_CREATE_FUNC
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_CREATE_FUNC_ARG
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_TYPE
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_AWAKE_WORKERS
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_PARTITION_ONE
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_NEW
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\def STARPU_CLUSTER_NCORES
			
 
				-\ingroup API_Clustering_Machine
			
 
				-TODO
			
 
				-
			
 
				-\enum starpu_cluster_types
			
 
				-\ingroup API_Clustering_Machine
			
 
				-todo
			
 
				-\var starpu_cluster_types::STARPU_CLUSTER_OPENMP
			
 
				-todo
			
 
				-\var starpu_cluster_types::STARPU_CLUSTER_INTEL_OPENMP_MKL
			
 
				-todo
			
 
				-\var starpu_cluster_types::STARPU_CLUSTER_GNU_OPENMP_MKL
			
 
				-todo
			
 
				-
			
 
				-\struct starpu_cluster_machine
			
 
				-\ingroup API_Clustering_Machine
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_cluster_machine* starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...)
			
 
				-\ingroup API_Clustering_Machine
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_uncluster_machine(struct starpu_cluster_machine* clusters)
			
 
				-\ingroup API_Clustering_Machine
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_cluster_print(struct starpu_cluster_machine* clusters)
			
 
				-\ingroup API_Clustering_Machine
			
 
				-todo
			
 
				-
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/cuda_extensions.doxy
+++ b/doc/doxygen/chapters/api/cuda_extensions.doxy
@@ -1,125 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2017                      Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_CUDA_Extensions CUDA Extensions
			
 
				-
			
 
				-\def STARPU_USE_CUDA
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This macro is defined when StarPU has been installed with CUDA
			
 
				-support. It should be used in your code to detect the availability of
			
 
				-CUDA as shown in \ref FullSourceCodeVectorScal.
			
 
				-
			
 
				-\def STARPU_MAXCUDADEVS
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This macro defines the maximum number of CUDA devices that are
			
 
				-supported by StarPU.
			
 
				-
			
 
				-\fn cudaStream_t starpu_cuda_get_local_stream(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Return the current worker’s CUDA stream. StarPU
			
 
				-provides a stream for every CUDA device controlled by StarPU. This
			
 
				-function is only provided for convenience so that programmers can
			
 
				-easily use asynchronous operations within codelets without having to
			
 
				-create a stream by hand. Note that the application is not forced to
			
 
				-use the stream provided by starpu_cuda_get_local_stream() and may also
			
 
				-create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
			
 
				-allowed, but will reduce the likelihood of having all transfers
			
 
				-overlapped.
			
 
				-
			
 
				-\fn const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker).
			
 
				-
			
 
				-\fn void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Report a CUDA error.
			
 
				-
			
 
				-\def STARPU_CUDA_REPORT_ERROR(status)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Calls starpu_cuda_report_error(), passing the current function, file and line position.
			
 
				-
			
 
				-\fn int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				-to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				-copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
			
 
				-asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				-data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				-asynchronous launch was successful. It returns 0 if the synchronous
			
 
				-copy was successful, or fails otherwise.
			
 
				-
			
 
				-\fn void starpu_cuda_set_device(unsigned devid)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
			
 
				-according to whether \p devid is among the field
			
 
				-starpu_conf::cuda_opengl_interoperability.
			
 
				-
			
 
				-\fn void starpu_cublas_init(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function initializes CUBLAS on every CUDA device. The
			
 
				-CUBLAS library must be initialized prior to any CUBLAS call. Calling
			
 
				-starpu_cublas_init() will initialize CUBLAS on every CUDA device
			
 
				-controlled by StarPU. This call blocks until CUBLAS has been properly
			
 
				-initialized on every device.
			
 
				-
			
 
				-\fn void starpu_cublas_set_stream(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function sets the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
			
 
				-codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
			
 
				-CUDA stream. When using one thread per CUDA worker, this function does not
			
 
				-do anything since the CUBLAS stream does not change, and is set once by
			
 
				-starpu_cublas_init().
			
 
				-
			
 
				-\fn cublasHandle_t starpu_cublas_get_local_handle(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function returns the CUBLAS v2 handle to be used to queue CUBLAS v2
			
 
				-kernels. It is properly initialized and configured for multistream by
			
 
				-starpu_cublas_init().
			
 
				-
			
 
				-\fn void starpu_cublas_shutdown(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function synchronously deinitializes the CUBLAS library on
			
 
				-every CUDA device.
			
 
				-
			
 
				-\fn void starpu_cublas_report_error(const char *func, const char *file, int line, int status)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Report a cublas error.
			
 
				-
			
 
				-\def STARPU_CUBLAS_REPORT_ERROR(status)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Calls starpu_cublas_report_error(), passing the current
			
 
				-function, file and line position.
			
 
				-
			
 
				-\fn void starpu_cusparse_init(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-Calling starpu_cusparse_init() will initialize CUSPARSE on every CUDA device
			
 
				-controlled by StarPU. This call blocks until CUSPARSE has been properly
			
 
				-initialized on every device.
			
 
				-
			
 
				-\fn cusparseHandle_t starpu_cusparse_get_local_handle(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function returns the CUSPARSE handle to be used to queue CUSPARSE
			
 
				-kernels. It is properly initialized and configured for multistream by
			
 
				-starpu_cusparse_init().
			
 
				-
			
 
				-\fn void starpu_cusparse_shutdown(void)
			
 
				-\ingroup API_CUDA_Extensions
			
 
				-This function synchronously deinitializes the CUSPARSE library on
			
 
				-every CUDA device.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/data_interfaces.doxy
+++ b/doc/doxygen/chapters/api/data_interfaces.doxy
--- a/doc/doxygen/chapters/api/data_management.doxy
+++ b/doc/doxygen/chapters/api/data_management.doxy
@@ -1,443 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2011,2012,2017                           Inria
			
 
				- * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Data_Management Data Management
			
 
				-
			
 
				-\brief This section describes the data management facilities provided
			
 
				-by StarPU. We show how to use existing data interfaces in
			
 
				-\ref API_Data_Interfaces, but developers can design their own data interfaces if
			
 
				-required.
			
 
				-
			
 
				-\typedef starpu_data_handle_t
			
 
				-\ingroup API_Data_Management
			
 
				-StarPU uses ::starpu_data_handle_t as an opaque handle to
			
 
				-manage a piece of data. Once a piece of data has been registered to
			
 
				-StarPU, it is associated to a ::starpu_data_handle_t which keeps track
			
 
				-of the state of the piece of data over the entire machine, so that we
			
 
				-can maintain data consistency and locate data replicates for instance.
			
 
				-
			
 
				-\typedef starpu_arbiter_t
			
 
				-\ingroup API_Data_Management
			
 
				-This is an arbiter, which implements an advanced but centralized management of
			
 
				-concurrent data accesses, see \ref ConcurrentDataAccess for the details.
			
 
				-
			
 
				-\enum starpu_data_access_mode
			
 
				-\ingroup API_Data_Management
			
 
				-This datatype describes a data access mode.
			
 
				-\var starpu_data_access_mode::STARPU_NONE
			
 
				-    TODO
			
 
				-\var starpu_data_access_mode::STARPU_R
			
 
				-    read-only mode.
			
 
				-\var starpu_data_access_mode::STARPU_W
			
 
				-    write-only mode.
			
 
				-\var starpu_data_access_mode::STARPU_RW
			
 
				-    read-write mode. This is equivalent to ::STARPU_R|::STARPU_W
			
 
				-\var starpu_data_access_mode::STARPU_SCRATCH
			
 
				-    A temporary buffer is allocated for the task, but StarPU does not
			
 
				-    enforce data consistency---i.e. each device has its own buffer,
			
 
				-    independently from each other (even for CPUs), and no data
			
 
				-    transfer is ever performed. This is useful for temporary variables
			
 
				-    to avoid allocating/freeing buffers inside each task. Currently,
			
 
				-    no behavior is defined concerning the relation with the ::STARPU_R
			
 
				-    and ::STARPU_W modes and the value provided at registration ---
			
 
				-    i.e., the value of the scratch buffer is undefined at entry of the
			
 
				-    codelet function.  It is being considered for future extensions at
			
 
				-    least to define the initial value.  For now, data to be used in
			
 
				-    ::STARPU_SCRATCH mode should be registered with node -1 and
			
 
				-    a <c>NULL</c> pointer, since the value of the provided buffer is
			
 
				-    simply ignored for now.
			
 
				-\var starpu_data_access_mode::STARPU_REDUX
			
 
				-    todo
			
 
				-\var starpu_data_access_mode::STARPU_COMMUTE
			
 
				-    ::STARPU_COMMUTE can be passed along
			
 
				-    ::STARPU_W or ::STARPU_RW to express that StarPU can let tasks
			
 
				-    commute, which is useful e.g. when bringing a contribution into
			
 
				-    some data, which can be done in any order (but still require
			
 
				-    sequential consistency against reads or non-commutative writes).
			
 
				-\var starpu_data_access_mode::STARPU_SSEND
			
 
				-    used in starpu_mpi_insert_task() to specify the data has to be
			
 
				-    sent using a synchronous and non-blocking mode (see
			
 
				-    starpu_mpi_issend())
			
 
				-\var starpu_data_access_mode::STARPU_LOCALITY
			
 
				-    used to tell the scheduler which data is the most important for
			
 
				-    the task, and should thus be used to try to group tasks on the
			
 
				-    same core or cache, etc. For now only the ws and lws schedulers
			
 
				-    take this flag into account, and only when rebuilt with
			
 
				-    USE_LOCALITY flag defined in the
			
 
				-    src/sched_policies/work_stealing_policy.c source code.
			
 
				-\var starpu_data_access_mode::STARPU_ACCESS_MODE_MAX
			
 
				-    todo
			
 
				-
			
 
				-@name Basic Data Management API
			
 
				-\ingroup API_Data_Management
			
 
				-
			
 
				-Data management is done at a high-level in StarPU: rather than
			
 
				-accessing a mere list of contiguous buffers, the tasks may manipulate
			
 
				-data that are described by a high-level construct which we call data
			
 
				-interface.
			
 
				-
			
 
				-An example of data interface is the "vector" interface which describes
			
 
				-a contiguous data array on a specific memory node. This interface is a
			
 
				-simple structure containing the number of elements in the array, the
			
 
				-size of the elements, and the address of the array in the appropriate
			
 
				-address space (this address may be invalid if there is no valid copy
			
 
				-of the array in the memory node). More details on the data
			
 
				-interfaces provided by StarPU are given in \ref API_Data_Interfaces.
			
 
				-
			
 
				-When a piece of data managed by StarPU is used by a task, the task
			
 
				-implementation is given a pointer to an interface describing a valid
			
 
				-copy of the data that is accessible from the current processing unit.
			
 
				-
			
 
				-Every worker is associated to a memory node which is a logical
			
 
				-abstraction of the address space from which the processing unit gets
			
 
				-its data. For instance, the memory node associated to the different
			
 
				-CPU workers represents main memory (RAM), the memory node associated
			
 
				-to a GPU is DRAM embedded on the device. Every memory node is
			
 
				-identified by a logical index which is accessible from the
			
 
				-function starpu_worker_get_memory_node(). When registering a piece of
			
 
				-data to StarPU, the specified memory node indicates where the piece of
			
 
				-data initially resides (we also call this memory node the home node of
			
 
				-a piece of data).
			
 
				-
			
 
				-In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
			
 
				-and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
			
 
				-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
			
 
				-
			
 
				-\fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
			
 
				-\ingroup API_Data_Management
			
 
				-Register a piece of data into the handle located at the
			
 
				-\p handleptr address. The \p data_interface buffer contains the initial
			
 
				-description of the data in the \p home_node. The \p ops argument is a
			
 
				-pointer to a structure describing the different methods used to
			
 
				-manipulate this type of interface. See starpu_data_interface_ops for
			
 
				-more details on this structure.
			
 
				-If \p home_node is -1, StarPU will automatically allocate the memory when
			
 
				-it is used for the first time in write-only mode. Once such data
			
 
				-handle has been automatically allocated, it is possible to access it
			
 
				-using any access mode.
			
 
				-Note that StarPU supplies a set of predefined types of interface (e.g.
			
 
				-vector or matrix) which can be registered by the means of helper
			
 
				-functions (e.g. starpu_vector_data_register() or
			
 
				-starpu_matrix_data_register()).
			
 
				-
			
 
				-\fn void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node)
			
 
				-\ingroup API_Data_Management
			
 
				-Register that a buffer for \p handle on \p node will be set. This is typically
			
 
				-used by starpu_*_ptr_register helpers before setting the interface pointers for
			
 
				-this node, to tell the core that the buffer is now allocated.
			
 
				-
			
 
				-\fn void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc)
			
 
				-\ingroup API_Data_Management
			
 
				-Register a new piece of data into the handle \p handledst with the
			
 
				-same interface as the handle \p handlesrc.
			
 
				-
			
 
				-\fn void starpu_data_unregister(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Unregister a data \p handle from StarPU. If the
			
 
				-data was automatically allocated by StarPU because the home node was
			
 
				--1, all automatically allocated buffers are freed. Otherwise, a valid
			
 
				-copy of the data is put back into the home node in the buffer that was
			
 
				-initially registered. Using a data handle that has been unregistered
			
 
				-from StarPU results in an undefined behaviour. In case we do not need
			
 
				-to update the value of the data in the home node, we can use
			
 
				-the function starpu_data_unregister_no_coherency() instead.
			
 
				-
			
 
				-\fn void starpu_data_unregister_no_coherency(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_unregister(), except that
			
 
				-StarPU does not put back a valid copy into the home node, in the
			
 
				-buffer that was initially registered.
			
 
				-
			
 
				-\fn void starpu_data_unregister_submit(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Destroy the data \p handle once it is no longer needed by any
			
 
				-submitted task. No coherency is assumed.
			
 
				-
			
 
				-\fn void starpu_data_invalidate(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Destroy all replicates of the data \p handle immediately. After
			
 
				-data invalidation, the first access to \p handle must be performed in
			
 
				-::STARPU_W mode. Accessing an invalidated data in ::STARPU_R mode
			
 
				-results in undefined behaviour.
			
 
				-
			
 
				-\fn void starpu_data_invalidate_submit(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Submit invalidation of the data \p handle after completion of
			
 
				-previously submitted tasks.
			
 
				-
			
 
				-\fn void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask)
			
 
				-\ingroup API_Data_Management
			
 
				-Set the write-through mask of the data \p handle (and
			
 
				-its children), i.e. a bitmask of nodes where the data should be always
			
 
				-replicated after modification. It also prevents the data from being
			
 
				-evicted from these nodes when memory gets scarce. When the data is
			
 
				-modified, it is automatically transferred into those memory nodes. For
			
 
				-instance a <c>1<<0</c> write-through mask means that the CUDA workers
			
 
				-will commit their changes in main memory (node 0).
			
 
				-
			
 
				-\fn void starpu_data_set_name(starpu_data_handle_t handle, const char *name)
			
 
				-\ingroup API_Data_Management
			
 
				-Set the name of the data, to be shown in various profiling tools.
			
 
				-
			
 
				-\fn void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[])
			
 
				-\ingroup API_Data_Management
			
 
				-Set the coordinates of the data, to be shown in various profiling tools.
			
 
				-\p dimensions is the size of the \p dims array
			
 
				-This can be for instance the tile coordinates within a big matrix.
			
 
				-
			
 
				-\fn void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...)
			
 
				-\ingroup API_Data_Management
			
 
				-Set the coordinates of the data, to be shown in various profiling tools.
			
 
				-\p dimensions is the number of subsequent \c int parameters.
			
 
				-This can be for instance the tile coordinates within a big matrix.
			
 
				-
			
 
				-\fn void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag)
			
 
				-\ingroup API_Data_Management
			
 
				-Set whether this data should be eligible to be evicted to disk storage (1) or
			
 
				-not (0). The default is 1.
			
 
				-
			
 
				-\fn unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Get whether this data was set to be eligible to be evicted to disk storage (1) or
			
 
				-not (0).
			
 
				-
			
 
				-\fn int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				-\ingroup API_Data_Management
			
 
				-Issue a fetch request for the data \p handle to \p node, i.e.
			
 
				-requests that the data be replicated to the given node as soon as possible, so that it is
			
 
				-available there for tasks. If \p async is 0, the call will
			
 
				-block until the transfer is achieved, else the call will return immediately,
			
 
				-after having just queued the request. In the latter case, the request will
			
 
				-asynchronously wait for the completion of any task writing on the data.
			
 
				-
			
 
				-\fn int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				-\ingroup API_Data_Management
			
 
				-Issue a prefetch request for the data \p handle to \p node, i.e.
			
 
				-requests that the data be replicated to \p node when there is room for it, so that it is
			
 
				-available there for tasks. If \p async is 0, the call will
			
 
				-block until the transfer is achieved, else the call will return immediately,
			
 
				-after having just queued the request. In the latter case, the request will
			
 
				-asynchronously wait for the completion of any task writing on the data.
			
 
				-
			
 
				-\fn int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
			
 
				-\ingroup API_Data_Management
			
 
				-Issue an idle prefetch request for the data \p handle to \p node, i.e.
			
 
				-requests that the data be replicated to \p node, so that it is
			
 
				-available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
			
 
				-block until the transfer is achieved, else the call will return immediately,
			
 
				-after having just queued the request. In the latter case, the request will
			
 
				-asynchronously wait for the completion of any task writing on the data.
			
 
				-
			
 
				-\fn unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node)
			
 
				-\ingroup API_Data_Management
			
 
				-Check whether a valid copy of \p handle is currently available on memory node \p
			
 
				-node .
			
 
				-
			
 
				-\fn void starpu_data_wont_use(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Advise StarPU that \p handle will not be used in the near future, and is
			
 
				-thus a good candidate for eviction from GPUs. StarPU will thus write its value
			
 
				-back to its home node when the bus is idle, and select this data in priority
			
 
				-for eviction when memory gets low.
			
 
				-
			
 
				-\fn starpu_data_handle_t starpu_data_lookup(const void *ptr)
			
 
				-\ingroup API_Data_Management
			
 
				-Return the handle corresponding to the data pointed to by the \p ptr host pointer.
			
 
				-
			
 
				-\fn int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node)
			
 
				-\ingroup API_Data_Management
			
 
				-Explicitly ask StarPU to allocate room for a piece of data on
			
 
				-the specified memory \p node.
			
 
				-
			
 
				-\fn void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested)
			
 
				-\ingroup API_Data_Management
			
 
				-Query the status of \p handle on the specified \p memory_node.
			
 
				-
			
 
				-\fn void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important)
			
 
				-\ingroup API_Data_Management
			
 
				-Specify that the data \p handle can be discarded without impacting the application.
			
 
				-
			
 
				-\fn void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl)
			
 
				-\ingroup API_Data_Management
			
 
				-Set the codelets to be used for \p handle when it is accessed in the
			
 
				-mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				-the codelet \p init_cl, and reduction between per-worker buffers will be
			
 
				-done with the codelet \p redux_cl.
			
 
				-
			
 
				-\fn struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data)
			
 
				-\ingroup API_Data_Management
			
 
				-Set the field \c user_data for the \p handle to \p user_data. It can
			
 
				-then be retrieved with starpu_data_get_user_data(). \p user_data can be any
			
 
				-application-defined value, for instance a pointer to an object-oriented
			
 
				-container for the data.
			
 
				-
			
 
				-\fn void *starpu_data_get_user_data(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-This retrieves the field \c user_data previously set for the \p handle.
			
 
				-
			
 
				-@name Access registered data from the application
			
 
				-\ingroup API_Data_Management
			
 
				-
			
 
				-\fn int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
			
 
				-\ingroup API_Data_Management
			
 
				-The application must call this function prior to accessing
			
 
				-registered data from main memory outside tasks. StarPU ensures that
			
 
				-the application will get an up-to-date copy of \p handle in main memory
			
 
				-located where the data was originally registered, and that all
			
 
				-concurrent accesses (e.g. from tasks) will be consistent with the
			
 
				-access mode specified with \p mode. starpu_data_release() must
			
 
				-be called once the application no longer needs to access the piece of
			
 
				-data. Note that implicit data dependencies are also enforced
			
 
				-by starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
			
 
				-tasks scheduled to work on the data, unless they have been disabled
			
 
				-explicitly by calling starpu_data_set_default_sequential_consistency_flag() or
			
 
				-starpu_data_set_sequential_consistency_flag(). starpu_data_acquire() is a
			
 
				-blocking call, so that it cannot be called from tasks or from their
			
 
				-callbacks (in that case, starpu_data_acquire() returns <c>-EDEADLK</c>). Upon
			
 
				-successful completion, this function returns 0.
			
 
				-
			
 
				-\fn int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				-\ingroup API_Data_Management
			
 
				-Asynchronous equivalent of starpu_data_acquire(). When the data
			
 
				-specified in \p handle is available in the access \p mode, the \p
			
 
				-callback function is executed. The application may access
			
 
				-the requested data during the execution of \p callback. The \p callback
			
 
				-function must call starpu_data_release() once the application no longer
			
 
				-needs to access the piece of data. Note that implicit data
			
 
				-dependencies are also enforced by starpu_data_acquire_cb() in case they
			
 
				-are not disabled. Contrary to starpu_data_acquire(), this function is
			
 
				-non-blocking and may be called from task callbacks. Upon successful
			
 
				-completion, this function returns 0.
			
 
				-
			
 
				-\fn int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				-\ingroup API_Data_Management
			
 
				-Equivalent of starpu_data_acquire_cb() with the possibility of enabling or disabling data dependencies.
			
 
				-When the data specified in \p handle is available in the access
			
 
				-\p mode, the \p callback function is executed. The application may access
			
 
				-the requested data during the execution of this \p callback. The \p callback
			
 
				-function must call starpu_data_release() once the application no longer
			
 
				-needs to access the piece of data. Note that implicit data
			
 
				-dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
			
 
				-are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
			
 
				-Similarly to starpu_data_acquire_cb(), this function is
			
 
				-non-blocking and may be called from task callbacks. Upon successful
			
 
				-completion, this function returns 0.
			
 
				-
			
 
				-\fn int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
			
 
				-\ingroup API_Data_Management
			
 
				-The application can call this function instead of starpu_data_acquire() so as to
			
 
				-acquire the data like starpu_data_acquire(), but only if all
			
 
				-previously-submitted tasks have completed, in which case starpu_data_acquire_try()
			
 
				-returns 0. StarPU will have ensured that the application will get an up-to-date
			
 
				-copy of \p handle in main memory located where the data was originally
			
 
				-registered. starpu_data_release() must be called once the application no longer
			
 
				-needs to access the piece of data.
			
 
				-
			
 
				-If not all previously-submitted tasks have completed, starpu_data_acquire_try()
			
 
				-returns -EAGAIN, and starpu_data_release() must not be called.
			
 
				-
			
 
				-\def STARPU_ACQUIRE_NO_NODE
			
 
				-\ingroup API_Data_Management
			
 
				-This macro can be used to acquire data, but not require it to be available on a given node, only enforce R/W dependencies.
			
 
				-This can for instance be used to wait for tasks which produce the data, but without requesting a fetch to the main memory.
			
 
				-
			
 
				-\def STARPU_ACQUIRE_NO_NODE_LOCK_ALL
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance.
			
 
				-This is mostly useful inside starpu only.
			
 
				-
			
 
				-\fn int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_acquire(), except that the data
			
 
				-will be available on the given memory node instead of main
			
 
				-memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				-used instead of an explicit node number.
			
 
				-
			
 
				-\fn int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_acquire_cb(), except that the
			
 
				-data will be available on the given memory node instead of main
			
 
				-memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				-used instead of an explicit node number.
			
 
				-
			
 
				-\fn int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_acquire_cb_sequential_consistency(), except that the
			
 
				-data will be available on the given memory node instead of main
			
 
				-memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				-explicit node number.
			
 
				-
			
 
				-\fn int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_acquire_on_node_cb_sequential_consistency(),
			
 
				-except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
			
 
				-to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
			
 
				-just before the acquisition, and \e post_sync_jobid happens just after the
			
 
				-release.
			
 
				-
			
 
				-\fn int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_acquire_try(), except that the
			
 
				-data will be available on the given memory node instead of main
			
 
				-memory.
			
 
				-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				-explicit node number.
			
 
				-
			
 
				-\def STARPU_DATA_ACQUIRE_CB(handle, mode, code)
			
 
				-\ingroup API_Data_Management
			
 
				-STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
			
 
				-except that the code to be executed in a callback is directly provided
			
 
				-as a macro parameter, and the data \p handle is automatically released
			
 
				-after it. This permits to easily execute code which depends on the
			
 
				-value of some registered data. This is non-blocking too and may be
			
 
				-called from task callbacks.
			
 
				-
			
 
				-\fn void starpu_data_release(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Management
			
 
				-Release the piece of data acquired by the
			
 
				-application either by starpu_data_acquire() or by
			
 
				-starpu_data_acquire_cb().
			
 
				-
			
 
				-\fn void starpu_data_release_on_node(starpu_data_handle_t handle, int node)
			
 
				-\ingroup API_Data_Management
			
 
				-This is the same as starpu_data_release(), except that the data
			
 
				-will be available on the given memory \p node instead of main memory.
			
 
				-The \p node parameter must be exactly the same as the corresponding \c
			
 
				-starpu_data_acquire_on_node* call.
			
 
				-
			
 
				-\fn starpu_arbiter_t starpu_arbiter_create(void)
			
 
				-\ingroup API_Data_Management
			
 
				-Create a data access arbiter, see \ref ConcurrentDataAccess for the details
			
 
				-
			
 
				-\fn void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter)
			
 
				-\ingroup API_Data_Management
			
 
				-Make access to \p handle managed by \p arbiter
			
 
				-
			
 
				-\fn void starpu_arbiter_destroy(starpu_arbiter_t arbiter)
			
 
				-\ingroup API_Data_Management
			
 
				-Destroy the \p arbiter . This must only be called after all data
			
 
				-assigned to it have been unregistered.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/data_out_of_core.doxy
+++ b/doc/doxygen/chapters/api/data_out_of_core.doxy
@@ -1,154 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2013,2017                                Inria
			
 
				- * Copyright (C) 2013,2015,2017                           CNRS
			
 
				- * Copyright (C) 2013,2014,2017                           Université de Bordeaux
			
 
				- * Copyright (C) 2013                                     Corentin Salingue
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Out_Of_Core Out Of Core
			
 
				-
			
 
				-\def STARPU_DISK_SIZE_MIN
			
 
				-\ingroup API_Out_Of_Core
			
 
				-Minimum size of a registered disk. The size of a disk is the last parameter of the function starpu_disk_register().
			
 
				-
			
 
				-\struct starpu_disk_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This is a set of functions to manipulate data on disk.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::alloc)(void *base, size_t size)
			
 
				-Create a new location for data of size \p size. This returns an opaque object pointer.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::free)(void *base, void *obj, size_t size)
			
 
				-Free a data \p obj previously allocated with \c alloc.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::open)(void *base, void *pos, size_t size)
			
 
				-Open an existing location of data, at a specific position \p pos dependent on the backend.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::close)(void *base, void *obj, size_t size)
			
 
				-Close, without deleting it, a location of data \p obj.
			
 
				-
			
 
				-\var int (*starpu_disk_ops::read)(void *base, void *obj, void *buf, off_t offset, size_t size)
			
 
				-Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put
			
 
				-into \p buf. Returns the actual number of read bytes.
			
 
				-
			
 
				-\var int (*starpu_disk_ops::write)(void *base, void *obj, const void *buf, off_t offset, size_t size)
			
 
				-Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Returns 0 on success.
			
 
				-
			
 
				-\var int (*starpu_disk_ops::full_read)(void * base, void * obj, void ** ptr, size_t * size)
			
 
				-Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
			
 
				-
			
 
				-\var int (*starpu_disk_ops::full_write)(void * base, void * obj, void * ptr, size_t size)
			
 
				-Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				-\p size, so that a \c full_read will get it.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::plug) (void *parameters, size_t size)
			
 
				-Connect a disk memory at location \p parameters with size \p size, and return a
			
 
				-base as void*, which will be passed by StarPU to all other methods.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::unplug) (void* base)
			
 
				-Disconnect a disk memory \p base.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::async_read)(void *base, void *obj, void *buf, off_t offset, size_t size)
			
 
				-Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p
			
 
				-offset, and put into \p buf. Returns a void* pointer that StarPU will pass to \c
			
 
				-*_request methods for testing for the completion.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::async_write)(void *base, void *obj, const void *buf, off_t offset, size_t size)
			
 
				-Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p
			
 
				-offset, from \p buf. Returns a void* pointer that StarPU will pass to \c
			
 
				-*_request methods for testing for the completion.
			
 
				-
			
 
				-\var void * (*starpu_disk_ops::async_full_read)(void * base, void * obj, void ** ptr, size_t * size)
			
 
				-Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
			
 
				-
			
 
				-\var void * (*starpu_disk_ops::async_full_write)(void * base, void * obj, void * ptr, size_t size)
			
 
				-Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				-\p size, so that a \c full_read will get it.
			
 
				-
			
 
				-\var void* (*starpu_disk_ops::copy)(void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size)
			
 
				-Copy from offset \p offset_src of disk object \p obj_src in \p base_src to
			
 
				-offset \p offset_dst of disk object \p obj_dst in \p base_dst. Returns a void*
			
 
				-pointer that StarPU will pass to \c *_request methods for testing for the
			
 
				-completion.
			
 
				-
			
 
				-\var int (*starpu_disk_ops::bandwidth) (unsigned node)
			
 
				-Measure the bandwidth and the latency for the disk \p node and save it. Returns
			
 
				-1 if it could measure it.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::wait_request)(void *async_channel)
			
 
				-Wait for completion of request \p async_channel returned by a previous
			
 
				-asynchronous read, write or copy.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::test_request)(void *async_channel)
			
 
				-Test for completion of request \p async_channel returned by a previous
			
 
				-asynchronous read, write or copy. Returns 1 on completion, 0 otherwise.
			
 
				-
			
 
				-\var void (*starpu_disk_ops::free_request)(void *async_channel)
			
 
				-Free the request allocated by a previous asynchronous read, write or copy.
			
 
				-
			
 
				-\fn int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size)
			
 
				-\ingroup API_Out_Of_Core
			
 
				-Register a disk memory node with a set of functions to manipulate data. The \c
			
 
				-plug member of \p func will be passed \p parameter, and return a \c base which will be passed to all \p func methods. <br />
			
 
				-SUCCESS: return the disk node. <br />
			
 
				-FAIL: return an error code. <br />
			
 
				-\p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size being negative means infinite size.
			
 
				-
			
 
				-\fn void *starpu_disk_open(unsigned node, void *pos, size_t size)
			
 
				-\ingroup API_Out_Of_Core
			
 
				-Open an existing file memory in a disk node. \p size is the size of the
			
 
				-file. \p pos is the specific position dependent on the backend, given to the \c open
			
 
				-method of the disk operations. This returns an opaque object pointer.
			
 
				-
			
 
				-\fn void starpu_disk_close(unsigned node, void *obj, size_t size)
			
 
				-\ingroup API_Out_Of_Core
			
 
				-Close an existing data opened with starpu_disk_open().
			
 
				-
			
 
				-\var starpu_disk_swap_node
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This contains the node number of the disk swap, if set up through the
			
 
				-\ref STARPU_DISK_SWAP variable.
			
 
				-
			
 
				-\var starpu_disk_stdio_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This set uses the stdio library (fwrite, fread...) to read/write on disk. <br />
			
 
				-<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				-It doesn't support asynchronous transfers.
			
 
				-
			
 
				-\var starpu_disk_unistd_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This set uses the unistd library (write, read...) to read/write on disk. <br />
			
 
				-<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				-
			
 
				-\var starpu_disk_unistd_o_direct_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This set uses the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. <br />
			
 
				-<strong>Warning: It creates one file per allocation !</strong>  <br />
			
 
				-Only available on Linux systems.
			
 
				-
			
 
				-\var starpu_disk_leveldb_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This set uses the leveldb created by Google <br />
			
 
				-More information at https://code.google.com/p/leveldb/ <br />
			
 
				-It doesn't support asynchronous transfers.
			
 
				-
			
 
				-\var starpu_disk_hdf5_ops
			
 
				-\ingroup API_Out_Of_Core
			
 
				-This set uses the HDF5 library.<br />
			
 
				-<strong>It doesn't support multiple opening from different processes. </strong> <br />
			
 
				-You may only allow one process to write in the HDF5 file. <br />
			
 
				-<strong>If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. </strong>
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/data_partition.doxy
+++ b/doc/doxygen/chapters/api/data_partition.doxy
@@ -1,416 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2015,2017,2018-2019       Université de Bordeaux
			
 
				- * Copyright (C) 2011-2013                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Data_Partition Data Partition
			
 
				-
			
 
				-\struct starpu_data_filter
			
 
				-The filter structure describes a data partitioning operation, to be
			
 
				-given to the starpu_data_partition() function.
			
 
				-\ingroup API_Data_Partition
			
 
				-\var void (*starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *filter, unsigned i, unsigned nparts)
			
 
				-    Fill the \p child_interface structure with interface information
			
 
				-    for the \p i -th child of the parent \p father_interface (among
			
 
				-    \p nparts). The \p filter structure is provided, allowing to inspect the
			
 
				-    starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
			
 
				-    parameters.
			
 
				-
			
 
				-    The details of what needs to be filled in \p child_interface vary according
			
 
				-    to the data interface, but generally speaking:
			
 
				-    <ul>
			
 
				-    <li> <c>id</c> is usually just copied over from the father, when the sub data has the same structure as the father, e.g. a subvector is a vector, a submatrix is a matrix, etc. This is however not the case for instance when dividing a BCSR matrix into its dense blocks, which then are matrices. </li>
			
 
				-    <li> <c>nx</c>, <c>ny</c> and alike are usually divided by the number of subdata, depending how the subdivision is done (e.g. nx division vs ny division for vertical matrix division vs horizontal matrix division). </li>
			
 
				-    <li> <c>ld</c> for matrix interfaces are usually just copied over: the leading dimension (ld) usually does not change. </li>
			
 
				-    <li> <c>elemsize</c> is usually just copied over. </li>
			
 
				-    <li> <c>ptr</c>, the pointer to the data, has to be computed according to \p i and the father's <c>ptr</c>, so as to point to the start of the sub data. This should however be done only if the father has <c>ptr</c> different from NULL: in the OpenCL case notably, the <c>dev_handle</c> and <c>offset</c> fields are used instead. </li>
			
 
				-    <li> <c>dev_handle</c> should be just copied over from the parent. </li>
			
 
				-    <li> <c>offset</c> has to be computed according to \p i and the father's <c>offset</c>, so as to provide the offset of the start of the sub data. This is notably used for the OpenCL case. </li>
			
 
				-    </ul>
			
 
				-\var unsigned starpu_data_filter::nchildren
			
 
				-    Number of parts to partition the data into.
			
 
				-\var unsigned (*starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
			
 
				-    Return the number of children. This can be used instead of
			
 
				-    starpu_data_filter::nchildren when the number of children depends
			
 
				-    on the actual data (e.g. the number of blocks in a sparse matrix).
			
 
				-\var struct starpu_data_interface_ops *(*starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id)
			
 
				-    In case the resulting children use a different data interface,
			
 
				-    this function returns which interface is used by child number \p
			
 
				-    id.
			
 
				-\var unsigned starpu_data_filter::filter_arg
			
 
				-    Additional parameter for the filter function
			
 
				-\var void *starpu_data_filter::filter_arg_ptr
			
 
				-    Additional pointer parameter for the filter
			
 
				-    function, such as the sizes of the different parts.
			
 
				-
			
 
				-@name Basic API
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-\fn void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
			
 
				-\ingroup API_Data_Partition
			
 
				-Request the partitioning of \p initial_handle into several subdata
			
 
				-according to the filter \p f.
			
 
				-
			
 
				-Here is an example of how to use the function.
			
 
				-\code{.c}
			
 
				-struct starpu_data_filter f =
			
 
				-{
			
 
				-        .filter_func = starpu_matrix_filter_block,
			
 
				-        .nchildren = nslicesx
			
 
				-};
			
 
				-starpu_data_partition(A_handle, &f);
			
 
				-\endcode
			
 
				-
			
 
				-\fn void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node)
			
 
				-\ingroup API_Data_Partition
			
 
				-Unapply the filter which has been applied to \p root_data, thus
			
 
				-unpartitioning the data. The pieces of data are collected back into
			
 
				-one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				-Tasks working on the partitioned data will be waited for
			
 
				-by starpu_data_unpartition().
			
 
				-
			
 
				-Here is an example of how to use the function.
			
 
				-\code{.c}
			
 
				-starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				-\endcode
			
 
				-
			
 
				-\fn int starpu_data_get_nb_children(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return the number of children \p handle has been partitioned into.
			
 
				-
			
 
				-\fn starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return the \p i -th child of the given \p handle, which must have been
			
 
				-partitioned beforehand.
			
 
				-
			
 
				-\fn starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... )
			
 
				-\ingroup API_Data_Partition
			
 
				-After partitioning a StarPU data by applying a filter,
			
 
				-starpu_data_get_sub_data() can be used to get handles for each of the
			
 
				-data portions. \p root_data is the parent data that was partitioned.
			
 
				-\p depth is the number of filters to traverse (in case several filters
			
 
				-have been applied, to e.g. partition in row blocks, and then in column
			
 
				-blocks), and the subsequent parameters are the indexes. The function
			
 
				-returns a handle to the subdata.
			
 
				-
			
 
				-Here is an example of how to use the function.
			
 
				-\code{.c}
			
 
				-h = starpu_data_get_sub_data(A_handle, 1, taskx);
			
 
				-\endcode
			
 
				-
			
 
				-\fn starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa)
			
 
				-\ingroup API_Data_Partition
			
 
				-This function is similar to starpu_data_get_sub_data() but uses a
			
 
				-va_list for the parameter list.
			
 
				-
			
 
				-\fn void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...)
			
 
				-\ingroup API_Data_Partition
			
 
				-Apply \p nfilters filters to the handle designated by
			
 
				-\p root_data recursively. \p nfilters pointers to variables of the type
			
 
				-starpu_data_filter should be given.
			
 
				-
			
 
				-\fn void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa)
			
 
				-\ingroup API_Data_Partition
			
 
				-Apply \p nfilters filters to the handle designated by
			
 
				-\p root_data recursively. It uses a va_list of pointers to variables of
			
 
				-the type starpu_data_filter.
			
 
				-
			
 
				-@name Asynchronous API
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-\fn void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children)
			
 
				-\ingroup API_Data_Partition
			
 
				-Plan to partition \p initial_handle into several subdata according to
			
 
				-the filter \p f.
			
 
				-The handles are returned into the \p children array, which has to be
			
 
				-the same size as the number of parts described in \p f. These handles
			
 
				-are not immediately usable, starpu_data_partition_submit() has to be
			
 
				-called to submit the actual partitioning.
			
 
				-
			
 
				-Here is an example of how to use the function:
			
 
				-
			
 
				-\code{.c}
			
 
				-starpu_data_handle_t children[nslicesx];
			
 
				-struct starpu_data_filter f =
			
 
				-{
			
 
				-        .filter_func = starpu_matrix_filter_block,
			
 
				-        .nchildren = nslicesx
			
 
				-};
			
 
				-starpu_data_partition_plan(A_handle, &f, children);
			
 
				-\endcode
			
 
				-
			
 
				-\fn void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				-\ingroup API_Data_Partition
			
 
				-Submit the actual partitioning of \p initial_handle into the \p nparts
			
 
				-\p children handles. This call is asynchronous, it only submits that the
			
 
				-partitioning should be done, so that the \p children handles can now be used to
			
 
				-submit tasks, and \p initial_handle can not be used to submit tasks any more (to
			
 
				-guarantee coherency).
			
 
				-
			
 
				-For instance,
			
 
				-
			
 
				-\code{.c}
			
 
				-starpu_data_partition_submit(A_handle, nslicesx, children);
			
 
				-\endcode
			
 
				-
			
 
				-\fn void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				-\ingroup API_Data_Partition
			
 
				-This is the same as starpu_data_partition_submit(), but it does not invalidate \p
			
 
				-initial_handle. This allows to continue using it, but the application has to be
			
 
				-careful not to write to \p initial_handle or \p children handles, only read from
			
 
				-them, since the coherency is otherwise not guaranteed.  This thus allows to
			
 
				-submit various tasks which concurrently read from various partitions of the data.
			
 
				-
			
 
				-When the application wants to write to \p initial_handle again, it should call
			
 
				-starpu_data_unpartition_submit(), which will properly add dependencies between the
			
 
				-reads on the \p children and the writes to be submitted.
			
 
				-
			
 
				-If instead the application wants to write to \p children handles, it should
			
 
				-call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
			
 
				-dependencies between the reads on the \p initial_handle and the writes to be
			
 
				-submitted.
			
 
				-
			
 
				-\fn void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
			
 
				-\ingroup API_Data_Partition
			
 
				-This assumes that a partitioning of \p initial_handle has already been submitted
			
 
				-in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
			
 
				-that partitioning into read-write mode for the \p children, by invalidating \p
			
 
				-initial_handle, and adding the necessary dependencies.
			
 
				-
			
 
				-\fn void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency)
			
 
				-\ingroup API_Data_Partition
			
 
				-Similar to starpu_data_partition_submit() but also allows to
			
 
				-specify the coherency to be used for the main data \p initial_handle
			
 
				-through the parameter \p sequential_consistency.
			
 
				-
			
 
				-\fn void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg)
			
 
				-\ingroup API_Data_Partition
			
 
				-Similar to starpu_data_unpartition_submit_sequential_consistency() but
			
 
				-allows to specify a callback function for the unpartitioning task
			
 
				-
			
 
				-\fn void starpu_data_partition_not_automatic(starpu_data_handle_t handle)
			
 
				-\ingroup API_Data_Partition
			
 
				-Disable the automatic partitioning of the data \p handle for which an
			
 
				-asynchronous plan has previously been submitted
			
 
				-
			
 
				-\fn void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
			
 
				-\ingroup API_Data_Partition
			
 
				-This assumes that \p initial_handle is partitioned into \p children, and submits
			
 
				-an unpartitioning of it, i.e. submitting a gathering of the pieces on the
			
 
				-requested \p gathering_node memory node, and submitting an invalidation of the
			
 
				-children.
			
 
				-
			
 
				-\p gathering_node can be set to -1 to let the runtime decide which memory node
			
 
				-should be used to gather the pieces.
			
 
				-
			
 
				-This call is asynchronous, it only submits that the unpartitioning should be
			
 
				-done, so that the \p children handles should not be used to submit tasks any
			
 
				-more, and \p initial_handle can now be used again to submit tasks.
			
 
				-
			
 
				-\fn void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
			
 
				-\ingroup API_Data_Partition
			
 
				-This assumes that \p initial_handle is partitioned into \p children, and submits
			
 
				-just a readonly unpartitioning of it, i.e. submitting a gathering of the pieces
			
 
				-on the requested \p gathering_node memory node. It does not invalidate the
			
 
				-children. This brings \p initial_handle and \p children handles to the same
			
 
				-state as obtained with starpu_data_partition_readonly_submit().
			
 
				-
			
 
				-\p gathering_node can be set to -1 to let the runtime decide which memory node
			
 
				-should be used to gather the pieces.
			
 
				-
			
 
				-\fn void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency)
			
 
				-\ingroup API_Data_Partition
			
 
				-Similar to starpu_data_unpartition_submit() but also allows to
			
 
				-specify the coherency to be used for the main data \p initial_handle
			
 
				-through the parameter \p sequential_consistency.
			
 
				-
			
 
				-\fn void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
			
 
				-\ingroup API_Data_Partition
			
 
				-This should be used to clear the partition planning established between \p
			
 
				-root_data and \p children with starpu_data_partition_plan(). This will notably
			
 
				-submit an unregistration of all the \p children, which can thus not be used any more
			
 
				-afterwards.
			
 
				-
			
 
				-@name Predefined Vector Filter Functions
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-This section gives a partial list of the predefined partitioning
			
 
				-functions for vector data. Examples on how to use them are shown in
			
 
				-\ref PartitioningData. The complete list can be found in the file
			
 
				-<c>starpu_data_filters.h</c>.
			
 
				-
			
 
				-\fn void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return in \p child_interface the \p id th element of the vector
			
 
				-represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				-equal size.
			
 
				-
			
 
				-\fn void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return in \p child_interface the \p id th element of the vector
			
 
				-represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				-equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
			
 
				-of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
			
 
				-of \p f must be the shadow size cast into \c void*.
			
 
				-
			
 
				-<b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
			
 
				-enforced for the shadowed parts. A usage example is available in
			
 
				-examples/filters/shadow.c
			
 
				-
			
 
				-\fn void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return in \p child_interface the \p id th element of the vector
			
 
				-represented by \p father_interface once partitioned into \p nparts chunks
			
 
				-according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				-<c>filter_arg_ptr</c> field must point to an array of \p nparts long
			
 
				-elements, each of which specifies the number of elements in each chunk
			
 
				-of the partition.
			
 
				-
			
 
				-\fn void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return in \p child_interface the \p id th element of the vector
			
 
				-represented by \p father_interface once partitioned into \p nparts chunks
			
 
				-according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				-<c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
			
 
				-elements, each of which specifies the number of elements in each chunk
			
 
				-of the partition.
			
 
				-
			
 
				-\fn void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Return in \p child_interface the \p id th element of the vector
			
 
				-represented by \p father_interface once partitioned in <c>2</c> chunks of
			
 
				-equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
			
 
				-
			
 
				-@name Predefined Matrix Filter Functions
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-This section gives a partial list of the predefined partitioning
			
 
				-functions for matrix data. Examples on how to use them are shown in
			
 
				-\ref PartitioningData. The complete list can be found in the file
			
 
				-<c>starpu_data_filters.h</c>.
			
 
				-
			
 
				-\fn void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a dense Matrix along the x dimension, thus
			
 
				-getting (x/\p nparts ,y) matrices. If \p nparts does not divide x, the
			
 
				-last submatrix contains the remainder.
			
 
				-
			
 
				-\fn void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a dense Matrix along the x dimension, with a
			
 
				-shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
			
 
				-nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
			
 
				-the last submatrix contains the remainder.
			
 
				-
			
 
				-<b>IMPORTANT</b>: This can
			
 
				-only be used for read-only access, as no coherency is enforced for the
			
 
				-shadowed parts. A usage example is available in
			
 
				-examples/filters/shadow2d.c
			
 
				-
			
 
				-\fn void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a dense Matrix along the y dimension, thus
			
 
				-getting (x,y/\p nparts) matrices. If \p nparts does not divide y, the
			
 
				-last submatrix contains the remainder.
			
 
				-
			
 
				-\fn void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a dense Matrix along the y dimension, with a
			
 
				-shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				-(x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
			
 
				-divide y-2*shadow, the last submatrix contains the remainder.
			
 
				-
			
 
				-<b>IMPORTANT</b>: This can only be used for read-only access, as no
			
 
				-coherency is enforced for the shadowed parts. A usage example is
			
 
				-available in examples/filters/shadow2d.c
			
 
				-
			
 
				-@name Predefined Block Filter Functions
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-This section gives a partial list of the predefined partitioning
			
 
				-functions for block data. Examples on how to use them are shown in
			
 
				-\ref PartitioningData. The complete list can be found in the file
			
 
				-<c>starpu_data_filters.h</c>. A usage example is available in
			
 
				-examples/filters/shadow3d.c
			
 
				-
			
 
				-\fn void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the X dimension, thus getting
			
 
				-(x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				-submatrix contains the remainder.
			
 
				-
			
 
				-\fn void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the X dimension, with a
			
 
				-shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				-((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
			
 
				-divide x, the last submatrix contains the remainder.
			
 
				-
			
 
				-<b>IMPORTANT</b>:
			
 
				-This can only be used for read-only access, as no coherency is
			
 
				-enforced for the shadowed parts.
			
 
				-
			
 
				-\fn void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the Y dimension, thus getting
			
 
				-(x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
			
 
				-submatrix contains the remainder.
			
 
				-
			
 
				-\fn void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the Y dimension, with a
			
 
				-shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				-(x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
			
 
				-divide y, the last submatrix contains the remainder.
			
 
				-
			
 
				-<b>IMPORTANT</b>:
			
 
				-This can only be used for read-only access, as no coherency is
			
 
				-enforced for the shadowed parts.
			
 
				-
			
 
				-\fn void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the Z dimension, thus getting
			
 
				-(x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
			
 
				-submatrix contains the remainder.
			
 
				-
			
 
				-\fn void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block along the Z dimension, with a
			
 
				-shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				-(x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
			
 
				-divide z, the last submatrix contains the remainder.
			
 
				-
			
 
				-<b>IMPORTANT</b>:
			
 
				-This can only be used for read-only access, as no coherency is
			
 
				-enforced for the shadowed parts.
			
 
				-
			
 
				-@name Predefined BCSR Filter Functions
			
 
				-\ingroup API_Data_Partition
			
 
				-
			
 
				-This section gives a partial list of the predefined partitioning
			
 
				-functions for BCSR data. Examples on how to use them are shown in
			
 
				-\ref PartitioningData. The complete list can be found in the file
			
 
				-<c>starpu_data_filters.h</c>.
			
 
				-
			
 
				-\fn void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block-sparse matrix into dense matrices.
			
 
				-
			
 
				-\fn void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				-\ingroup API_Data_Partition
			
 
				-Partition a block-sparse matrix into vertical block-sparse matrices.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/expert_mode.doxy
+++ b/doc/doxygen/chapters/api/expert_mode.doxy
@@ -1,35 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Expert_Mode Expert Mode
			
 
				-
			
 
				-\fn void starpu_wake_all_blocked_workers(void)
			
 
				-\ingroup API_Expert_Mode
			
 
				-Wake all the workers, so they can inspect data requests and task
			
 
				-submissions again.
			
 
				-
			
 
				-\fn int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
			
 
				-\ingroup API_Expert_Mode
			
 
				-Register a progression hook, to be called when workers are idle.
			
 
				-
			
 
				-\fn void starpu_progression_hook_deregister(int hook_id)
			
 
				-\ingroup API_Expert_Mode
			
 
				-Unregister a given progression hook.
			
 
				-
			
 
				-*/
			
 
				-
			
--- a/doc/doxygen/chapters/api/fxt_support.doxy
+++ b/doc/doxygen/chapters/api/fxt_support.doxy
@@ -1,120 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_FxT_Support FxT Support
			
 
				-
			
 
				-\struct starpu_fxt_codelet_event
			
 
				-todo
			
 
				-\ingroup API_FxT_Support
			
 
				-\var char starpu_fxt_codelet_event::symbol[256]
			
 
				-    name of the codelet
			
 
				-\var int starpu_fxt_codelet_event::workerid
			
 
				-    todo
			
 
				-\var char starpu_fxt_codelet_event::perfmodel_archname[256]
			
 
				-    todo
			
 
				-\var uint32_t starpu_fxt_codelet_event::hash
			
 
				-    todo
			
 
				-\var size_t starpu_fxt_codelet_event::size
			
 
				-    todo
			
 
				-\var float starpu_fxt_codelet_event::time
			
 
				-    todo
			
 
				-
			
 
				-\struct starpu_fxt_options
			
 
				-todo
			
 
				-\ingroup API_FxT_Support
			
 
				-\var unsigned starpu_fxt_options::per_task_colour
			
 
				-    todo
			
 
				-\var unsigned starpu_fxt_options::no_counter
			
 
				-    todo
			
 
				-\var unsigned starpu_fxt_options::no_bus
			
 
				-    todo
			
 
				-\var unsigned starpu_fxt_options::ninputfiles
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::filenames[STARPU_FXT_MAX_FILES]
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::out_paje_path
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::distrib_time_path
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::activity_path
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::dag_path
			
 
				-    todo
			
 
				-\var char *starpu_fxt_options::file_prefix
			
 
				-    In case we are going to gather multiple traces (e.g in the case of
			
 
				-    MPI processes), we may need to prefix the name of the containers.
			
 
				-\var uint64_t starpu_fxt_options::file_offset
			
 
				-    In case we are going to gather multiple traces (e.g in the case of
			
 
				-    MPI processes), we may need to prefix the name of the containers.
			
 
				-\var int starpu_fxt_options::file_rank
			
 
				-    In case we are going to gather multiple traces (e.g in the case of
			
 
				-    MPI processes), we may need to prefix the name of the containers.
			
 
				-\var char starpu_fxt_options::worker_names[STARPU_NMAXWORKERS][256]
			
 
				-    Output parameters
			
 
				-\var struct starpu_perfmodel_arch starpu_fxt_options::worker_archtypes[STARPU_NMAXWORKERS]
			
 
				-    Output parameters
			
 
				-\var int starpu_fxt_options::nworkers
			
 
				-    Output parameters
			
 
				-\var struct starpu_fxt_codelet_event **starpu_fxt_options::dumped_codelets
			
 
				-    In case we want to dump the list of codelets to an external tool
			
 
				-\var long starpu_fxt_options::dumped_codelets_count
			
 
				-    In case we want to dump the list of codelets to an external tool
			
 
				-
			
 
				-\fn void starpu_fxt_options_init(struct starpu_fxt_options *options)
			
 
				-\ingroup API_FxT_Support
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
			
 
				-\ingroup API_FxT_Support
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_fxt_start_profiling(void)
			
 
				-\ingroup API_FxT_Support
			
 
				-Start recording the trace. The trace is by default started from
			
 
				-starpu_init() call, but can be paused by using
			
 
				-starpu_fxt_stop_profiling(), in which case
			
 
				-starpu_fxt_start_profiling() should be called to resume recording
			
 
				-events.
			
 
				-
			
 
				-\fn void starpu_fxt_stop_profiling(void)
			
 
				-\ingroup API_FxT_Support
			
 
				-Stop recording the trace. The trace is by default stopped when calling
			
 
				-starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
			
 
				-stop it earlier. starpu_fxt_start_profiling() can then be called to
			
 
				-start recording it again, etc.
			
 
				-
			
 
				-\fn void starpu_fxt_autostart_profiling(int autostart)
			
 
				-\ingroup API_FxT_Support
			
 
				-Determine whether profiling should be started by starpu_init(), or only when
			
 
				-starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
			
 
				-prevent it.
			
 
				-
			
 
				-\fn void starpu_fxt_write_data_trace(char *filename_in)
			
 
				-\ingroup API_FxT_Support
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_fxt_trace_user_event(unsigned long code)
			
 
				-\ingroup API_FxT_Support
			
 
				-Add an event in the execution trace if FxT is enabled.
			
 
				-
			
 
				-\fn void starpu_fxt_trace_user_event_string(const char *s)
			
 
				-\ingroup API_FxT_Support
			
 
				-Add a string event in the execution trace if FxT is enabled.
			
 
				-
			
 
				-*/
			
 
				-
			
--- a/doc/doxygen/chapters/api/implicit_dependencies.doxy
+++ b/doc/doxygen/chapters/api/implicit_dependencies.doxy
@@ -1,56 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2017                                CNRS
			
 
				- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Implicit_Data_Dependencies Implicit Data Dependencies
			
 
				-
			
 
				-\brief In this section, we describe how StarPU makes it possible to
			
 
				-insert implicit task dependencies in order to enforce sequential data
			
 
				-consistency. When this data consistency is enabled on a specific data
			
 
				-handle, any data access will appear as sequentially consistent from
			
 
				-the application. For instance, if the application submits two tasks
			
 
				-that access the same piece of data in read-only mode, and then a third
			
 
				-task that accesses it in write mode, dependencies will be added between
			
 
				-the two first tasks and the third one. Implicit data dependencies are
			
 
				-also inserted in the case of data accesses from the application.
			
 
				-
			
 
				-\fn void starpu_data_set_default_sequential_consistency_flag(unsigned flag)
			
 
				-\ingroup API_Implicit_Data_Dependencies
			
 
				-Set the default sequential consistency flag. If a non-zero
			
 
				-value is passed, a sequential data consistency will be enforced for
			
 
				-all handles registered after this function call, otherwise it is
			
 
				-disabled. By default, StarPU enables sequential data consistency. It
			
 
				-is also possible to select the data consistency mode of a specific
			
 
				-data handle with the function
			
 
				-starpu_data_set_sequential_consistency_flag().
			
 
				-
			
 
				-\fn unsigned starpu_data_get_default_sequential_consistency_flag(void)
			
 
				-\ingroup API_Implicit_Data_Dependencies
			
 
				-Return the default sequential consistency flag
			
 
				-
			
 
				-\fn void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag)
			
 
				-\ingroup API_Implicit_Data_Dependencies
			
 
				-Set the data consistency mode associated to a data handle. The
			
 
				-consistency mode set using this function has the priority over the
			
 
				-default mode which can be set with
			
 
				-starpu_data_set_default_sequential_consistency_flag().
			
 
				-
			
 
				-\fn unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle)
			
 
				-\ingroup API_Implicit_Data_Dependencies
			
 
				-Get the data consistency mode associated to the data handle \p handle
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/lower_bound.doxy
+++ b/doc/doxygen/chapters/api/lower_bound.doxy
@@ -1,60 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
			
 
				-
			
 
				-\brief Compute theoretical upper computation efficiency bound
			
 
				-corresponding to some actual execution.
			
 
				-
			
 
				-\fn void starpu_bound_start(int deps, int prio)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Start recording tasks (resets stats). \p deps tells whether
			
 
				-dependencies should be recorded too (this is quite expensive)
			
 
				-
			
 
				-\fn void starpu_bound_stop(void)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Stop recording tasks
			
 
				-
			
 
				-\fn void starpu_bound_print_dot(FILE *output)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Emit the DAG that was recorded on \p output.
			
 
				-
			
 
				-\fn void starpu_bound_compute(double *res, double *integer_res, int integer)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Get theoretical upper bound (in ms) (needs glpk support
			
 
				-detected by configure script). It returns 0 if some performance models
			
 
				-are not calibrated.
			
 
				-
			
 
				-\fn void starpu_bound_print_lp(FILE *output)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Emit the Linear Programming system on \p output for the recorded
			
 
				-tasks, in the lp format
			
 
				-
			
 
				-\fn void starpu_bound_print_mps(FILE *output)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Emit the Linear Programming system on \p output for the recorded
			
 
				-tasks, in the mps format
			
 
				-
			
 
				-\fn void starpu_bound_print(FILE *output, int integer)
			
 
				-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
			
 
				-Emit on \p output the statistics of actual execution vs theoretical upper bound.
			
 
				-\p integer permits to choose between integer solving (which takes a
			
 
				-long time but is correct), and relaxed solving (which provides an
			
 
				-approximate solution).
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/mic_extensions.doxy
+++ b/doc/doxygen/chapters/api/mic_extensions.doxy
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
			
 
				  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  *
			
@@ -16,7 +16,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-/*! \defgroup API_MIC_Extensions MIC Extensions
			
 
				+/*! \ingroup API_MIC_Extensions
			
 
				 
			
 
				 \def STARPU_USE_MIC
			
 
				 \ingroup API_MIC_Extensions
			
@@ -27,20 +27,4 @@ It should be used in your code to detect the availability of MIC.
 
				 \ingroup API_MIC_Extensions
			
 
				 Define the maximum number of MIC devices that are supported by StarPU.
			
 
				 
			
 
				-\typedef starpu_mic_func_symbol_t
			
 
				-\ingroup API_MIC_Extensions
			
 
				-Type for MIC function symbols
			
 
				-
			
 
				-\fn int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
			
 
				-\ingroup API_MIC_Extensions
			
 
				-Initiate a lookup on each MIC device to find the address of the
			
 
				-function named \p func_name, store it in the global array kernels
			
 
				-and return the index in the array through \p symbol.
			
 
				-
			
 
				-\fn starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
			
 
				-\ingroup API_MIC_Extensions
			
 
				-If successful, return the pointer to the function defined by \p symbol on
			
 
				-the device linked to the called device. This can for instance be used
			
 
				-in a starpu_mic_func_t implementation.
			
 
				-
			
 
				 */
			
--- a/doc/doxygen/chapters/api/multiformat_data_interface.doxy
+++ b/doc/doxygen/chapters/api/multiformat_data_interface.doxy
@@ -1,92 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Multiformat_Data_Interface Multiformat Data Interface
			
 
				-
			
 
				-\struct starpu_multiformat_data_interface_ops
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-The different fields are:
			
 
				-\var size_t starpu_multiformat_data_interface_ops::cpu_elemsize
			
 
				-        the size of each element on CPUs
			
 
				-\var size_t starpu_multiformat_data_interface_ops::opencl_elemsize
			
 
				-        the size of each element on OpenCL devices
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_opencl_cl
			
 
				-        pointer to a codelet which converts from CPU to OpenCL
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::opencl_to_cpu_cl
			
 
				-        pointer to a codelet which converts from OpenCL to CPU
			
 
				-\var size_t starpu_multiformat_data_interface_ops::cuda_elemsize
			
 
				-        the size of each element on CUDA devices
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_cuda_cl
			
 
				-        pointer to a codelet which converts from CPU to CUDA
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cuda_to_cpu_cl
			
 
				-        pointer to a codelet which converts from CUDA to CPU
			
 
				-\var size_t starpu_multiformat_data_interface_ops::mic_elemsize
			
 
				-        the size of each element on MIC devices
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_mic_cl
			
 
				-        pointer to a codelet which converts from CPU to MIC
			
 
				-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::mic_to_cpu_cl
			
 
				-        pointer to a codelet which converts from MIC to CPU
			
 
				-
			
 
				-\struct starpu_multiformat_interface
			
 
				-todo
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-\var enum starpu_data_interface_id starpu_multiformat_interface::id
			
 
				-    todo
			
 
				-\var void *starpu_multiformat_interface::cpu_ptr
			
 
				-    todo
			
 
				-\var void *starpu_multiformat_interface::cuda_ptr
			
 
				-    todo
			
 
				-\var void *starpu_multiformat_interface::opencl_ptr
			
 
				-    todo
			
 
				-\var void *starpu_multiformat_interface::mic_ptr
			
 
				-    todo
			
 
				-\var uint32_t starpu_multiformat_interface::nx
			
 
				-    todo
			
 
				-\var struct starpu_multiformat_data_interface_ops *starpu_multiformat_interface::ops
			
 
				-    todo
			
 
				-
			
 
				-\fn void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Register a piece of data that can be represented in different
			
 
				-ways, depending upon the processing unit that manipulates it. It
			
 
				-allows the programmer, for instance, to use an array of structures
			
 
				-when working on a CPU, and a structure of arrays when working on a
			
 
				-GPU. \p nobjects is the number of elements in the data. \p format_ops
			
 
				-describes the format.
			
 
				-
			
 
				-\def STARPU_MULTIFORMAT_GET_CPU_PTR(interface)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Return the local pointer to the data with CPU format.
			
 
				-
			
 
				-\def STARPU_MULTIFORMAT_GET_CUDA_PTR(interface)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Return the local pointer to the data with CUDA format.
			
 
				-
			
 
				-\def STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Return the local pointer to the data with OpenCL format.
			
 
				-
			
 
				-\def STARPU_MULTIFORMAT_GET_MIC_PTR(interface)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Return the local pointer to the data with MIC format.
			
 
				-
			
 
				-\def STARPU_MULTIFORMAT_GET_NX(interface)
			
 
				-\ingroup API_Multiformat_Data_Interface
			
 
				-Return the number of elements in the data.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/opencl_extensions.doxy
+++ b/doc/doxygen/chapters/api/opencl_extensions.doxy
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2018                                CNRS
			
 
				+ * Copyright (C) 2010-2019                                CNRS
			
 
				  * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  *
			
@@ -16,7 +16,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-/*! \defgroup API_OpenCL_Extensions OpenCL Extensions
			
 
				+/*! \ingroup API_OpenCL_Extensions
			
 
				 
			
 
				 \def STARPU_USE_OPENCL
			
 
				 \ingroup API_OpenCL_Extensions
			
@@ -34,236 +34,4 @@ supported by StarPU.
 
				 Define the directory in which the OpenCL codelets of the
			
 
				 applications provided with StarPU have been installed.
			
 
				 
			
 
				-\struct starpu_opencl_program
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Store the OpenCL programs as compiled for the different OpenCL
			
 
				-devices.
			
 
				-\var cl_program starpu_opencl_program::programs[STARPU_MAXOPENCLDEVS]
			
 
				-    Store each program for each OpenCL device.
			
 
				-
			
 
				-@name Writing OpenCL kernels
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-
			
 
				-\fn void starpu_opencl_get_context(int devid, cl_context *context)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the OpenCL context of the device designated by \p devid
			
 
				-in \p context.
			
 
				-
			
 
				-\fn void starpu_opencl_get_device(int devid, cl_device_id *device)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the cl_device_id corresponding to \p devid in \p device.
			
 
				-
			
 
				-\fn void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the command queue of the device designated by \p devid
			
 
				-into \p queue.
			
 
				-
			
 
				-\fn void starpu_opencl_get_current_context(cl_context *context)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the context of the current worker.
			
 
				-
			
 
				-\fn void starpu_opencl_get_current_queue(cl_command_queue *queue)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the computation kernel command queue of the current
			
 
				-worker.
			
 
				-
			
 
				-\fn int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Set the arguments of a given kernel. The list of arguments
			
 
				-must be given as <c>(size_t size_of_the_argument, cl_mem *
			
 
				-pointer_to_the_argument)</c>. The last argument must be 0. Return the
			
 
				-number of arguments that were successfully set. In case of failure,
			
 
				-return the id of the argument that could not be set and \p err is set to
			
 
				-the error returned by OpenCL. Otherwise, return the number of
			
 
				-arguments that were set.
			
 
				-
			
 
				-Here an example:
			
 
				-\code{.c}
			
 
				-int n;
			
 
				-cl_int err;
			
 
				-cl_kernel kernel;
			
 
				-n = starpu_opencl_set_kernel_args(&err, 2, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
			
 
				-if (n != 2)
			
 
				-   fprintf(stderr, "Error : %d\n", err);
			
 
				-\endcode
			
 
				-
			
 
				-@name Compiling OpenCL kernels
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-
			
 
				-Source codes for OpenCL kernels can be stored in a file or in a
			
 
				-string. StarPU provides functions to build the program executable for
			
 
				-each available OpenCL device as a cl_program object. This program
			
 
				-executable can then be loaded within a specific queue as explained in
			
 
				-the next section. These are only helpers; applications can also fill a
			
 
				-starpu_opencl_program array by hand for more advanced use (e.g.
			
 
				-different programs on the different OpenCL devices, for relocation
			
 
				-purpose for instance).
			
 
				-
			
 
				-\fn int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Compile an OpenCL source code stored in a file.
			
 
				-
			
 
				-\fn int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Compile an OpenCL source code stored in a string.
			
 
				-
			
 
				-\fn int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Unload an OpenCL compiled code.
			
 
				-
			
 
				-\fn void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Store the contents of the file \p source_file_name in the buffer
			
 
				-\p opencl_program_source. The file \p source_file_name can be located in the
			
 
				-current directory, or in the directory specified by the environment
			
 
				-variable \ref STARPU_OPENCL_PROGRAM_DIR, or
			
 
				-in the directory <c>share/starpu/opencl</c> of the installation
			
 
				-directory of StarPU, or in the source directory of StarPU. When the
			
 
				-file is found, \p located_file_name is the full name of the file as it
			
 
				-has been located on the system, \p located_dir_name the directory
			
 
				-where it has been located. Otherwise, they are both set to the empty
			
 
				-string.
			
 
				-
			
 
				-\fn void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Similar to function starpu_opencl_load_program_source() but allocate the buffers \p located_file_name, \p located_dir_name and \p opencl_program_source.
			
 
				-
			
 
				-\fn int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Compile the OpenCL kernel stored in the file \p source_file_name
			
 
				-with the given options \p build_options and store the result in the
			
 
				-directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
			
 
				-\p source_file_name. The compilation is done for every OpenCL device,
			
 
				-and the filename is suffixed with the vendor id and the device id of
			
 
				-the OpenCL device.
			
 
				-
			
 
				-\fn int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Compile the OpenCL kernel in the string \p opencl_program_source
			
 
				-with the given options \p build_options and store the result in the
			
 
				-directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
			
 
				-file_name. The compilation is done for every OpenCL device, and the
			
 
				-filename is suffixed with the vendor id and the device id of the
			
 
				-OpenCL device.
			
 
				-
			
 
				-\fn int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Compile the binary OpenCL kernel identified with \p kernel_id.
			
 
				-For every OpenCL device, the binary OpenCL kernel will be loaded from
			
 
				-the file
			
 
				-<c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
			
 
				-
			
 
				-@name Loading OpenCL kernels
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-
			
 
				-\fn int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Create a kernel \p kernel for device \p devid, on its computation
			
 
				-command queue returned in \p queue, using program \p opencl_programs
			
 
				-and name \p kernel_name.
			
 
				-
			
 
				-\fn int starpu_opencl_release_kernel(cl_kernel kernel)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Release the given \p kernel, to be called after kernel execution.
			
 
				-
			
 
				-@name OpenCL statistics
			
 
				-
			
 
				-\fn int starpu_opencl_collect_stats(cl_event event)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Collect statistics on a kernel execution.
			
 
				-After termination of the kernels, the OpenCL codelet should call this
			
 
				-function with the event returned by \c clEnqueueNDRangeKernel(), to
			
 
				-let StarPU collect statistics about the kernel execution (used cycles,
			
 
				-consumed energy).
			
 
				-
			
 
				-@name OpenCL utilities
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-
			
 
				-\fn const char *starpu_opencl_error_string(cl_int status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Return the error message in English corresponding to \p status, an OpenCL
			
 
				-error code.
			
 
				-
			
 
				-\fn void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Given a valid error status, print the corresponding error message on
			
 
				-\c stdout, along with the function name \p func, the filename
			
 
				-\p file, the line number \p line and the message \p msg.
			
 
				-
			
 
				-\def STARPU_OPENCL_DISPLAY_ERROR(status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_display_error() with the error
			
 
				-\p status, the current function name, current file and line number,
			
 
				-and an empty message.
			
 
				-
			
 
				-\fn void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_display_error() and abort.
			
 
				-
			
 
				-\def STARPU_OPENCL_REPORT_ERROR(status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_report_error() with the error \p
			
 
				-status, the current function name, current file and line number,
			
 
				-and an empty message.
			
 
				-
			
 
				-\def STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Call the function starpu_opencl_report_error() with \p msg
			
 
				-and \p status, the current function name, current file and line number.
			
 
				-
			
 
				-\fn cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
			
 
				-combination of \c cl_mem_flags values.
			
 
				-
			
 
				-\fn cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Copy \p size bytes from the given \p ptr on RAM \p src_node to the
			
 
				-given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
			
 
				-bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
			
 
				-synchronous, i.e. the queue is synchronised before returning. If not
			
 
				-<c>NULL</c>, \p event can be used after the call to wait for this
			
 
				-particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				-if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				-asynchronous launch was successful, or to 0 if \p event was
			
 
				-<c>NULL</c>.
			
 
				-
			
 
				-\fn cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Copy \p size bytes asynchronously from the given \p buffer on OpenCL
			
 
				-\p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
			
 
				-offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
			
 
				-is synchronous, i.e. the queue is synchronised before returning. If not
			
 
				-<c>NULL</c>, \p event can be used after the call to wait for this
			
 
				-particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				-if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				-asynchronous launch was successful, or to 0 if \p event was
			
 
				-<c>NULL</c>.
			
 
				-
			
 
				-\fn cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Copy \p size bytes asynchronously from byte offset \p src_offset of \p
			
 
				-src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
			
 
				-OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
			
 
				-synchronous, i.e. the queue is synchronised before returning. If not
			
 
				-<c>NULL</c>, \p event can be used after the call to wait for this
			
 
				-particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				-if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				-asynchronous launch was successful, or to 0 if \p event was
			
 
				-<c>NULL</c>.
			
 
				-
			
 
				-\fn cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event)
			
 
				-\ingroup API_OpenCL_Extensions
			
 
				-Copy \p size bytes from byte offset \p src_offset of \p src on \p
			
 
				-src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
			
 
				-event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
			
 
				-synchronised before returning. If not <c>NULL</c>, \p event can be
			
 
				-used after the call to wait for this particular copy to complete. The
			
 
				-function returns <c>-EAGAIN</c> if the asynchronous launch was
			
 
				-successful. It returns 0 if the synchronous copy was successful, or
			
 
				-fails otherwise.
			
 
				-
			
 
				 */
			
--- a/doc/doxygen/chapters/api/openmp_runtime_support.doxy
+++ b/doc/doxygen/chapters/api/openmp_runtime_support.doxy
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2014,2015,2017                           CNRS
			
 
				+ * Copyright (C) 2014,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2014,2016                                Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,201 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-/*! \defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
			
 
				-
			
 
				-\brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
			
 
				-
			
 
				-
			
 
				-\struct starpu_omp_lock_t
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Opaque Simple Lock object (\ref SimpleLock) for inter-task synchronization operations.
			
 
				-
			
 
				-\sa starpu_omp_init_lock()
			
 
				-\sa starpu_omp_destroy_lock()
			
 
				-\sa starpu_omp_set_lock()
			
 
				-\sa starpu_omp_unset_lock()
			
 
				-\sa starpu_omp_test_lock()
			
 
				-
			
 
				-\var starpu_omp_lock_t::internal
			
 
				-Is an opaque pointer for internal use.
			
 
				-
			
 
				-
			
 
				-\struct starpu_omp_nest_lock_t
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Opaque Nestable Lock object (\ref NestableLock) for inter-task synchronization operations.
			
 
				-
			
 
				-\sa starpu_omp_init_nest_lock()
			
 
				-\sa starpu_omp_destroy_nest_lock()
			
 
				-\sa starpu_omp_set_nest_lock()
			
 
				-\sa starpu_omp_unset_nest_lock()
			
 
				-\sa starpu_omp_test_nest_lock()
			
 
				-\var starpu_omp_nest_lock_t::internal
			
 
				-Is an opaque pointer for internal use.
			
 
				-
			
 
				-
			
 
				-\enum starpu_omp_sched_value
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Set of constants for selecting the for loop iteration scheduling algorithm
			
 
				-(\ref OMPFor) as defined by the OpenMP specification.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_undefined
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Undefined iteration scheduling algorithm.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_static
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-\b Static iteration scheduling algorithm.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_dynamic
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-\b Dynamic iteration scheduling algorithm.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_guided
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-\b Guided iteration scheduling algorithm.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_auto
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-\b Automatically chosen iteration scheduling algorithm.
			
 
				-
			
 
				-\var starpu_omp_sched_value::starpu_omp_sched_runtime
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Choice of iteration scheduling algorithm deferred at \b runtime.
			
 
				-
			
 
				-\sa starpu_omp_for()
			
 
				-\sa starpu_omp_for_inline_first()
			
 
				-\sa starpu_omp_for_inline_next()
			
 
				-\sa starpu_omp_for_alt()
			
 
				-\sa starpu_omp_for_inline_first_alt()
			
 
				-\sa starpu_omp_for_inline_next_alt()
			
 
				-
			
 
				-
			
 
				-\enum starpu_omp_proc_bind_value
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Set of constants for selecting the processor binding method, as defined in the
			
 
				-OpenMP specification.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_undefined
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Undefined processor binding method.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_false
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Team threads may be moved between places at any time.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_true
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Team threads may not be moved between places.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_master
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Assign every thread in the team to the same place as the \b master thread.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_close
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Assign every thread in the team to a place \b close to the parent thread.
			
 
				-
			
 
				-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_spread
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Assign team threads as a sparse distribution over the selected places.
			
 
				-
			
 
				-\sa starpu_omp_get_proc_bind()
			
 
				-
			
 
				-
			
 
				-\struct starpu_omp_parallel_region_attr
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Set of attributes used for creating a new parallel region.
			
 
				-
			
 
				-\sa starpu_omp_parallel_region()
			
 
				-
			
 
				-\var struct starpu_codelet starpu_omp_parallel_region_attr::cl
			
 
				-
			
 
				-Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the parallel region
			
 
				-implicit tasks. The codelet must provide a CPU implementation function.
			
 
				-
			
 
				-\var starpu_data_handle_t *starpu_omp_parallel_region_attr::handles
			
 
				-
			
 
				-Is an array of zero or more ::starpu_data_handle_t data handles to be passed to
			
 
				-the parallel region implicit tasks.
			
 
				-
			
 
				-\var void *starpu_omp_parallel_region_attr::cl_arg
			
 
				-
			
 
				-Is an optional pointer to an inline argument to be passed to the region implicit tasks.
			
 
				-
			
 
				-\var size_t starpu_omp_parallel_region_attr::cl_arg_size
			
 
				-
			
 
				-Is the size of the optional inline argument to be passed to the region implicit tasks, or 0 if unused.
			
 
				-
			
 
				-\var unsigned starpu_omp_parallel_region_attr::cl_arg_free
			
 
				-
			
 
				-Is a boolean indicating whether the optional inline argument should be automatically freed (true), or not (false).
			
 
				-
			
 
				-\var int starpu_omp_parallel_region_attr::if_clause
			
 
				-
			
 
				-Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
			
 
				-omp parallel</c> is true or false.
			
 
				-
			
 
				-\var int starpu_omp_parallel_region_attr::num_threads
			
 
				-
			
 
				-Is an integer indicating the requested number of threads in the team of the
			
 
				-newly created parallel region, or 0 to let the runtime choose the number of
			
 
				-threads alone. This attribute may be ignored by the runtime system if the
			
 
				-requested number of threads is higher than the number of threads that the
			
 
				-runtime can create.
			
 
				-
			
 
				-\struct starpu_omp_task_region_attr
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Set of attributes used for creating a new task region.
			
 
				-
			
 
				-\sa starpu_omp_task_region()
			
 
				-
			
 
				-\var struct starpu_codelet starpu_omp_task_region_attr::cl
			
 
				-
			
 
				-Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the task region
			
 
				-explicit task. The codelet must provide a CPU implementation function or an
			
 
				-accelerator implementation for offloaded target regions.
			
 
				-
			
 
				-\var starpu_data_handle_t *starpu_omp_task_region_attr::handles
			
 
				-
			
 
				-Is an array of zero or more ::starpu_data_handle_t data handles to be passed to
			
 
				-the task region explicit tasks.
			
 
				-
			
 
				-\var void *starpu_omp_task_region_attr::cl_arg
			
 
				-
			
 
				-Is an optional pointer to an inline argument to be passed to the region implicit tasks.
			
 
				-
			
 
				-\var size_t starpu_omp_task_region_attr::cl_arg_size
			
 
				-
			
 
				-Is the size of the optional inline argument to be passed to the region implicit
			
 
				-tasks, or 0 if unused.
			
 
				-
			
 
				-\var unsigned starpu_omp_task_region_attr::cl_arg_free
			
 
				-
			
 
				-Is a boolean indicating whether the optional inline argument should be
			
 
				-automatically freed (true), or not (false).
			
 
				-
			
 
				-\var int starpu_omp_task_region_attr::if_clause
			
 
				-
			
 
				-Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
			
 
				-omp task</c> is true or false.
			
 
				-
			
 
				-\var int starpu_omp_task_region_attr::final_clause
			
 
				-
			
 
				-Is a boolean indicating whether the \b final clause of the corresponding <c>pragma
			
 
				-omp task</c> is true or false.
			
 
				-
			
 
				-\var int starpu_omp_task_region_attr::untied_clause
			
 
				-
			
 
				-Is a boolean indicating whether the \b untied clause of the corresponding <c>pragma
			
 
				-omp task</c> is true or false.
			
 
				-
			
 
				-\var int starpu_omp_task_region_attr::mergeable_clause
			
 
				-
			
 
				-Is a boolean indicating whether the \b mergeable clause of the corresponding <c>pragma
			
 
				-omp task</c> is true or false.
			
 
				-
			
 
				-@name Initialisation
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				+/*! \ingroup API_OpenMP_Runtime_Support
			
 
				 
			
 
				 \def STARPU_OPENMP
			
 
				 \ingroup API_OpenMP_Runtime_Support
			
@@ -217,764 +23,4 @@ This macro is defined when StarPU has been installed with OpenMP Runtime
 
				 support. It should be used in your code to detect the availability of
			
 
				 the runtime support for OpenMP.
			
 
				 
			
 
				-\fn int starpu_omp_init(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Initializes StarPU and its OpenMP Runtime support.
			
 
				-
			
 
				-\fn void starpu_omp_shutdown(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Shutdown StarPU and its OpenMP Runtime support.
			
 
				-
			
 
				-@name Parallel
			
 
				-\anchor ORS_Parallel
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-
			
 
				-\fn void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Generates and launches an OpenMP parallel region and returns after its
			
 
				-completion. \p attr specifies the attributes for the generated parallel region.
			
 
				-If this function is called from inside another, generating, parallel region, the
			
 
				-generated parallel region is nested within the generating parallel region.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp parallel</c>.
			
 
				-
			
 
				-\fn void starpu_omp_master(void (*f)(void *arg), void *arg)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Executes a function only on the master thread of the OpenMP
			
 
				-parallel region it is called from. When called from a thread that is not the
			
 
				-master of the parallel region it is called from, this function does nothing. \p
			
 
				-f is the function to be called. \p arg is an argument passed to function \p f.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp master</c>.
			
 
				-
			
 
				-\fn int starpu_omp_master_inline(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Determines whether the calling thread is the master of the OpenMP parallel region
			
 
				-it is called from or not.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp master</c> without code
			
 
				-outlining.
			
 
				-\return <c>!0</c> if called by the region's master thread.
			
 
				-\return <c>0</c> if not called by the region's master thread.
			
 
				-
			
 
				-@name Synchronization
			
 
				-\anchor ORS_Synchronization
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-
			
 
				-\fn void starpu_omp_barrier(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits until each participating thread of the innermost OpenMP parallel region
			
 
				-has reached the barrier and each explicit OpenMP task bound to this region has
			
 
				-completed its execution.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp barrier</c>.
			
 
				-
			
 
				-\fn void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits until no other thread is executing within the context of the selected
			
 
				-critical section, then proceeds to the exclusive execution of a function within
			
 
				-the critical section. \p f is the function to be executed in the critical
			
 
				-section. \p arg is an argument passed to function \p f. \p name is the name of
			
 
				-the selected critical section. If <c>name == NULL</c>, the selected critical
			
 
				-section is the unique anonymous critical section.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp critical</c>.
			
 
				-
			
 
				-\fn void starpu_omp_critical_inline_begin(const char *name)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits until execution can proceed exclusively within the context of the
			
 
				-selected critical section. \p name is the name of the selected critical
			
 
				-section. If <c>name == NULL</c>, the selected critical section is the unique
			
 
				-anonymous critical section.
			
 
				-
			
 
				-This function together with #starpu_omp_critical_inline_end can be used to
			
 
				-implement <c>\#pragma omp critical</c> without code outlining.
			
 
				-
			
 
				-\fn void starpu_omp_critical_inline_end(const char *name)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Ends the exclusive execution within the context of the selected critical
			
 
				-section. \p name is the name of the selected critical section. If
			
 
				-<c>name==NULL</c>, the selected critical section is the unique anonymous
			
 
				-critical section.
			
 
				-
			
 
				-This function together with #starpu_omp_critical_inline_begin can be used to
			
 
				-implement <c>\#pragma omp critical</c> without code outlining.
			
 
				-
			
 
				-@name Worksharing
			
 
				-\anchor ORS_Worksharing
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-
			
 
				-\fn void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Ensures that a single participating thread of the innermost OpenMP parallel
			
 
				-region executes a function. \p f is the function to be executed by a single
			
 
				-thread. \p arg is an argument passed to function \p f. \p nowait is a flag
			
 
				-indicating whether an implicit barrier is requested after the single section
			
 
				-(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp single</c>.
			
 
				-
			
 
				-\fn int starpu_omp_single_inline(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Decides whether the current thread is elected to run the following single
			
 
				-section among the participating threads of the innermost OpenMP parallel
			
 
				-region.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp single</c> without code
			
 
				-outlining.
			
 
				-\return <c>!0</c> if the calling thread has won the election.
			
 
				-\return <c>0</c> if the calling thread has lost the election.
			
 
				-
			
 
				-\fn void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function executes \p f on a single task of the current parallel region
			
 
				-task, and then broadcasts the contents of the memory block pointed to by the
			
 
				-copyprivate pointer \p data and of size \p data_size to the corresponding \p
			
 
				-data pointed memory blocks of all the other participating region tasks. This
			
 
				-function can be used to implement <c>\#pragma omp single</c> with a copyprivate
			
 
				-clause.
			
 
				-
			
 
				-\sa starpu_omp_single_copyprivate_inline
			
 
				-\sa starpu_omp_single_copyprivate_inline_begin
			
 
				-\sa starpu_omp_single_copyprivate_inline_end
			
 
				-
			
 
				-\fn void *starpu_omp_single_copyprivate_inline_begin(void *data)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function elects one task among the tasks of the current parallel region
			
 
				-task to execute the following single section, and then broadcasts the
			
 
				-copyprivate pointer \p data to all the other participating region tasks. This
			
 
				-function can be used to implement <c>\#pragma omp single</c> with a copyprivate
			
 
				-clause without code outlining.
			
 
				-
			
 
				-\sa starpu_omp_single_copyprivate_inline
			
 
				-\sa starpu_omp_single_copyprivate_inline_end
			
 
				-
			
 
				-\fn void starpu_omp_single_copyprivate_inline_end(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function completes the execution of a single section and returns the
			
 
				-broadcasted copyprivate pointer for tasks that lost the election and <c>NULL</c> for
			
 
				-the task that won the election. This function can be used to implement
			
 
				-<c>\#pragma omp single</c> with a copyprivate clause without code outlining.
			
 
				-
			
 
				-\return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section.
			
 
				-\return <c>NULL</c> for the task that won the election and executed the code of the single section.
			
 
				-
			
 
				-\sa starpu_omp_single_copyprivate_inline
			
 
				-\sa starpu_omp_single_copyprivate_inline_begin
			
 
				-
			
 
				-\fn void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Executes a parallel loop together with the other threads participating in the
			
 
				-innermost parallel region. \p f is the function to be executed iteratively. \p
			
 
				-arg is an argument passed to function \p f. \p nb_iterations is the number of
			
 
				-iterations to be performed by the parallel loop. \p chunk is the number of
			
 
				-consecutive iterations that should be assigned to the same thread when
			
 
				-scheduling the loop workshares, it follows the semantics of the \c modifier
			
 
				-argument in OpenMP <c>\#pragma omp for</c> specification. \p schedule is the
			
 
				-scheduling mode according to the OpenMP specification. \p ordered is a flag
			
 
				-indicating whether the loop region may contain an ordered section
			
 
				-(<c>ordered==!0</c>) or not (<c>ordered==0</c>). \p nowait is a flag
			
 
				-indicating whether an implicit barrier is requested after the for section
			
 
				-(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				-
			
 
				-The function \p f will be called with arguments \p _first_i, the first iteration
			
 
				-to perform, \p _nb_i, the number of consecutive iterations to perform before
			
 
				-returning, \p arg, the free \p arg argument.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp for</c>.
			
 
				-
			
 
				-\fn int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Decides whether the current thread should start to execute a parallel loop
			
 
				-section. See #starpu_omp_for for the argument description.
			
 
				-
			
 
				-This function together with #starpu_omp_for_inline_next can be used to
			
 
				-implement <c>\#pragma omp for</c> without code outlining.
			
 
				-
			
 
				-\return <c>!0</c> if the calling thread participates in the loop region and
			
 
				-should execute a first chunk of iterations. In that case, \p *_first_i will be
			
 
				-set to the first iteration of the chunk to perform and \p *_nb_i will be set to
			
 
				-the number of iterations of the chunk to perform.
			
 
				-
			
 
				-\return <c>0</c> if the calling thread does not participate in the loop region
			
 
				-because all the available iterations have been assigned to the other threads of
			
 
				-the parallel region.
			
 
				-
			
 
				-\sa starpu_omp_for
			
 
				-
			
 
				-\fn int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Decides whether the current thread should continue to execute a parallel loop
			
 
				-section. See #starpu_omp_for for the argument description.
			
 
				-
			
 
				-This function together with #starpu_omp_for_inline_first can be used to
			
 
				-implement <c>\#pragma omp for</c> without code outlining.
			
 
				-
			
 
				-\return <c>!0</c> if the calling thread should execute a next chunk of
			
 
				-iterations. In that case, \p *_first_i will be set to the first iteration of the
			
 
				-chunk to perform and \p *_nb_i will be set to the number of iterations of the
			
 
				-chunk to perform.
			
 
				-
			
 
				-\return <c>0</c> if the calling thread does not participate anymore in the loop
			
 
				-region because all the available iterations have been assigned to the other
			
 
				-threads of the parallel region.
			
 
				-
			
 
				-\sa starpu_omp_for
			
 
				-
			
 
				-\fn void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Alternative implementation of a parallel loop. This function differs from
			
 
				-#starpu_omp_for in the expected arguments of the loop function \c f.
			
 
				-
			
 
				-The function \p f will be called with arguments \p _begin_i, the first iteration
			
 
				-to perform, \p _end_i, the first iteration not to perform before
			
 
				-returning, \p arg, the free \p arg argument.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp for</c>.
			
 
				-
			
 
				-\sa starpu_omp_for
			
 
				-
			
 
				-\fn int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Inline version of the alternative implementation of a parallel loop.
			
 
				-
			
 
				-This function together with #starpu_omp_for_inline_next_alt can be used to
			
 
				-implement <c>\#pragma omp for</c> without code outlining.
			
 
				-
			
 
				-\sa starpu_omp_for
			
 
				-\sa starpu_omp_for_alt
			
 
				-\sa starpu_omp_for_inline_first
			
 
				-
			
 
				-\fn int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Inline version of the alternative implementation of a parallel loop.
			
 
				-
			
 
				-This function together with #starpu_omp_for_inline_first_alt can be used to
			
 
				-implement <c>\#pragma omp for</c> without code outlining.
			
 
				-
			
 
				-\sa starpu_omp_for
			
 
				-\sa starpu_omp_for_alt
			
 
				-\sa starpu_omp_for_inline_next
			
 
				-
			
 
				-\fn void starpu_omp_ordered(void (*f)(void *arg), void *arg)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Ensures that a function is sequentially executed once for each iteration in
			
 
				-order within a parallel loop, by the thread that owns the iteration. \p f is the
			
 
				-function to be executed by the thread that owns the current iteration. \p arg is
			
 
				-an argument passed to function \p f.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp ordered</c>.
			
 
				-
			
 
				-\fn void starpu_omp_ordered_inline_begin(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits until all the iterations of a parallel loop below the iteration owned by
			
 
				-the current thread have been executed.
			
 
				-
			
 
				-This function together with #starpu_omp_ordered_inline_end can be used to
			
 
				-implement <c>\#pragma omp ordered</c> without code outlining.
			
 
				-
			
 
				-\fn void starpu_omp_ordered_inline_end(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Notifies that the ordered section for the current iteration has been completed.
			
 
				-
			
 
				-This function together with #starpu_omp_ordered_inline_begin can be used to
			
 
				-implement <c>\#pragma omp ordered</c> without code outlining.
			
 
				-
			
 
				-\fn void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Ensures that each function of a given array of functions is executed by one and
			
 
				-only one thread. \p nb_sections is the number of functions in the array \p
			
 
				-section_f. \p section_f is the array of functions to be executed as sections. \p
			
 
				-section_arg is an array of arguments to be passed to the corresponding function.
			
 
				-\p nowait is a flag indicating whether an implicit barrier is requested after
			
 
				-the execution of all the sections (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
			
 
				-
			
 
				-\fn void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Alternative implementation of sections. This function differs from
			
 
				-#starpu_omp_sections in that all the sections are combined within a single
			
 
				-function in this version. \p section_f is the function implementing the combined
			
 
				-sections.
			
 
				-
			
 
				-The function \p section_f will be called with arguments \p section_num, the
			
 
				-section number to be executed, \p arg, the entry of \p section_arg corresponding
			
 
				-to this section.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
			
 
				-
			
 
				-\sa starpu_omp_sections
			
 
				-
			
 
				-@name Task
			
 
				-\anchor ORS_Task
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-
			
 
				-\fn void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Generates an explicit child task. The execution of the generated task is
			
 
				-asynchronous with respect to the calling code unless specified otherwise.
			
 
				-\p attr specifies the attributes for the generated task region.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp task</c>.
			
 
				-
			
 
				-\fn void starpu_omp_taskwait(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits for the completion of the tasks generated by the current task. This
			
 
				-function does not wait for the descendants of the tasks generated by the current
			
 
				-task.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp taskwait</c>.
			
 
				-
			
 
				-\fn void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Launches a function and waits for the completion of every descendant task
			
 
				-generated during the execution of the function.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp taskgroup</c>.
			
 
				-
			
 
				-\sa starpu_omp_taskgroup_inline_begin
			
 
				-\sa starpu_omp_taskgroup_inline_end
			
 
				-
			
 
				-\fn void starpu_omp_taskgroup_inline_begin(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Launches a function and gets ready to wait for the completion of every descendant task
			
 
				-generated during the dynamic scope of the taskgroup.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
			
 
				-
			
 
				-\sa starpu_omp_taskgroup
			
 
				-\sa starpu_omp_taskgroup_inline_end
			
 
				-
			
 
				-\fn void starpu_omp_taskgroup_inline_end(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-Waits for the completion of every descendant task
			
 
				-generated during the dynamic scope of the taskgroup.
			
 
				-
			
 
				-This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
			
 
				-
			
 
				-\sa starpu_omp_taskgroup
			
 
				-\sa starpu_omp_taskgroup_inline_begin
			
 
				-
			
 
				-
			
 
				-@name API
			
 
				-\anchor ORS_API
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-
			
 
				-\fn void starpu_omp_set_num_threads(int threads)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function sets ICVS nthreads_var for the parallel regions to be created
			
 
				-with the current region.
			
 
				-
			
 
				-Note: The StarPU OpenMP runtime support currently ignores
			
 
				-this setting for nested parallel regions.
			
 
				-
			
 
				-\sa starpu_omp_get_num_threads
			
 
				-\sa starpu_omp_get_thread_num
			
 
				-\sa starpu_omp_get_max_threads
			
 
				-\sa starpu_omp_get_num_procs
			
 
				-
			
 
				-\fn int starpu_omp_get_num_threads()
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of threads of the current region.
			
 
				-
			
 
				-\return the number of threads of the current region.
			
 
				-
			
 
				-\sa starpu_omp_set_num_threads
			
 
				-\sa starpu_omp_get_thread_num
			
 
				-\sa starpu_omp_get_max_threads
			
 
				-\sa starpu_omp_get_num_procs
			
 
				-
			
 
				-\fn int starpu_omp_get_thread_num()
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the rank of the current thread among the threads
			
 
				-of the current region.
			
 
				-
			
 
				-\return the rank of the current thread in the current region.
			
 
				-
			
 
				-\sa starpu_omp_set_num_threads
			
 
				-\sa starpu_omp_get_num_threads
			
 
				-\sa starpu_omp_get_max_threads
			
 
				-\sa starpu_omp_get_num_procs
			
 
				-
			
 
				-\fn int starpu_omp_get_max_threads()
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the maximum number of threads that can be used to
			
 
				-create a region from the current region.
			
 
				-
			
 
				-\return the maximum number of threads that can be used to create a region from the current region.
			
 
				-
			
 
				-\sa starpu_omp_set_num_threads
			
 
				-\sa starpu_omp_get_num_threads
			
 
				-\sa starpu_omp_get_thread_num
			
 
				-\sa starpu_omp_get_num_procs
			
 
				-
			
 
				-\fn int starpu_omp_get_num_procs(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of StarPU CPU workers.
			
 
				-
			
 
				-\return the number of StarPU CPU workers.
			
 
				-
			
 
				-\sa starpu_omp_set_num_threads
			
 
				-\sa starpu_omp_get_num_threads
			
 
				-\sa starpu_omp_get_thread_num
			
 
				-\sa starpu_omp_get_max_threads
			
 
				-
			
 
				-\fn int starpu_omp_in_parallel(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns whether it is called from the scope of a parallel region or not.
			
 
				-
			
 
				-\return <c>!0</c> if called from a parallel region scope.
			
 
				-\return <c>0</c> otherwise.
			
 
				-
			
 
				-\fn void starpu_omp_set_dynamic(int dynamic_threads)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function enables (1) or disables (0) dynamically adjusting the number of parallel threads.
			
 
				-
			
 
				-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				-
			
 
				-\sa starpu_omp_get_dynamic
			
 
				-
			
 
				-\fn int starpu_omp_get_dynamic(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the state of dynamic thread number adjustment.
			
 
				-
			
 
				-\return <c>!0</c> if dynamic thread number adjustment is enabled.
			
 
				-\return <c>0</c> otherwise.
			
 
				-
			
 
				-\sa starpu_omp_set_dynamic
			
 
				-
			
 
				-\fn void starpu_omp_set_nested(int nested)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function enables (1) or disables (0) nested parallel regions.
			
 
				-
			
 
				-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				-
			
 
				-\sa starpu_omp_get_nested
			
 
				-\sa starpu_omp_get_max_active_levels
			
 
				-\sa starpu_omp_set_max_active_levels
			
 
				-\sa starpu_omp_get_level
			
 
				-\sa starpu_omp_get_active_level
			
 
				-
			
 
				-\fn int starpu_omp_get_nested(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns whether nested parallel sections are enabled or not.
			
 
				-
			
 
				-\return <c>!0</c> if nested parallel sections are enabled.
			
 
				-\return <c>0</c> otherwise.
			
 
				-
			
 
				-\sa starpu_omp_set_nested
			
 
				-\sa starpu_omp_get_max_active_levels
			
 
				-\sa starpu_omp_set_max_active_levels
			
 
				-\sa starpu_omp_get_level
			
 
				-\sa starpu_omp_get_active_level
			
 
				-
			
 
				-\fn int starpu_omp_get_cancellation(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the state of the cancel ICVS var.
			
 
				-
			
 
				-\fn void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function sets the default scheduling kind for upcoming loops within the
			
 
				-current parallel section. \p kind is the scheduler kind, \p modifier
			
 
				-complements the scheduler kind with information such as the chunk size,
			
 
				-in accordance with the OpenMP specification.
			
 
				-
			
 
				-\sa starpu_omp_get_schedule
			
 
				-
			
 
				-\fn void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the current selected default loop scheduler.
			
 
				-
			
 
				-\return the kind and the modifier of the current default loop scheduler.
			
 
				-
			
 
				-\sa starpu_omp_set_schedule
			
 
				-
			
 
				-\fn int starpu_omp_get_thread_limit(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of StarPU CPU workers.
			
 
				-
			
 
				-\return the number of StarPU CPU workers.
			
 
				-
			
 
				-\fn void starpu_omp_set_max_active_levels(int max_levels)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function sets the maximum number of allowed active parallel section levels.
			
 
				-
			
 
				-Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assumes \p max_levels equals <c>1</c> instead.
			
 
				-
			
 
				-\sa starpu_omp_set_nested
			
 
				-\sa starpu_omp_get_nested
			
 
				-\sa starpu_omp_get_max_active_levels
			
 
				-\sa starpu_omp_get_level
			
 
				-\sa starpu_omp_get_active_level
			
 
				-
			
 
				-\fn int starpu_omp_get_max_active_levels(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the current maximum number of allowed active parallel section levels.
			
 
				-
			
 
				-\return the current maximum number of allowed active parallel section levels.
			
 
				-
			
 
				-\sa starpu_omp_set_nested
			
 
				-\sa starpu_omp_get_nested
			
 
				-\sa starpu_omp_set_max_active_levels
			
 
				-\sa starpu_omp_get_level
			
 
				-\sa starpu_omp_get_active_level
			
 
				-
			
 
				-\fn int starpu_omp_get_level(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the nesting level of the current parallel section.
			
 
				-
			
 
				-\return the nesting level of the current parallel section.
			
 
				-
			
 
				-\sa starpu_omp_set_nested
			
 
				-\sa starpu_omp_get_nested
			
 
				-\sa starpu_omp_get_max_active_levels
			
 
				-\sa starpu_omp_set_max_active_levels
			
 
				-\sa starpu_omp_get_active_level
			
 
				-
			
 
				-\fn int starpu_omp_get_ancestor_thread_num(int level)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of the ancestor of the current parallel section.
			
 
				-
			
 
				-\return the number of the ancestor of the current parallel section.
			
 
				-
			
 
				-\fn int starpu_omp_get_team_size(int level)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the size of the team of the current parallel section.
			
 
				-
			
 
				-\return the size of the team of the current parallel section.
			
 
				-
			
 
				-\fn int starpu_omp_get_active_level(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the nesting level of the current innermost active parallel section.
			
 
				-
			
 
				-\return the nesting level of the current innermost active parallel section.
			
 
				-
			
 
				-\sa starpu_omp_set_nested
			
 
				-\sa starpu_omp_get_nested
			
 
				-\sa starpu_omp_get_max_active_levels
			
 
				-\sa starpu_omp_set_max_active_levels
			
 
				-\sa starpu_omp_get_level
			
 
				-
			
 
				-\fn int starpu_omp_in_final(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function checks whether the current task is final or not.
			
 
				-
			
 
				-\return <c>!0</c> if called from a final task.
			
 
				-\return <c>0</c> otherwise.
			
 
				-
			
 
				-\fn enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the proc_bind setting of the current parallel region.
			
 
				-
			
 
				-\return the proc_bind setting of the current parallel region.
			
 
				-
			
 
				-\fn void starpu_omp_set_default_device(int device_num)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function sets the number of the device to use as default.
			
 
				-
			
 
				-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
			
 
				-
			
 
				-\sa starpu_omp_get_default_device
			
 
				-\sa starpu_omp_is_initial_device
			
 
				-
			
 
				-\fn int starpu_omp_get_default_device(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of the device used as default.
			
 
				-
			
 
				-\return the number of the device used as default.
			
 
				-
			
 
				-\sa starpu_omp_set_default_device
			
 
				-\sa starpu_omp_is_initial_device
			
 
				-
			
 
				-\fn int starpu_omp_get_num_devices(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of the devices.
			
 
				-
			
 
				-\return the number of the devices.
			
 
				-
			
 
				-\fn int starpu_omp_get_num_teams(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the number of teams in the current teams region.
			
 
				-
			
 
				-\return the number of teams in the current teams region.
			
 
				-
			
 
				-\sa starpu_omp_get_team_num
			
 
				-
			
 
				-\fn int starpu_omp_get_team_num(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the team number of the calling thread.
			
 
				-
			
 
				-\return the team number of the calling thread.
			
 
				-
			
 
				-\sa starpu_omp_get_num_teams
			
 
				-
			
 
				-\fn int starpu_omp_is_initial_device(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function checks whether the current device is the initial device or not.
			
 
				-
			
 
				-\fn int starpu_omp_get_max_task_priority
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-The omp_get_max_task_priority routine returns the maximum value that can be
			
 
				-specified in the priority clause.
			
 
				-
			
 
				-\return <c>!0</c> if called from the host device.
			
 
				-\return <c>0</c> otherwise.
			
 
				-
			
 
				-\sa starpu_omp_set_default_device
			
 
				-\sa starpu_omp_get_default_device
			
 
				-
			
 
				-\fn void starpu_omp_init_lock(starpu_omp_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function initializes an opaque lock object.
			
 
				-
			
 
				-\sa starpu_omp_destroy_lock
			
 
				-\sa starpu_omp_set_lock
			
 
				-\sa starpu_omp_unset_lock
			
 
				-\sa starpu_omp_test_lock
			
 
				-
			
 
				-\fn void starpu_omp_destroy_lock(starpu_omp_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function destroys an opaque lock object.
			
 
				-
			
 
				-\sa starpu_omp_init_lock
			
 
				-\sa starpu_omp_set_lock
			
 
				-\sa starpu_omp_unset_lock
			
 
				-\sa starpu_omp_test_lock
			
 
				-
			
 
				-\fn void starpu_omp_set_lock(starpu_omp_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function locks an opaque lock object. If the lock is already locked, the
			
 
				-function will block until it succeeds in exclusively acquiring the lock.
			
 
				-
			
 
				-\sa starpu_omp_init_lock
			
 
				-\sa starpu_omp_destroy_lock
			
 
				-\sa starpu_omp_unset_lock
			
 
				-\sa starpu_omp_test_lock
			
 
				-
			
 
				-\fn void starpu_omp_unset_lock(starpu_omp_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function unlocks a previously locked lock object. The behaviour of this
			
 
				-function is unspecified if it is called on an unlocked lock object.
			
 
				-
			
 
				-\sa starpu_omp_init_lock
			
 
				-\sa starpu_omp_destroy_lock
			
 
				-\sa starpu_omp_set_lock
			
 
				-\sa starpu_omp_test_lock
			
 
				-
			
 
				-\fn int starpu_omp_test_lock(starpu_omp_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function unblockingly attempts to lock a lock object and returns whether
			
 
				-it succeeded or not.
			
 
				-
			
 
				-\return <c>!0</c> if the function succeeded in acquiring the lock.
			
 
				-\return <c>0</c> if the lock was already locked.
			
 
				-
			
 
				-\sa starpu_omp_init_lock
			
 
				-\sa starpu_omp_destroy_lock
			
 
				-\sa starpu_omp_set_lock
			
 
				-\sa starpu_omp_unset_lock
			
 
				-
			
 
				-\fn void starpu_omp_init_nest_lock(starpu_omp_nest_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function initializes an opaque lock object supporting nested locking operations.
			
 
				-
			
 
				-\sa starpu_omp_destroy_nest_lock
			
 
				-\sa starpu_omp_set_nest_lock
			
 
				-\sa starpu_omp_unset_nest_lock
			
 
				-\sa starpu_omp_test_nest_lock
			
 
				-
			
 
				-\fn void starpu_omp_destroy_nest_lock(starpu_omp_nest_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function destroys an opaque lock object supporting nested locking operations.
			
 
				-
			
 
				-\sa starpu_omp_init_nest_lock
			
 
				-\sa starpu_omp_set_nest_lock
			
 
				-\sa starpu_omp_unset_nest_lock
			
 
				-\sa starpu_omp_test_nest_lock
			
 
				-
			
 
				-\fn void starpu_omp_set_nest_lock(starpu_omp_nest_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function locks an opaque lock object supporting nested locking operations.
			
 
				-If the lock is already locked by another task, the function will block until
			
 
				-it succeeds in exclusively acquiring the lock. If the lock is already taken by
			
 
				-the current task, the function will increase the nested locking level of the
			
 
				-lock object.
			
 
				-
			
 
				-\sa starpu_omp_init_nest_lock
			
 
				-\sa starpu_omp_destroy_nest_lock
			
 
				-\sa starpu_omp_unset_nest_lock
			
 
				-\sa starpu_omp_test_nest_lock
			
 
				-
			
 
				-\fn void starpu_omp_unset_nest_lock(starpu_omp_nest_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function unlocks a previously locked lock object supporting nested locking
			
 
				-operations. If the lock has been locked multiple times in nested fashion, the
			
 
				-nested locking level is decreased and the lock remains locked. Otherwise, if
			
 
				-the lock has only been locked once, it becomes unlocked. The behaviour of this
			
 
				-function is unspecified if it is called on an unlocked lock object. The
			
 
				-behaviour of this function is unspecified if it is called from a different task
			
 
				-than the one that locked the lock object.
			
 
				-
			
 
				-\sa starpu_omp_init_nest_lock
			
 
				-\sa starpu_omp_destroy_nest_lock
			
 
				-\sa starpu_omp_set_nest_lock
			
 
				-\sa starpu_omp_test_nest_lock
			
 
				-
			
 
				-\fn int starpu_omp_test_nest_lock(starpu_omp_nest_lock_t *lock)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function unblocking attempts to lock an opaque lock object supporting
			
 
				-nested locking operations and returns whether it succeeded or not. If the lock
			
 
				-is already locked by another task, the function will return without having
			
 
				-acquired the lock. If the lock is already taken by the current task, the
			
 
				-function will increase the nested locking level of the lock object.
			
 
				-
			
 
				-\return <c>!0</c> if the function succeeded in acquiring the lock.
			
 
				-\return <c>0</c> if the lock was already locked.
			
 
				-
			
 
				-\sa starpu_omp_init_nest_lock
			
 
				-\sa starpu_omp_destroy_nest_lock
			
 
				-\sa starpu_omp_set_nest_lock
			
 
				-\sa starpu_omp_unset_nest_lock
			
 
				-
			
 
				-\fn void starpu_omp_atomic_fallback_inline_begin(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function implements the entry point of a fallback global atomic region. It
			
 
				-blocks until it succeeds in acquiring exclusive access to the global atomic
			
 
				-region.
			
 
				-
			
 
				-\sa starpu_omp_atomic_fallback_inline_end
			
 
				-
			
 
				-\fn void starpu_omp_atomic_fallback_inline_end(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function implements the exit point of a fallback global atomic region. It
			
 
				-release the exclusive access to the global atomic region.
			
 
				-
			
 
				-\sa starpu_omp_atomic_fallback_inline_begin
			
 
				-
			
 
				-\fn double starpu_omp_get_wtime(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the elapsed wallclock time in seconds.
			
 
				-
			
 
				-\return the elapsed wallclock time in seconds.
			
 
				-
			
 
				-\sa starpu_omp_get_wtick
			
 
				-
			
 
				-\fn double starpu_omp_get_wtick(void)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function returns the precision of the time used by \p starpu_omp_get_wtime.
			
 
				-
			
 
				-\return the precision of the time used by \p starpu_omp_get_wtime.
			
 
				-
			
 
				-\sa starpu_omp_get_wtime
			
 
				-
			
 
				-\fn void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base)
			
 
				-\ingroup API_OpenMP_Runtime_Support
			
 
				-This function enables setting additional vector metadata needed by the OpenMP Runtime Support.
			
 
				-
			
 
				-\p handle is vector data handle.
			
 
				-\p slice_base is the base of an array slice, expressed in number of vector elements from the array base.
			
 
				-
			
 
				-\sa STARPU_VECTOR_GET_SLICE_BASE
			
 
				-
			
 
				 */
			
--- a/doc/doxygen/chapters/api/running_driver.doxy
+++ b/doc/doxygen/chapters/api/running_driver.doxy
@@ -1,59 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Running_Drivers Running Drivers
			
 
				-
			
 
				-\struct starpu_driver
			
 
				-structure for a driver
			
 
				-\ingroup API_Running_Drivers
			
 
				-\var enum starpu_worker_archtype starpu_driver::type
			
 
				-    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
			
 
				-    and ::STARPU_OPENCL_WORKER are currently supported.
			
 
				-\var union starpu_driver::id
			
 
				-    Identifier of the driver.
			
 
				-
			
 
				-\fn int starpu_driver_run(struct starpu_driver *d)
			
 
				-\ingroup API_Running_Drivers
			
 
				-Initialize the given driver, run it until it receives a request to
			
 
				-terminate, deinitialize it and return 0 on success. Return
			
 
				-<c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
			
 
				-(::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
			
 
				-
			
 
				-This is the same as using the following functions: calling
			
 
				-starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
			
 
				-and finally starpu_driver_deinit().
			
 
				-
			
 
				-\fn int starpu_driver_init(struct starpu_driver *d)
			
 
				-\ingroup API_Running_Drivers
			
 
				-Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
			
 
				-if starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				-
			
 
				-\fn int starpu_driver_run_once(struct starpu_driver *d)
			
 
				-\ingroup API_Running_Drivers
			
 
				-Run the driver once, then return 0 on success, <c>-EINVAL</c> if starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				-
			
 
				-\fn int starpu_driver_deinit(struct starpu_driver *d)
			
 
				-\ingroup API_Running_Drivers
			
 
				-Deinitialize the given driver. Return 0 on success, <c>-EINVAL</c> if
			
 
				-starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				-
			
 
				-\fn void starpu_drivers_request_termination(void)
			
 
				-\ingroup API_Running_Drivers
			
 
				-Notify all running drivers that they should terminate.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -232,11 +232,9 @@ Documentation License”.
 
				 \input{group__API__Data__Interfaces}
			
 
				 \input{group__API__Data__Partition}
			
 
				 \input{group__API__Out__Of__Core}
			
 
				-\input{group__API__Multiformat__Data__Interface}
			
 
				 \input{group__API__Codelet__And__Tasks}
			
 
				 \input{group__API__Insert__Task}
			
 
				 \input{group__API__Explicit__Dependencies}
			
 
				-\input{group__API__Implicit__Data__Dependencies}
			
 
				 \input{group__API__Performance__Model}
			
 
				 \input{group__API__Profiling}
			
 
				 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
			
--- a/include/starpu_bitmap.h
+++ b/include/starpu_bitmap.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2013-2015,2017                           CNRS
			
 
				+ * Copyright (C) 2013-2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2013,2016                                Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Simon Archipoff
			
 
				  *
			
@@ -19,31 +19,54 @@
 
				 #ifndef __STARPU_BITMAP_H__
			
 
				 #define __STARPU_BITMAP_H__
			
 
				 
			
 
				+/** @defgroup API_Bitmap Bitmap
			
 
				+
			
 
				+    @brief This is the interface for the bitmap utilities provided by StarPU.
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/** create an empty starpu_bitmap */
			
 
				 struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				+/** free \p b */
			
 
				 void starpu_bitmap_destroy(struct starpu_bitmap *b);
			
 
				 
			
 
				+/** set bit \p e in \p b */
			
 
				 void starpu_bitmap_set(struct starpu_bitmap *b, int e);
			
 
				+/** unset bit \p e in \p b */
			
 
				 void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
			
 
				+/** unset all bits in \p b */
			
 
				 void starpu_bitmap_unset_all(struct starpu_bitmap *b);
			
 
				 
			
 
				+/** return true iff bit \p e is set in \p b */
			
 
				 int starpu_bitmap_get(struct starpu_bitmap *b, int e);
			
 
				+/** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */
			
 
				 void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
			
 
				+/** Basically compute \p a |= \p b */
			
 
				 void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
			
 
				+/** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */
			
 
				 int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
			
 
				+/** return the number of set bits in \p b */
			
 
				 int starpu_bitmap_cardinal(struct starpu_bitmap *b);
			
 
				 
			
 
				+/** return the index of the first set bit of \p b, -1 if none */
			
 
				 int starpu_bitmap_first(struct starpu_bitmap *b);
			
 
				+/** return the position of the last set bit of \p b, -1 if none */
			
 
				 int starpu_bitmap_last(struct starpu_bitmap *b);
			
 
				+/** return the position of set bit right after \p e in \p b, -1 if none */
			
 
				 int starpu_bitmap_next(struct starpu_bitmap *b, int e);
			
 
				+/** todo */
			
 
				 int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif
			
--- a/include/starpu_bound.h
+++ b/include/starpu_bound.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011,2013,2017                           CNRS
			
 
				+ * Copyright (C) 2011,2013,2017,2019                      CNRS
			
 
				  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,6 +18,13 @@
 
				 #ifndef __STARPU_BOUND_H__
			
 
				 #define __STARPU_BOUND_H__
			
 
				 
			
 
				+/** @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
			
 
				+
			
 
				+    @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdio.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -25,19 +32,41 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/** Start recording tasks (resets stats). \p deps tells whether dependencies should be recorded too (this is quite expensive) */
			
 
				 void starpu_bound_start(int deps, int prio);
			
 
				+/** Stop recording tasks */
			
 
				 void starpu_bound_stop(void);
			
 
				 
			
 
				+/** Emit the DAG that was recorded on \p output. */
			
 
				 void starpu_bound_print_dot(FILE *output);
			
 
				 
			
 
				+/** Get theoretical upper bound (in ms) (needs glpk support
			
 
				+    detected by configure script). It returns 0 if some performance models
			
 
				+    are not calibrated.
			
 
				+*/
			
 
				 void starpu_bound_compute(double *res, double *integer_res, int integer);
			
 
				 
			
 
				+/** Emit the Linear Programming system on \p output for the recorded
			
 
				+    tasks, in the lp format
			
 
				+*/
			
 
				 void starpu_bound_print_lp(FILE *output);
			
 
				+
			
 
				+/** Emit the Linear Programming system on \p output for the recorded
			
 
				+    tasks, in the mps format
			
 
				+*/
			
 
				 void starpu_bound_print_mps(FILE *output);
			
 
				+
			
 
				+/** Emit on \p output the statistics of actual execution vs theoretical upper bound.
			
 
				+    \p integer allows choosing between integer solving (which takes a
			
 
				+    long time but is correct), and relaxed solving (which provides an
			
 
				+    approximate solution).
			
 
				+*/
			
 
				 void starpu_bound_print(FILE *output, int integer);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_BOUND_H__ */
			
--- a/include/starpu_clusters.h
+++ b/include/starpu_clusters.h
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_CLUSTERS_UTIL_H__
			
 
				 #define __STARPU_CLUSTERS_UTIL_H__
			
 
				 
			
 
				+/** @defgroup API_Clustering_Machine Clustering Machine
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 
			
 
				 #include <hwloc.h>
			
@@ -43,14 +48,16 @@ extern "C"
 
				 #define STARPU_CLUSTER_NEW			(13<<STARPU_MODE_SHIFT)
			
 
				 #define STARPU_CLUSTER_NCORES			(14<<STARPU_MODE_SHIFT)
			
 
				 
			
 
				-/* These represent the default available functions to enforce cluster
			
 
				- * use by the sub-runtime */
			
 
				+/**
			
 
				+   These represent the default available functions to enforce cluster
			
 
				+   use by the sub-runtime
			
 
				+*/
			
 
				 enum starpu_cluster_types
			
 
				 {
			
 
				-	STARPU_CLUSTER_OPENMP,
			
 
				-	STARPU_CLUSTER_INTEL_OPENMP_MKL,
			
 
				+	STARPU_CLUSTER_OPENMP, /**< todo */
			
 
				+	STARPU_CLUSTER_INTEL_OPENMP_MKL,  /**< todo */
			
 
				 #ifdef STARPU_MKL
			
 
				-	STARPU_CLUSTER_GNU_OPENMP_MKL,
			
 
				+	STARPU_CLUSTER_GNU_OPENMP_MKL,  /**< todo */
			
 
				 #endif
			
 
				 };
			
 
				 
			
@@ -72,4 +79,7 @@ void starpu_gnu_openmp_mkl_prologue(void*);
 
				 #endif
			
 
				 
			
 
				 #endif
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_CLUSTERS_UTIL_H__ */
			
--- a/include/starpu_cublas.h
+++ b/include/starpu_cublas.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
			
 
				  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,17 +18,44 @@
 
				 #ifndef __STARPU_CUBLAS_H__
			
 
				 #define __STARPU_CUBLAS_H__
			
 
				 
			
 
				+/** @ingroup API_CUDA_Extensions
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Initialize CUBLAS on every CUDA device. The
			
 
				+   CUBLAS library must be initialized prior to any CUBLAS call. Calling
			
 
				+   starpu_cublas_init() will initialize CUBLAS on every CUDA device
			
 
				+   controlled by StarPU. This call blocks until CUBLAS has been properly
			
 
				+   initialized on every device.
			
 
				+*/
			
 
				 void starpu_cublas_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
			
 
				+   codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
			
 
				+   CUDA stream. When using one thread per CUDA worker, this function does not
			
 
				+   do anything since the CUBLAS stream does not change, and is set once by
			
 
				+   starpu_cublas_init().
			
 
				+*/
			
 
				 void starpu_cublas_set_stream(void);
			
 
				+
			
 
				+/**
			
 
				+   Synchronously deinitialize the CUBLAS library on
			
 
				+   every CUDA device.
			
 
				+*/
			
 
				 void starpu_cublas_shutdown(void);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_CUBLAS_H__ */
			
--- a/include/starpu_cublas_v2.h
+++ b/include/starpu_cublas_v2.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2017                           CNRS
			
 
				+ * Copyright (C) 2010-2013,2017,2019                      CNRS
			
 
				  * Copyright (C) 2010-2012,2017                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,6 +18,11 @@
 
				 #ifndef __STARPU_CUBLAS_V2_H__
			
 
				 #define __STARPU_CUBLAS_V2_H__
			
 
				 
			
 
				+/** @ingroup API_CUDA_Extensions
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				 
			
 
				 #include <cublas_v2.h>
			
@@ -27,6 +32,11 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Return the CUBLAS handle to be used to queue CUBLAS
			
 
				+   kernels. It is properly initialized and configured for multistream by
			
 
				+   starpu_cublas_init().
			
 
				+*/
			
 
				 cublasHandle_t starpu_cublas_get_local_handle(void);
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -35,4 +45,6 @@ cublasHandle_t starpu_cublas_get_local_handle(void);
 
				 
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_CUBLAS_V2_H__ */
			
--- a/include/starpu_cuda.h
+++ b/include/starpu_cuda.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010-2012,2014                           Université de Bordeaux
			
 
				  * Copyright (C) 2011                                     Inria
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_CUDA_H__
			
 
				 #define __STARPU_CUDA_H__
			
 
				 
			
 
				+/** @defgroup API_CUDA_Extensions CUDA Extensions
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
@@ -31,20 +36,50 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/** Report a CUBLAS error. */
			
 
				 void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
			
 
				-#define STARPU_CUBLAS_REPORT_ERROR(status) \
			
 
				-	starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				 
			
 
				+/** Calls starpu_cublas_report_error(), passing the current function, file and line position.*/
			
 
				+#define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+/** Report a CUDA error. */
			
 
				 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
			
 
				-#define STARPU_CUDA_REPORT_ERROR(status) \
			
 
				-	starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				 
			
 
				+/** Calls starpu_cuda_report_error(), passing the current function, file and line position.*/
			
 
				+#define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+/**
			
 
				+    Return the current worker’s CUDA stream. StarPU
			
 
				+    provides a stream for every CUDA device controlled by StarPU. This
			
 
				+    function is only provided for convenience so that programmers can
			
 
				+    easily use asynchronous operations within codelets without having to
			
 
				+    create a stream by hand. Note that the application is not forced to
			
 
				+    use the stream provided by starpu_cuda_get_local_stream() and may also
			
 
				+    create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
			
 
				+    allowed, but will reduce the likelihood of having all transfers
			
 
				+    overlapped.
			
 
				+*/
			
 
				 cudaStream_t starpu_cuda_get_local_stream(void);
			
 
				 
			
 
				+/** Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker). */
			
 
				 const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
			
 
				 
			
 
				+/**
			
 
				+    Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				+    to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				+    copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
			
 
				+    asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				+    data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				+    asynchronous launch was successful. It returns 0 if the synchronous
			
 
				+    copy was successful, or fails otherwise.
			
 
				+*/
			
 
				 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
			
 
				 
			
 
				+/**
			
 
				+    Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
			
 
				+    according to whether \p devid is among the field
			
 
				+    starpu_conf::cuda_opengl_interoperability.
			
 
				+*/
			
 
				 void starpu_cuda_set_device(unsigned devid);
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -52,5 +87,7 @@ void starpu_cuda_set_device(unsigned devid);
 
				 #endif
			
 
				 
			
 
				 #endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */
			
 
				-#endif /* __STARPU_CUDA_H__ */
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+#endif /* __STARPU_CUDA_H__ */
			
--- a/include/starpu_cusparse.h
+++ b/include/starpu_cusparse.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
			
 
				  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,6 +18,11 @@
 
				 #ifndef __STARPU_CUSPARSE_H__
			
 
				 #define __STARPU_CUSPARSE_H__
			
 
				 
			
 
				+/** @ingroup API_CUDA_Extensions
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				 #include <cusparse.h>
			
 
				 #endif
			
@@ -27,10 +32,25 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Initialize CUSPARSE on every CUDA device
			
 
				+   controlled by StarPU. This call blocks until CUSPARSE has been properly
			
 
				+   initialized on every device.
			
 
				+*/
			
 
				 void starpu_cusparse_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Synchronously deinitialize the CUSPARSE library on
			
 
				+   every CUDA device.
			
 
				+*/
			
 
				 void starpu_cusparse_shutdown(void);
			
 
				 
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				+/**
			
 
				+   Return the CUSPARSE handle to be used to queue CUSPARSE
			
 
				+   kernels. It is properly initialized and configured for multistream by
			
 
				+   starpu_cusparse_init().
			
 
				+*/
			
 
				 cusparseHandle_t starpu_cusparse_get_local_handle(void);
			
 
				 #endif
			
 
				 
			
@@ -38,4 +58,6 @@ cusparseHandle_t starpu_cusparse_get_local_handle(void);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_CUSPARSE_H__ */
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				  * Copyright (C) 2011-2013,2016,2017                      Inria
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2019                           CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,6 +19,15 @@
 
				 #ifndef __STARPU_DATA_H__
			
 
				 #define __STARPU_DATA_H__
			
 
				 
			
 
				+/** @defgroup API_Data_Management Data Management
			
 
				+
			
 
				+    @brief Data management facilities provided by StarPU. We show how
			
 
				+    to use existing data interfaces in \ref API_Data_Interfaces, but
			
 
				+    developers can design their own data interfaces if required.
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -26,60 +35,302 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   This macro is used when the RAM memory node is specified.
			
 
				+*/
			
 
				+#define STARPU_MAIN_RAM 0
			
 
				+
			
 
				 struct _starpu_data_state;
			
 
				+/**
			
 
				+   StarPU uses ::starpu_data_handle_t as an opaque handle to manage a
			
 
				+   piece of data. Once a piece of data has been registered to StarPU,
			
 
				+   it is associated to a ::starpu_data_handle_t which keeps track of
			
 
				+   the state of the piece of data over the entire machine, so that we
			
 
				+   can maintain data consistency and locate data replicates for
			
 
				+   instance.
			
 
				+*/
			
 
				 typedef struct _starpu_data_state* starpu_data_handle_t;
			
 
				 
			
 
				-/* Note: when adding a flag here, update _starpu_detect_implicit_data_deps_with_handle */
			
 
				+/**
			
 
				+    Describe a StarPU data access mode
			
 
				+
			
 
				+    Note: when adding a flag here, update
			
 
				+    _starpu_detect_implicit_data_deps_with_handle
			
 
				+
			
 
				+    Note: other STARPU_* values in include/starpu_task_util.h
			
 
				+ */
			
 
				 enum starpu_data_access_mode
			
 
				 {
			
 
				-	STARPU_NONE=0,
			
 
				-	STARPU_R=(1<<0),
			
 
				-	STARPU_W=(1<<1),
			
 
				-	STARPU_RW=(STARPU_R|STARPU_W),
			
 
				-	STARPU_SCRATCH=(1<<2),
			
 
				-	STARPU_REDUX=(1<<3),
			
 
				-	STARPU_COMMUTE=(1<<4),
			
 
				-	STARPU_SSEND=(1<<5),
			
 
				-	STARPU_LOCALITY=(1<<6),
			
 
				-	STARPU_ACCESS_MODE_MAX=(1<<7)
			
 
				-	/* Note: other STARPU_* values in include/starpu_task_util.h */
			
 
				+	STARPU_NONE=0, /**< todo */
			
 
				+	STARPU_R=(1<<0), /**< read-only mode */
			
 
				+	STARPU_W=(1<<1), /**< write-only mode */
			
 
				+	STARPU_RW=(STARPU_R|STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W  */
			
 
				+	STARPU_SCRATCH=(1<<2), /**< A temporary buffer is allocated
			
 
				+				  for the task, but StarPU does not
			
 
				+				  enforce data consistency---i.e. each
			
 
				+				  device has its own buffer,
			
 
				+				  independently from each other (even
			
 
				+				  for CPUs), and no data transfer is
			
 
				+				  ever performed. This is useful for
			
 
				+				  temporary variables to avoid
			
 
				+				  allocating/freeing buffers inside
			
 
				+				  each task. Currently, no behavior is
			
 
				+				  defined concerning the relation with
			
 
				+				  the ::STARPU_R and ::STARPU_W modes
			
 
				+				  and the value provided at
			
 
				+				  registration --- i.e., the value of
			
 
				+				  the scratch buffer is undefined at
			
 
				+				  entry of the codelet function.  It
			
 
				+				  is being considered for future
			
 
				+				  extensions at least to define the
			
 
				+				  initial value.  For now, data to be
			
 
				+				  used in ::STARPU_SCRATCH mode should
			
 
				+				  be registered with node -1 and a
			
 
				+				  <c>NULL</c> pointer, since the value
			
 
				+				  of the provided buffer is simply
			
 
				+				  ignored for now.
			
 
				+			       */
			
 
				+	STARPU_REDUX=(1<<3), /**< todo */
			
 
				+	STARPU_COMMUTE=(1<<4), /**<  ::STARPU_COMMUTE can be passed
			
 
				+				  along ::STARPU_W or ::STARPU_RW to
			
 
				+				  express that StarPU can let tasks
			
 
				+				  commute, which is useful e.g. when
			
 
				+				  bringing a contribution into some
			
 
				+				  data, which can be done in any order
			
 
				+				  (but still require sequential
			
 
				+				  consistency against reads or
			
 
				+				  non-commutative writes).
			
 
				+			       */
			
 
				+	STARPU_SSEND=(1<<5), /**< used in starpu_mpi_insert_task() to
			
 
				+				specify the data has to be sent using
			
 
				+				a synchronous and non-blocking mode
			
 
				+				(see starpu_mpi_issend())
			
 
				+			     */
			
 
				+	STARPU_LOCALITY=(1<<6), /**< used to tell the scheduler which
			
 
				+				   data is the most important for the
			
 
				+				   task, and should thus be used to
			
 
				+				   try to group tasks on the same core
			
 
				+				   or cache, etc. For now only the ws
			
 
				+				   and lws schedulers take this flag
			
 
				+				   into account, and only when rebuilt
			
 
				+				   with \c USE_LOCALITY flag defined in
			
 
				+				   the
			
 
				+				   src/sched_policies/work_stealing_policy.c
			
 
				+				   source code.
			
 
				+				*/
			
 
				+	STARPU_ACCESS_MODE_MAX=(1<<7) /**< todo */
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   Describe a data handle along with an access mode.
			
 
				+*/
			
 
				 struct starpu_data_descr
			
 
				 {
			
 
				-	starpu_data_handle_t handle;
			
 
				-	enum starpu_data_access_mode mode;
			
 
				+	starpu_data_handle_t handle; /**< data */
			
 
				+	enum starpu_data_access_mode mode; /**< access mode */
			
 
				 };
			
 
				 
			
 
				 struct starpu_data_interface_ops;
			
 
				 
			
 
				+/** Set the name of the data, to be shown in various profiling tools. */
			
 
				 void starpu_data_set_name(starpu_data_handle_t handle, const char *name);
			
 
				+
			
 
				+/**
			
 
				+   Set the coordinates of the data, to be shown in various profiling
			
 
				+   tools. \p dimensions is the size of the \p dims array. This can be
			
 
				+   for instance the tile coordinates within a big matrix.
			
 
				+*/
			
 
				 void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[]);
			
 
				+
			
 
				+/**
			
 
				+   Set the coordinates of the data, to be shown in various profiling
			
 
				+   tools. \p dimensions is the number of subsequent \c int parameters.
			
 
				+   This can be for instance the tile coordinates within a big matrix.
			
 
				+*/
			
 
				 void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...);
			
 
				 
			
 
				+/**
			
 
				+   Unregister a data \p handle from StarPU. If the data was
			
 
				+   automatically allocated by StarPU because the home node was -1, all
			
 
				+   automatically allocated buffers are freed. Otherwise, a valid copy
			
 
				+   of the data is put back into the home node in the buffer that was
			
 
				+   initially registered. Using a data handle that has been
			
 
				+   unregistered from StarPU results in an undefined behaviour. In case
			
 
				+   we do not need to update the value of the data in the home node, we
			
 
				+   can use the function starpu_data_unregister_no_coherency() instead.
			
 
				+*/
			
 
				 void starpu_data_unregister(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+    Similar to starpu_data_unregister(), except that StarPU does not
			
 
				+    put back a valid copy into the home node, in the buffer that was
			
 
				+    initially registered.
			
 
				+*/
			
 
				 void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Destroy the data \p handle once it is no longer needed by any
			
 
				+   submitted task. No coherency is assumed.
			
 
				+*/
			
 
				 void starpu_data_unregister_submit(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Destroy all replicates of the data \p handle immediately. After
			
 
				+   data invalidation, the first access to \p handle must be performed
			
 
				+   in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R
			
 
				+   mode results in undefined behaviour.
			
 
				+*/
			
 
				 void starpu_data_invalidate(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Submit invalidation of the data \p handle after completion of
			
 
				+   previously submitted tasks.
			
 
				+*/
			
 
				 void starpu_data_invalidate_submit(starpu_data_handle_t handle);
			
 
				 
			
 
				+/**
			
 
				+   Specify that the data \p handle can be discarded without impacting
			
 
				+   the application.
			
 
				+*/
			
 
				 void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
			
 
				 
			
 
				+/** @name Access registered data from the application
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   This macro can be used to acquire data, but not require it to be
			
 
				+   available on a given node, only enforce R/W dependencies. This can
			
 
				+   for instance be used to wait for tasks which produce the data, but
			
 
				+   without requesting a fetch to the main memory.
			
 
				+*/
			
 
				 #define STARPU_ACQUIRE_NO_NODE -1
			
 
				+
			
 
				+/**
			
 
				+   Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all
			
 
				+   nodes, preventing them from being evicted for instance. This is
			
 
				+   mostly useful inside StarPU only.
			
 
				+*/
			
 
				 #define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2
			
 
				+
			
 
				+/**
			
 
				+   The application must call this function prior to accessing
			
 
				+   registered data from main memory outside tasks. StarPU ensures that
			
 
				+   the application will get an up-to-date copy of \p handle in main
			
 
				+   memory located where the data was originally registered, and that
			
 
				+   all concurrent accesses (e.g. from tasks) will be consistent with
			
 
				+   the access mode specified with \p mode. starpu_data_release() must
			
 
				+   be called once the application no longer needs to access the piece
			
 
				+   of data. Note that implicit data dependencies are also enforced by
			
 
				+   starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
			
 
				+   tasks scheduled to work on the data, unless they have been disabled
			
 
				+   explicitly by calling
			
 
				+   starpu_data_set_default_sequential_consistency_flag() or
			
 
				+   starpu_data_set_sequential_consistency_flag().
			
 
				+   starpu_data_acquire() is a blocking call, so that it cannot be
			
 
				+   called from tasks or from their callbacks (in that case,
			
 
				+   starpu_data_acquire() returns <c>-EDEADLK</c>). Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire(), except that the data will be
			
 
				+   available on the given memory node instead of main memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can
			
 
				+   be used instead of an explicit node number.
			
 
				+*/
			
 
				 int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Asynchronous equivalent of starpu_data_acquire(). When the data
			
 
				+   specified in \p handle is available in the access \p mode, the \p
			
 
				+   callback function is executed. The application may access
			
 
				+   the requested data during the execution of \p callback. The \p callback
			
 
				+   function must call starpu_data_release() once the application no longer
			
 
				+   needs to access the piece of data. Note that implicit data
			
 
				+   dependencies are also enforced by starpu_data_acquire_cb() in case they
			
 
				+   are not disabled. Contrary to starpu_data_acquire(), this function is
			
 
				+   non-blocking and may be called from task callbacks. Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				 int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				+   used instead of an explicit node number.
			
 
				+*/
			
 
				 int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb() with the possibility of
			
 
				+   enabling or disabling data dependencies.
			
 
				+   When the data specified in \p handle is available in the access
			
 
				+   \p mode, the \p callback function is executed. The application may access
			
 
				+   the requested data during the execution of this \p callback. The \p callback
			
 
				+   function must call starpu_data_release() once the application no longer
			
 
				+   needs to access the piece of data. Note that implicit data
			
 
				+   dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
			
 
				+   are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
			
 
				+   Similarly to starpu_data_acquire_cb(), this function is
			
 
				+   non-blocking and may be called from task callbacks. Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				 int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb_sequential_consistency(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				+   explicit node number.
			
 
				+*/
			
 
				 int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
			
 
				+
			
 
				 int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_on_node_cb_sequential_consistency(),
			
 
				+   except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
			
 
				+   to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
			
 
				+   just before the acquisition, and \e post_sync_jobid happens just after the
			
 
				+   release.
			
 
				+*/
			
 
				 int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid);
			
 
				 
			
 
				+/**
			
 
				+   The application can call this function instead of starpu_data_acquire() so as to
			
 
				+   acquire the data like starpu_data_acquire(), but only if all
			
 
				+   previously-submitted tasks have completed, in which case starpu_data_acquire_try()
			
 
				+   returns 0. StarPU will have ensured that the application will get an up-to-date
			
 
				+   copy of \p handle in main memory located where the data was originally
			
 
				+   registered. starpu_data_release() must be called once the application no longer
			
 
				+   needs to access the piece of data.
			
 
				+*/
			
 
				 int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_try(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				+   explicit node number.
			
 
				+*/
			
 
				 int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
			
 
				 
			
 
				 #ifdef __GCC__
			
 
				+
			
 
				+/**
			
 
				+   STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
			
 
				+   except that the code to be executed in a callback is directly provided
			
 
				+   as a macro parameter, and the data \p handle is automatically released
			
 
				+   after it. This makes it easy to execute code which depends on the
			
 
				+   value of some registered data. This is non-blocking too and may be
			
 
				+   called from task callbacks.
			
 
				+*/
			
 
				 #  define STARPU_DATA_ACQUIRE_CB(handle, mode, code) do \
			
 
				 	{ \						\
			
 
				 		void callback(void *arg)		\
			
@@ -92,70 +343,181 @@ int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum
 
				 	while(0)
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Release the piece of data acquired by the
			
 
				+   application either by starpu_data_acquire() or by
			
 
				+   starpu_data_acquire_cb().
			
 
				+*/
			
 
				 void starpu_data_release(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_release(), except that the data
			
 
				+   will be available on the given memory \p node instead of main memory.
			
 
				+   The \p node parameter must be exactly the same as the corresponding \c
			
 
				+   starpu_data_acquire_on_node* call.
			
 
				+*/
			
 
				 void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   This is an arbiter, which implements an advanced but centralized
			
 
				+   management of concurrent data accesses, see \ref
			
 
				+   ConcurrentDataAccess for the details.
			
 
				+*/
			
 
				 typedef struct starpu_arbiter *starpu_arbiter_t;
			
 
				+
			
 
				+/**
			
 
				+   Create a data access arbiter, see \ref ConcurrentDataAccess for the
			
 
				+   details
			
 
				+*/
			
 
				 starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				-void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
			
 
				-void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
			
 
				 
			
 
				-void starpu_data_display_memory_stats();
			
 
				+/**
			
 
				+   Make access to \p handle managed by \p arbiter
			
 
				+*/
			
 
				+void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
			
 
				 
			
 
				-#define starpu_data_malloc_pinned_if_possible	starpu_malloc
			
 
				-#define starpu_data_free_pinned_if_possible	starpu_free
			
 
				+/**
			
 
				+   Destroy the \p arbiter . This must only be called after all data
			
 
				+   assigned to it have been unregistered.
			
 
				+*/
			
 
				+void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
			
 
				 
			
 
				+/**
			
 
				+   Explicitly ask StarPU to allocate room for a piece of data on
			
 
				+   the specified memory \p node.
			
 
				+*/
			
 
				 int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
			
 
				 
			
 
				+/**
			
 
				+   Issue a fetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to the given node as soon as possible, so that it is
			
 
				+   available there for tasks. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the
			
 
				+   data.
			
 
				+*/
			
 
				 int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				+
			
 
				+/**
			
 
				+   Issue a prefetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to \p node when there is room for it, so that it is
			
 
				+   available there for tasks. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the
			
 
				+   data.
			
 
				+*/
			
 
				 int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				+
			
 
				 int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Issue an idle prefetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to \p node, so that it is
			
 
				+   available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the data.
			
 
				+*/
			
 
				 int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				 int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
			
 
				 
			
 
				+/**
			
 
				+   Check whether a valid copy of \p handle is currently available on
			
 
				+   memory node \p node.
			
 
				+*/
			
 
				 unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
			
 
				 
			
 
				+/**
			
 
				+   Advise StarPU that \p handle will not be used in the near future, and is
			
 
				+   thus a good candidate for eviction from GPUs. StarPU will thus write its value
			
 
				+   back to its home node when the bus is idle, and select this data in priority
			
 
				+   for eviction when memory gets low.
			
 
				+*/
			
 
				 void starpu_data_wont_use(starpu_data_handle_t handle);
			
 
				 
			
 
				-#define STARPU_MAIN_RAM 0
			
 
				-
			
 
				-enum starpu_node_kind
			
 
				-{
			
 
				-	STARPU_UNUSED     = 0x00,
			
 
				-	STARPU_CPU_RAM    = 0x01,
			
 
				-	STARPU_CUDA_RAM   = 0x02,
			
 
				-	STARPU_OPENCL_RAM = 0x03,
			
 
				-	STARPU_DISK_RAM   = 0x04,
			
 
				-	STARPU_MIC_RAM    = 0x05,
			
 
				-	STARPU_SCC_RAM    = 0x06,
			
 
				-	STARPU_SCC_SHM    = 0x07,
			
 
				-	STARPU_MPI_MS_RAM = 0x08
			
 
				-
			
 
				-};
			
 
				-
			
 
				-unsigned starpu_worker_get_memory_node(unsigned workerid);
			
 
				-unsigned starpu_memory_nodes_get_count(void);
			
 
				-int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
			
 
				-int starpu_memory_nodes_get_numa_count(void);
			
 
				-int starpu_memory_nodes_numa_id_to_devid(int osid);
			
 
				-int starpu_memory_nodes_numa_devid_to_id(unsigned id);
			
 
				-
			
 
				-enum starpu_node_kind starpu_node_get_kind(unsigned node);
			
 
				-
			
 
				+/**
			
 
				+   Set the write-through mask of the data \p handle (and
			
 
				+   its children), i.e. a bitmask of nodes where the data should be always
			
 
				+   replicated after modification. It also prevents the data from being
			
 
				+   evicted from these nodes when memory gets scarce. When the data is
			
 
				+   modified, it is automatically transferred into those memory nodes. For
			
 
				+   instance a <c>1<<0</c> write-through mask means that the CUDA workers
			
 
				+   will commit their changes in main memory (node 0).
			
 
				+*/
			
 
				 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
			
 
				 
			
 
				+/** @name Implicit Data Dependencies
			
 
				+    In this section, we describe how StarPU makes it possible to
			
 
				+    insert implicit task dependencies in order to enforce sequential data
			
 
				+    consistency. When this data consistency is enabled on a specific data
			
 
				+    handle, any data access will appear as sequentially consistent from
			
 
				+    the application. For instance, if the application submits two tasks
			
 
				+    that access the same piece of data in read-only mode, and then a third
			
 
				+    task that accesses it in write mode, dependencies will be added between
			
 
				+    the two first tasks and the third one. Implicit data dependencies are
			
 
				+    also inserted in the case of data accesses from the application.
			
 
				+    @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Set the data consistency mode associated to a data handle. The
			
 
				+   consistency mode set using this function has the priority over the
			
 
				+   default mode which can be set with
			
 
				+   starpu_data_set_default_sequential_consistency_flag().
			
 
				+*/
			
 
				 void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag);
			
 
				+
			
 
				+/**
			
 
				+   Get the data consistency mode associated to the data handle \p handle
			
 
				+*/
			
 
				 unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Return the default sequential consistency flag
			
 
				+*/
			
 
				 unsigned starpu_data_get_default_sequential_consistency_flag(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the default sequential consistency flag. If a non-zero
			
 
				+   value is passed, a sequential data consistency will be enforced for
			
 
				+   all handles registered after this function call, otherwise it is
			
 
				+   disabled. By default, StarPU enables sequential data consistency. It
			
 
				+   is also possible to select the data consistency mode of a specific
			
 
				+   data handle with the function
			
 
				+   starpu_data_set_sequential_consistency_flag().
			
 
				+*/
			
 
				 void starpu_data_set_default_sequential_consistency_flag(unsigned flag);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   Set whether this data should be eligible to be evicted to disk
			
 
				+   storage (1) or not (0). The default is 1.
			
 
				+*/
			
 
				 void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag);
			
 
				+/**
			
 
				+   Get whether this data was set to be eligible to be evicted to disk
			
 
				+   storage (1) or not (0).
			
 
				+*/
			
 
				 unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle);
			
 
				 
			
 
				+/**
			
 
				+   Query the status of \p handle on the specified \p memory_node.
			
 
				+*/
			
 
				 void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested);
			
 
				 
			
 
				 struct starpu_codelet;
			
 
				 
			
 
				+/**
			
 
				+   Set the codelets to be used for \p handle when it is accessed in the
			
 
				+   mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				+   the codelet \p init_cl, and reduction between per-worker buffers will be
			
 
				+   done with the codelet \p redux_cl.
			
 
				+*/
			
 
				 void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
			
 
				 
			
 
				 struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle);
			
@@ -164,13 +526,35 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsi
 
				 
			
 
				 void starpu_memchunk_tidy(unsigned memory_node);
			
 
				 
			
 
				+/**
			
 
				+   Set the field \c user_data for the \p handle to \p user_data . It can
			
 
				+   then be retrieved with starpu_data_get_user_data(). \p user_data can be any
			
 
				+   application-defined value, for instance a pointer to an object-oriented
			
 
				+   container for the data.
			
 
				+*/
			
 
				 void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
			
 
				+
			
 
				+/**
			
 
				+   Retrieve the field \c user_data previously set for the \p handle.
			
 
				+*/
			
 
				 void *starpu_data_get_user_data(starpu_data_handle_t handle);
			
 
				 
			
 
				+/**
			
 
				+   Copy the content of \p src_handle into \p dst_handle. The parameter \p
			
 
				+   asynchronous indicates whether the function should block or not. In
			
 
				+   the case of an asynchronous call, it is possible to synchronize with
			
 
				+   the termination of this operation either by the means of implicit
			
 
				+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
			
 
				+   \p callback_func is not <c>NULL</c>, this callback function is executed after
			
 
				+   the handle has been copied, and it is given the pointer \p
			
 
				+   callback_arg as argument.
			
 
				+*/
			
 
				 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_DATA_H__ */
			
--- a/include/starpu_data_filters.h
+++ b/include/starpu_data_filters.h
@@ -21,6 +21,11 @@
 
				 #ifndef __STARPU_DATA_FILTERS_H__
			
 
				 #define __STARPU_DATA_FILTERS_H__
			
 
				 
			
 
				+/** @defgroup API_Data_Partition Data Partition
			
 
				+
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <stdarg.h>
			
 
				 
			
@@ -31,65 +36,473 @@ extern "C"
 
				 
			
 
				 struct starpu_data_interface_ops;
			
 
				 
			
 
				+/** Describe a data partitioning operation, to be given to starpu_data_partition() */
			
 
				 struct starpu_data_filter
			
 
				 {
			
 
				+	/**
			
 
				+	   Fill the \p child_interface structure with interface information
			
 
				+	   for the \p id -th child of the parent \p father_interface (among
			
 
				+	   \p nparts). The \p filter structure is provided, allowing to inspect the
			
 
				+	   starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
			
 
				+	   parameters.
			
 
				+	   The details of what needs to be filled in \p child_interface vary according
			
 
				+	   to the data interface, but generally speaking:
			
 
				+	   <ul>
			
 
				+	   <li> <c>id</c> is usually just copied over from the father,
			
 
				+	   when the sub data has the same structure as the father,
			
 
				+	   e.g. a subvector is a vector, a submatrix is a matrix, etc.
			
 
				+	   This is however not the case for instance when dividing a
			
 
				+	   BCSR matrix into its dense blocks, which then are matrices.
			
 
				+	   </li>
			
 
				+	   <li> <c>nx</c>, <c>ny</c> and alike are usually divided by
			
 
				+	   the number of subdata, depending how the subdivision is
			
 
				+	   done (e.g. nx division vs ny division for vertical matrix
			
 
				+	   division vs horizontal matrix division). </li>
			
 
				+	   <li> <c>ld</c> for matrix interfaces are usually just
			
 
				+	   copied over: the leading dimension (ld) usually does not
			
 
				+	   change. </li>
			
 
				+	   <li> <c>elemsize</c> is usually just copied over. </li>
			
 
				+	   <li> <c>ptr</c>, the pointer to the data, has to be
			
 
				+	   computed according to \p id and the father's <c>ptr</c>, so
			
 
				+	   as to point to the start of the sub data. This should
			
 
				+	   however be done only if the father has <c>ptr</c> different
			
 
				+	   from NULL: in the OpenCL case notably, the
			
 
				+	   <c>dev_handle</c> and <c>offset</c> fields are used
			
 
				+	   instead. </li>
			
 
				+	   <li> <c>dev_handle</c> should be just copied over from the
			
 
				+	   parent. </li>
			
 
				+	   <li> <c>offset</c> has to be computed according to \p id and
			
 
				+	   the father's <c>offset</c>, so as to provide the offset of
			
 
				+	   the start of the sub data. This is notably used for the
			
 
				+	   OpenCL case. </li>
			
 
				+	   </ul>
			
 
				+	*/
			
 
				 	void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts);
			
 
				-	unsigned nchildren;
			
 
				+	unsigned nchildren; /**< Number of parts to partition the data into. */
			
 
				+	/**
			
 
				+	   Return the number of children. This can be used instead of
			
 
				+	   starpu_data_filter::nchildren when the number of children depends
			
 
				+	   on the actual data (e.g. the number of blocks in a sparse
			
 
				+	   matrix).
			
 
				+	*/
			
 
				 	unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle);
			
 
				+	/**
			
 
				+	   When children use different data interface,
			
 
				+	   return which interface is used by child number \p id.
			
 
				+	*/
			
 
				 	struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id);
			
 
				-	unsigned filter_arg;
			
 
				+	unsigned filter_arg; /**< Additional parameter for the filter function */
			
 
				+	/**
			
 
				+	   Additional pointer parameter for
			
 
				+	   the filter function, such as the
			
 
				+	   sizes of the different parts. */
			
 
				 	void *filter_arg_ptr;
			
 
				 };
			
 
				 
			
 
				+/** @name Basic API
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+    Request the partitioning of \p initial_handle into several subdata
			
 
				+    according to the filter \p f.
			
 
				+    Here is an example of how to use the function:
			
 
				+    \code{.c}
			
 
				+    struct starpu_data_filter f =
			
 
				+    {
			
 
				+      .filter_func = starpu_matrix_filter_block,
			
 
				+      .nchildren = nslicesx
			
 
				+    };
			
 
				+    starpu_data_partition(A_handle, &f);
			
 
				+    \endcode
			
 
				+*/
			
 
				 void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
			
 
				+
			
 
				+/**
			
 
				+   Unapply the filter which has been applied to \p root_data, thus
			
 
				+   unpartitioning the data. The pieces of data are collected back into
			
 
				+   one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				+   Tasks working on the partitioned data will be waited for
			
 
				+   by starpu_data_unpartition().
			
 
				+
			
 
				+   Here is an example of how to use the function:
			
 
				+   \code{.c}
			
 
				+   starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				+   \endcode
			
 
				+*/
			
 
				 void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
			
 
				 
			
 
				+/**
			
 
				+   Return the \p i -th child of the given \p handle, which must have
			
 
				+   been partitioned beforehand.
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of children \p handle has been partitioned into.
			
 
				+*/
			
 
				+int starpu_data_get_nb_children(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   After partitioning a StarPU data by applying a filter,
			
 
				+   starpu_data_get_sub_data() can be used to get handles for each of the
			
 
				+   data portions. \p root_data is the parent data that was partitioned.
			
 
				+   \p depth is the number of filters to traverse (in case several filters
			
 
				+   have been applied, to e.g. partition in row blocks, and then in column
			
 
				+   blocks), and the subsequent parameters are the indexes. The function
			
 
				+   returns a handle to the subdata.
			
 
				+
			
 
				+   Here is an example of how to use the function:
			
 
				+   \code{.c}
			
 
				+   h = starpu_data_get_sub_data(A_handle, 1, taskx);
			
 
				+   \endcode
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_get_sub_data() but use a \c va_list for the
			
 
				+   parameter list.
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
			
 
				+
			
 
				+/**
			
 
				+   Apply \p nfilters filters to the handle designated by \p
			
 
				+   root_data recursively. \p nfilters pointers to variables of the
			
 
				+   type starpu_data_filter should be given.
			
 
				+*/
			
 
				+void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
			
 
				+
			
 
				+/**
			
 
				+   Apply \p nfilters filters to the handle designated by
			
 
				+   \p root_data recursively. Use a \c va_list of pointers to
			
 
				+   variables of the type starpu_data_filter.
			
 
				+*/
			
 
				+void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Asynchronous API
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Plan to partition \p initial_handle into several subdata according to
			
 
				+   the filter \p f.
			
 
				+   The handles are returned into the \p children array, which has to be
			
 
				+   the same size as the number of parts described in \p f. These handles
			
 
				+   are not immediately usable, starpu_data_partition_submit() has to be
			
 
				+   called to submit the actual partitioning.
			
 
				+
			
 
				+   Here is an example of how to use the function:
			
 
				+   \code{.c}
			
 
				+   starpu_data_handle_t children[nslicesx];
			
 
				+   struct starpu_data_filter f =
			
 
				+   {
			
 
				+     .filter_func = starpu_matrix_filter_block,
			
 
				+     .nchildren = nslicesx
			
 
				+     };
			
 
				+     starpu_data_partition_plan(A_handle, &f, children);
			
 
				+\endcode
			
 
				+*/
			
 
				 void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Submit the actual partitioning of \p initial_handle into the \p nparts
			
 
				+   \p children handles. This call is asynchronous, it only submits that the
			
 
				+   partitioning should be done, so that the \p children handles can now be used to
			
 
				+   submit tasks, and \p initial_handle can not be used to submit tasks any more (to
			
 
				+   guarantee coherency).
			
 
				+   For instance,
			
 
				+   \code{.c}
			
 
				+   starpu_data_partition_submit(A_handle, nslicesx, children);
			
 
				+   \endcode
			
 
				+*/
			
 
				 void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_partition_submit(), but do not invalidate \p
			
 
				+   initial_handle. This allows to continue using it, but the application has to be
			
 
				+   careful not to write to \p initial_handle or \p children handles, only read from
			
 
				+   them, since the coherency is otherwise not guaranteed.  This thus allows to
			
 
				+   submit various tasks which concurrently read from various partitions of the data.
			
 
				+
			
 
				+   When the application wants to write to \p initial_handle again, it should call
			
 
				+   starpu_data_unpartition_submit(), which will properly add dependencies between the
			
 
				+   reads on the \p children and the writes to be submitted.
			
 
				+
			
 
				+   If instead the application wants to write to \p children handles, it should
			
 
				+   call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
			
 
				+   dependencies between the reads on the \p initial_handle and the writes to be
			
 
				+   submitted.
			
 
				+*/
			
 
				 void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Assume that a partitioning of \p initial_handle has already been submitted
			
 
				+   in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
			
 
				+   that partitioning into read-write mode for the \p children, by invalidating \p
			
 
				+   initial_handle, and adding the necessary dependencies.
			
 
				+*/
			
 
				 void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Assuming that \p initial_handle is partitioned into \p children,
			
 
				+   submit an unpartitioning of \p initial_handle, i.e. submit a
			
 
				+   gathering of the pieces on the requested \p gathering_node memory
			
 
				+   node, and submit an invalidation of the children.
			
 
				+ */
			
 
				 void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
			
 
				+
			
 
				 void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int gathering_node);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_unpartition_submit(), but do not invalidate the
			
 
				+   \p children handles. This allows to continue using them, but the application has to be
			
 
				+   careful not to write to \p initial_handle or \p children handles, only read from
			
 
				+   them, since the coherency is otherwise not guaranteed.  This thus allows to
			
 
				+   submit various tasks which concurrently read from various
			
 
				+   partitions of the data.
			
 
				+*/
			
 
				 void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
			
 
				+
			
 
				+/**
			
 
				+   Clear the partition planning established between \p root_data and
			
 
				+   \p children with starpu_data_partition_plan(). This will notably
			
 
				+   submit an unregistration of all the \p children, which can thus not be
			
 
				+   used any more afterwards.
			
 
				+*/
			
 
				 void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
			
 
				 
			
 
				+/**
			
 
				+   Similar to starpu_data_unpartition_submit_sequential_consistency()
			
 
				+   but allow to specify a callback function for the unpartitioning task.
			
 
				+*/
			
 
				 void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_partition_submit() but also allow to specify
			
 
				+   the coherency to be used for the main data \p initial_handle
			
 
				+   through the parameter \p sequential_consistency.
			
 
				+*/
			
 
				 void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_unpartition_submit() but also allow to specify
			
 
				+   the coherency to be used for the main data \p initial_handle
			
 
				+   through the parameter \p sequential_consistency.
			
 
				+*/
			
 
				 void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Disable the automatic partitioning of the data \p handle for which
			
 
				+   an asynchronous plan has previously been submitted.
			
 
				+*/
			
 
				 void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
			
 
				 
			
 
				-int starpu_data_get_nb_children(starpu_data_handle_t handle);
			
 
				-starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
			
 
				+/** @} */
			
 
				 
			
 
				-starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
			
 
				-starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
			
 
				-
			
 
				-void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
			
 
				-void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
			
 
				+/** @name Predefined BCSR Filter Functions
			
 
				+ * Predefined partitioning functions for BCSR data. Examples on how to
			
 
				+ * use them are shown in \ref PartitioningData.
			
 
				+ * @{
			
 
				+ */
			
 
				 
			
 
				+/**
			
 
				+   Partition a block-sparse matrix into dense matrices.
			
 
				+ */
			
 
				 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Predefined CSR Filter Functions
			
 
				+ * Predefined partitioning functions for CSR data. Examples on how to
			
 
				+ * use them are shown in \ref PartitioningData.
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Partition a block-sparse matrix into vertical block-sparse matrices.
			
 
				+ */
			
 
				 void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Predefined Matrix Filter Functions
			
 
				+ * Predefined partitioning functions for matrix
			
 
				+ * data. Examples on how to use them are shown in \ref
			
 
				+ * PartitioningData.
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the x dimension, thus getting (x/\p
			
 
				+   nparts ,y) matrices. If \p nparts does not divide x, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				 void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the x dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
			
 
				+   nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
			
 
				+   the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can
			
 
				+   only be used for read-only access, as no coherency is enforced for the
			
 
				+   shadowed parts. A usage example is available in
			
 
				+   examples/filters/shadow2d.c
			
 
				+ */
			
 
				 void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the y dimension, thus getting
			
 
				+   (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				 void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the y dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
			
 
				+   divide y-2*shadow, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can only be used for read-only access, as no
			
 
				+   coherency is enforced for the shadowed parts. A usage example is
			
 
				+   available in examples/filters/shadow2d.c
			
 
				+*/
			
 
				 void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Predefined Vector Filter Functions
			
 
				+ * Predefined partitioning functions for vector
			
 
				+ * data. Examples on how to use them are shown in \ref
			
 
				+ * PartitioningData.
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				+   equal size.
			
 
				+ */
			
 
				 void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				+   equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
			
 
				+   of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
			
 
				+   of \p f must be the shadow size cast to \c void*.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts. A usage example is available in
			
 
				+   examples/filters/shadow.c
			
 
				+*/
			
 
				 void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned into \p nparts chunks
			
 
				+   according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				+   <c>filter_arg_ptr</c> field must point to an array of \p nparts long
			
 
				+   elements, each of which specifies the number of elements in each chunk
			
 
				+   of the partition.
			
 
				+ */
			
 
				 void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned into \p nparts chunks
			
 
				+   according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				+   <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
			
 
				+   elements, each of which specifies the number of elements in each chunk
			
 
				+   of the partition.
			
 
				+ */
			
 
				 void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in <c>2</c> chunks of
			
 
				+   equal size, ignoring \p nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
			
 
				+ */
			
 
				 void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Predefined Block Filter Functions
			
 
				+ * Predefined partitioning functions for block data. Examples on how
			
 
				+ * to use them are shown in \ref PartitioningData. An example is
			
 
				+ * available in \c examples/filters/shadow3d.c
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the X dimension, thus getting
			
 
				+   (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				 void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the X dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
			
 
				+   divide x, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+*/
			
 
				 void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Y dimension, thus getting
			
 
				+   (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				 void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Y dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
			
 
				+   divide y, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+ */
			
 
				 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Z dimension, thus getting
			
 
				+   (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Z dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
			
 
				+   divide z, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+ */
			
 
				 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
--- a/include/starpu_disk.h
+++ b/include/starpu_disk.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2013,2017                                Inria
			
 
				- * Copyright (C) 2013,2014,2017                           CNRS
			
 
				+ * Copyright (C) 2013,2014,2017,2019                           CNRS
			
 
				  * Copyright (C) 2013,2014,2017                           Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Corentin Salingue
			
 
				  *
			
@@ -20,58 +20,200 @@
 
				 #ifndef __STARPU_DISK_H__
			
 
				 #define __STARPU_DISK_H__
			
 
				 
			
 
				+/** @defgroup API_Out_Of_Core Out Of Core
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				 #include <sys/types.h>
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				-/* list of functions to use on disk */
			
 
				+/** Set of functions to manipulate data on disk. */
			
 
				 struct starpu_disk_ops
			
 
				 {
			
 
				-	 void *  (*plug)   (void *parameter, starpu_ssize_t size);
			
 
				-	 void    (*unplug) (void *base);
			
 
				+	/**
			
 
				+	   Connect a disk memory at location \p parameter with size \p size, and return a
			
 
				+	   base as void*, which will be passed by StarPU to all other methods.
			
 
				+	*/
			
 
				+	void *  (*plug)   (void *parameter, starpu_ssize_t size);
			
 
				+	/**
			
 
				+	   Disconnect a disk memory \p base.
			
 
				+	*/
			
 
				+	void    (*unplug) (void *base);
			
 
				+
			
 
				+	/**
			
 
				+	   Measure the bandwidth and the latency for the disk \p node and save it. Returns
			
 
				+	   1 if it could measure it.
			
 
				+	*/
			
 
				+	int    (*bandwidth)    (unsigned node, void *base);
			
 
				+
			
 
				+	/**
			
 
				+	   Create a new location for data of size \p size. Return an opaque object pointer.
			
 
				+	*/
			
 
				+	void *  (*alloc)  (void *base, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Free a data \p obj previously allocated with starpu_disk_ops::alloc.
			
 
				+	*/
			
 
				+	void    (*free)   (void *base, void *obj, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Open an existing location of data, at a specific position \p pos dependent on the backend.
			
 
				+	*/
			
 
				+	void *  (*open)   (void *base, void *pos, size_t size);
			
 
				+	/**
			
 
				+	   Close, without deleting it, a location of data \p obj.
			
 
				+	*/
			
 
				+	void    (*close)  (void *base, void *obj, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put
			
 
				+	   into \p buf. Return the actual number of read bytes.
			
 
				+	*/
			
 
				+	int     (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+	/**
			
 
				+	   Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return 0 on success.
			
 
				+	*/
			
 
				+	int     (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
			
 
				+	*/
			
 
				+	int	(*full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				+	/**
			
 
				+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				+	   \p size, so that a \c full_read will get it.
			
 
				+	*/
			
 
				+	int 	(*full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p
			
 
				+	   offset, from \p buf. Return a void* pointer that StarPU will pass to \c
			
 
				+	   xxx_request methods for testing for the completion.
			
 
				+	*/
			
 
				+	void *  (*async_write)  (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+	/**
			
 
				+	   Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p
			
 
				+	   offset, and put into \p buf. Return a void* pointer that StarPU will pass to \c
			
 
				+	   xxx_request methods for testing for the completion.
			
 
				+	*/
			
 
				+	void *  (*async_read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size
			
 
				+	*/
			
 
				+	void *	(*async_full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				+	/**
			
 
				+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				+	   \p size, so that a starpu_disk_ops::full_read will get it.
			
 
				+	*/
			
 
				+	void *	(*async_full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Copy from offset \p offset_src of disk object \p obj_src in \p base_src to
			
 
				+	   offset \p offset_dst of disk object \p obj_dst in \p base_dst. Return a void*
			
 
				+	   pointer that StarPU will pass to \c xxx_request methods for testing for the
			
 
				+	   completion.
			
 
				+	*/
			
 
				+	void *  (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Wait for completion of request \p async_channel returned by a previous
			
 
				+	   asynchronous read, write or copy.
			
 
				+	*/
			
 
				+	void   (*wait_request) (void * async_channel);
			
 
				+	/**
			
 
				+	   Test for completion of request \p async_channel returned by a previous
			
 
				+	   asynchronous read, write or copy. Return 1 on completion, 0 otherwise.
			
 
				+	*/
			
 
				+	int    (*test_request) (void * async_channel);
			
 
				+
			
 
				+	/**
			
 
				+	   Free the request allocated by a previous asynchronous read, write or copy.
			
 
				+	*/
			
 
				+	void   (*free_request)(void * async_channel);
			
 
				 
			
 
				-	 int    (*bandwidth)    (unsigned node, void *base);
			
 
				+	/* TODO: readv, writev, read2d, write2d, etc. */
			
 
				+};
			
 
				 
			
 
				-	 void *  (*alloc)  (void *base, size_t size);
			
 
				-	 void    (*free)   (void *base, void *obj, size_t size);
			
 
				+/**
			
 
				+   Use the stdio library (fwrite, fread...) to read/write on disk.
			
 
				 
			
 
				-	 void *  (*open)   (void *base, void *pos, size_t size);     /* open an existing file */
			
 
				-	 void    (*close)  (void *base, void *obj, size_t size);
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				 
			
 
				-	 int     (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				-	 int     (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size);
			
 
				+   Does not support asynchronous transfers.
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_stdio_ops;
			
 
				 
			
 
				-	 int	(*full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				-	 int 	(*full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+/**
			
 
				+   Use the HDF5 library.
			
 
				 
			
 
				-	 void *  (*async_write)  (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				-	 void *  (*async_read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+   <strong>It doesn't support multiple opening from different processes. </strong>
			
 
				 
			
 
				-	 void *	(*async_full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				-	 void *	(*async_full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+   You may only allow one process to write in the HDF5 file.
			
 
				 
			
 
				-	 void *  (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
			
 
				-	 void   (*wait_request) (void * async_channel);
			
 
				-	 int    (*test_request) (void * async_channel);
			
 
				-	 void   (*free_request)(void * async_channel);
			
 
				+   <strong>If the HDF5 library is not compiled with thread safety enabled (--enable-threadsafe), you can't open more than one HDF5 file at the same time. </strong>
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_hdf5_ops;
			
 
				 
			
 
				-	/* TODO: readv, writev, read2d, write2d, etc. */
			
 
				-};
			
 
				+/**
			
 
				+   Use the unistd library (write, read...) to read/write on disk.
			
 
				 
			
 
				-/* Posix functions to use disk memory */
			
 
				-extern struct starpu_disk_ops starpu_disk_stdio_ops;
			
 
				-extern struct starpu_disk_ops starpu_disk_hdf5_ops;
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				+*/
			
 
				 extern struct starpu_disk_ops starpu_disk_unistd_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the unistd library (write, read...) to read/write on disk with the O_DIRECT flag.
			
 
				+
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				+
			
 
				+   Only available on Linux systems.
			
 
				+*/
			
 
				 extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the leveldb created by Google. More information at https://code.google.com/p/leveldb/
			
 
				+   Does not support asynchronous transfers.
			
 
				+*/
			
 
				 extern struct starpu_disk_ops starpu_disk_leveldb_ops;
			
 
				 
			
 
				+/**
			
 
				+   Close an existing data object opened with starpu_disk_open().
			
 
				+*/
			
 
				 void starpu_disk_close(unsigned node, void *obj, size_t size);
			
 
				 
			
 
				+/**
			
 
				+   Open an existing file memory in a disk node. \p size is the size of
			
 
				+   the file. \p pos is the specific position dependent on the backend,
			
 
				+   given to the \c open  method of the disk operations. Return an
			
 
				+   opaque object pointer.
			
 
				+*/
			
 
				 void *starpu_disk_open(unsigned node, void *pos, size_t size);
			
 
				 
			
 
				+/**
			
 
				+   Register a disk memory node with a set of functions to manipulate
			
 
				+   data. The \c plug member of \p func will be passed \p parameter,
			
 
				+   and return a \c base which will be passed to all \p func methods.
			
 
				+   <br />
			
 
				+   SUCCESS: return the disk node. <br />
			
 
				+   FAIL: return an error code. <br />
			
 
				+   \p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size
			
 
				+   being negative means infinite size.
			
 
				+*/
			
 
				 int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size);
			
 
				 
			
 
				+/**
			
 
				+   Minimum size of a registered disk. The size of a disk is the last
			
 
				+   parameter of the function starpu_disk_register().
			
 
				+*/
			
 
				 #define STARPU_DISK_SIZE_MIN (16*1024*1024)
			
 
				 
			
 
				+/**
			
 
				+   Contain the node number of the disk swap, if set up through the
			
 
				+   \ref STARPU_DISK_SWAP variable.
			
 
				+*/
			
 
				 extern int starpu_disk_swap_node;
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_DISK_H__ */
			
--- a/include/starpu_driver.h
+++ b/include/starpu_driver.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2009-2014                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,6 +18,11 @@
 
				 #ifndef __STARPU_DRIVER_H__
			
 
				 #define __STARPU_DRIVER_H__
			
 
				 
			
 
				+/** @defgroup API_Running_Drivers Running Drivers
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
 
				 #include <starpu_opencl.h>
			
@@ -28,9 +33,17 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/** structure for a driver */
			
 
				 struct starpu_driver
			
 
				 {
			
 
				+	/**
			
 
				+	    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
			
 
				+	    and ::STARPU_OPENCL_WORKER are currently supported.
			
 
				+	*/
			
 
				 	enum starpu_worker_archtype type;
			
 
				+	/**
			
 
				+	   Identifier of the driver.
			
 
				+	*/
			
 
				 	union
			
 
				 	{
			
 
				 		unsigned cpu_id;
			
@@ -43,15 +56,45 @@ struct starpu_driver
 
				 	} id;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   Initialize the given driver, run it until it receives a request to
			
 
				+   terminate, deinitialize it and return 0 on success. Return
			
 
				+   <c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
			
 
				+   (::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
			
 
				+
			
 
				+   This is the same as using the following functions: calling
			
 
				+   starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
			
 
				+   and finally starpu_driver_deinit().
			
 
				+*/
			
 
				 int starpu_driver_run(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Notify all running drivers that they should terminate.
			
 
				+*/
			
 
				 void starpu_drivers_request_termination(void);
			
 
				 
			
 
				+/**
			
 
				+   Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
			
 
				+   if starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				 int starpu_driver_init(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Run the driver once, then return 0 on success, <c>-EINVAL</c> if
			
 
				+   starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				 int starpu_driver_run_once(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Deinitialize the given driver. Return 0 on success, <c>-EINVAL</c> if
			
 
				+   starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				 int starpu_driver_deinit(struct starpu_driver *d);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_DRIVER_H__ */
			
--- a/include/starpu_expert.h
+++ b/include/starpu_expert.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010,2011,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010,2011,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2010,2011                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -18,20 +18,36 @@
 
				 #ifndef __STARPU_EXPERT_H__
			
 
				 #define __STARPU_EXPERT_H__
			
 
				 
			
 
				-#include <starpu.h>
			
 
				+/** @defgroup API_Expert_Mode Expert Mode
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Wake all the workers, so they can inspect data requests and task
			
 
				+   submissions again.
			
 
				+*/
			
 
				 void starpu_wake_all_blocked_workers(void);
			
 
				 
			
 
				+/**
			
 
				+   Register a progression hook, to be called when workers are idle.
			
 
				+*/
			
 
				 int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Unregister a given progression hook.
			
 
				+*/
			
 
				 void starpu_progression_hook_deregister(int hook_id);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_H__ */
			
--- a/include/starpu_fxt.h
+++ b/include/starpu_fxt.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2012,2013,2016                           Inria
			
 
				  * Copyright (C) 2013                                     Joris Pablo
			
 
				- * Copyright (C) 2010-2015,2017,2018                      CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
			
 
				  * Copyright (C) 2010,2011,2013-2018                      Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  *
			
@@ -21,6 +21,11 @@
 
				 #ifndef __STARPU_FXT_H__
			
 
				 #define __STARPU_FXT_H__
			
 
				 
			
 
				+/** @defgroup API_FxT_Support FxT Support
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_perfmodel.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -63,29 +68,87 @@ struct starpu_fxt_options
 
				 	char *anim_path;
			
 
				 	char *states_path;
			
 
				 
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				 	char *file_prefix;
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				 	uint64_t file_offset;
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				 	int file_rank;
			
 
				 
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				 	char worker_names[STARPU_NMAXWORKERS][256];
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				 	struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS];
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				 	int nworkers;
			
 
				 
			
 
				+	/**
			
 
				+	   In case we want to dump the list of codelets to an external tool
			
 
				+	*/
			
 
				 	struct starpu_fxt_codelet_event **dumped_codelets;
			
 
				+	/**
			
 
				+	   In case we want to dump the list of codelets to an external tool
			
 
				+	*/
			
 
				 	long dumped_codelets_count;
			
 
				 };
			
 
				 
			
 
				 void starpu_fxt_options_init(struct starpu_fxt_options *options);
			
 
				 void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
			
 
				+
			
 
				+/**
			
 
				+   Determine whether profiling should be started by starpu_init(), or only when
			
 
				+   starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
			
 
				+   prevent it.
			
 
				+*/
			
 
				 void starpu_fxt_autostart_profiling(int autostart);
			
 
				+
			
 
				+/**
			
 
				+   Start recording the trace. The trace is by default started from
			
 
				+   starpu_init() call, but can be paused by using
			
 
				+   starpu_fxt_stop_profiling(), in which case
			
 
				+   starpu_fxt_start_profiling() should be called to resume recording
			
 
				+   events.
			
 
				+*/
			
 
				 void starpu_fxt_start_profiling(void);
			
 
				+
			
 
				+/**
			
 
				+   Stop recording the trace. The trace is by default stopped when calling
			
 
				+   starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
			
 
				+   stop it earlier. starpu_fxt_start_profiling() can then be called to
			
 
				+   start recording it again, etc.
			
 
				+*/
			
 
				 void starpu_fxt_stop_profiling(void);
			
 
				 void starpu_fxt_write_data_trace(char *filename_in);
			
 
				+
			
 
				+/**
			
 
				+   Add an event in the execution trace if FxT is enabled.
			
 
				+*/
			
 
				 void starpu_fxt_trace_user_event(unsigned long code);
			
 
				+
			
 
				+/**
			
 
				+   Add a string event in the execution trace if FxT is enabled.
			
 
				+*/
			
 
				 void starpu_fxt_trace_user_event_string(const char *s);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_FXT_H__ */
			
--- a/include/starpu_hash.h
+++ b/include/starpu_hash.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				- * Copyright (C) 2010,2012,2013,2015,2017                 CNRS
			
 
				+ * Copyright (C) 2010,2012,2013,2015,2017,2019                 CNRS
			
 
				  * Copyright (C) 2009-2014                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_HASH_H__
			
 
				 #define __STARPU_HASH_H__
			
 
				 
			
 
				+/** @ingroup API_Data_Interfaces
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdint.h>
			
 
				 #include <stddef.h>
			
 
				 
			
@@ -27,14 +32,34 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Compute the CRC of a byte buffer seeded by the \p inputcrc
			
 
				+   <em>current state</em>. The return value should be considered as the new
			
 
				+   <em>current state</em> for future CRC computation. This is used for computing
			
 
				+   data size footprint.
			
 
				+*/
			
 
				 uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
			
 
				 
			
 
				+/**
			
 
				+   Compute the CRC of a 32bit number seeded by the \p inputcrc
			
 
				+   <em>current state</em>. The return value should be considered as the new
			
 
				+   <em>current state</em> for future CRC computation. This is used for computing
			
 
				+   data size footprint.
			
 
				+*/
			
 
				 uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
			
 
				 
			
 
				+/**
			
 
				+   Compute the CRC of a string seeded by the \p inputcrc <em>current
			
 
				+   state</em>. The return value should be considered as the new <em>current
			
 
				+   state</em> for future CRC computation. This is used for computing data
			
 
				+   size footprint.
			
 
				+*/
			
 
				 uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_HASH_H__ */
			
--- a/include/starpu_mic.h
+++ b/include/starpu_mic.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2015,2017                                CNRS
			
 
				+ * Copyright (C) 2015,2017,2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Université de Bordeaux
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				  *
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_MIC_H__
			
 
				 #define __STARPU_MIC_H__
			
 
				 
			
 
				+/** @defgroup API_MIC_Extensions MIC Extensions
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 #ifdef STARPU_USE_MIC
			
@@ -28,10 +33,23 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Type for MIC function symbols
			
 
				+*/
			
 
				 typedef void *starpu_mic_func_symbol_t;
			
 
				 
			
 
				+/**
			
 
				+   Initiate a lookup on each MIC device to find the address of the
			
 
				+   function named \p func_name, store it in the global array kernels
			
 
				+   and return the index in the array through \p symbol.
			
 
				+*/
			
 
				 int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
			
 
				 
			
 
				+/**
			
 
				+   If successful, return the pointer to the function defined by \p symbol on
			
 
				+   the device linked to the called device. This can for instance be used
			
 
				+   in a starpu_mic_func_t implementation.
			
 
				+*/
			
 
				 starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -39,4 +57,7 @@ starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
 
				 #endif
			
 
				 
			
 
				 #endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_MIC_H__ */
			
--- a/include/starpu_mpi_ms.h
+++ b/include/starpu_mpi_ms.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2016,2017                                Inria
			
 
				- * Copyright (C) 2017                                     CNRS
			
 
				+ * Copyright (C) 2017, 2019                                     CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -18,6 +18,11 @@
 
				 #ifndef __STARPU_MPI_MS_H__
			
 
				 #define __STARPU_MPI_MS_H__
			
 
				 
			
 
				+/** @defgroup API_Master_Slave Master Slave Extension
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 #ifdef STARPU_USE_MPI_MASTER_SLAVE
			
@@ -38,4 +43,7 @@ starpu_mpi_ms_kernel_t starpu_mpi_ms_get_kernel(starpu_mpi_ms_func_symbol_t symb
 
				 #endif
			
 
				 
			
 
				 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_MPI_MS_H__ */
			
--- a/include/starpu_opencl.h
+++ b/include/starpu_opencl.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010-2014,2018                           Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				- * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015-2017,2019                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_OPENCL_H__
			
 
				 #define __STARPU_OPENCL_H__
			
 
				 
			
 
				+/** @defgroup API_OpenCL_Extensions OpenCL Extensions
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 #ifndef CL_TARGET_OPENCL_VERSION
			
@@ -36,65 +41,294 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-const char *starpu_opencl_error_string(cl_int status);
			
 
				-void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
			
 
				-#define STARPU_OPENCL_DISPLAY_ERROR(status) \
			
 
				-	starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				-
			
 
				-static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				-{
			
 
				-	starpu_opencl_display_error(func, file, line, msg, status);
			
 
				-	assert(0);
			
 
				-}
			
 
				-#define STARPU_OPENCL_REPORT_ERROR(status)			\
			
 
				-	starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				-
			
 
				-#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status)			\
			
 
				-	starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
			
 
				-
			
 
				+/**
			
 
				+   Store the OpenCL programs as compiled for the different OpenCL
			
 
				+   devices.
			
 
				+*/
			
 
				 struct starpu_opencl_program
			
 
				 {
			
 
				+	/** Store each program for each OpenCL device. */
			
 
				 	cl_program programs[STARPU_MAXOPENCLDEVS];
			
 
				 };
			
 
				 
			
 
				+/** @name Writing OpenCL kernels
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Return the OpenCL context of the device designated by \p devid
			
 
				+   in \p context.
			
 
				+*/
			
 
				 void starpu_opencl_get_context(int devid, cl_context *context);
			
 
				+
			
 
				+/**
			
 
				+   Return the cl_device_id corresponding to \p devid in \p device.
			
 
				+*/
			
 
				 void starpu_opencl_get_device(int devid, cl_device_id *device);
			
 
				+
			
 
				+/**
			
 
				+   Return the command queue of the device designated by \p devid
			
 
				+   into \p queue.
			
 
				+*/
			
 
				 void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
			
 
				+
			
 
				+/**
			
 
				+   Return the context of the current worker.
			
 
				+*/
			
 
				 void starpu_opencl_get_current_context(cl_context *context);
			
 
				+
			
 
				+/**
			
 
				+   Return the computation kernel command queue of the current
			
 
				+   worker.
			
 
				+*/
			
 
				 void starpu_opencl_get_current_queue(cl_command_queue *queue);
			
 
				 
			
 
				+/**
			
 
				+   Set the arguments of a given kernel. The list of arguments
			
 
				+   must be given as <c>(size_t size_of_the_argument, cl_mem *
			
 
				+   pointer_to_the_argument)</c>. The last argument must be 0. Return the
			
 
				+   number of arguments that were successfully set. In case of failure,
			
 
				+   return the id of the argument that could not be set and \p err is set to
			
 
				+   the error returned by OpenCL. Otherwise, return the number of
			
 
				+   arguments that were set.
			
 
				+
			
 
				+   Here is an example:
			
 
				+   \code{.c}
			
 
				+   int n;
			
 
				+   cl_int err;
			
 
				+   cl_kernel kernel;
			
 
				+   n = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
			
 
				+   if (n != 2) fprintf(stderr, "Error : %d\n", err);
			
 
				+   \endcode
			
 
				+*/
			
 
				+int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Compiling OpenCL kernels
			
 
				+    Source codes for OpenCL kernels can be stored in a file or in a
			
 
				+    string. StarPU provides functions to build the program executable for
			
 
				+    each available OpenCL device as a cl_program object. This program
			
 
				+    executable can then be loaded within a specific queue as explained in
			
 
				+    the next section. These are only helpers; applications can also fill a
			
 
				+    starpu_opencl_program array by hand for more advanced use (e.g.
			
 
				+    different programs on the different OpenCL devices, for relocation
			
 
				+    purpose for instance).
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Store the contents of the file \p source_file_name in the buffer
			
 
				+   \p opencl_program_source. The file \p source_file_name can be located in the
			
 
				+   current directory, or in the directory specified by the environment
			
 
				+   variable \ref STARPU_OPENCL_PROGRAM_DIR, or
			
 
				+   in the directory <c>share/starpu/opencl</c> of the installation
			
 
				+   directory of StarPU, or in the source directory of StarPU. When the
			
 
				+   file is found, \p located_file_name is the full name of the file as it
			
 
				+   has been located on the system, \p located_dir_name the directory
			
 
				+   where it has been located. Otherwise, they are both set to the empty
			
 
				+   string.
			
 
				+*/
			
 
				 void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
			
 
				+
			
 
				+/**
			
 
				+   Similar to function starpu_opencl_load_program_source() but
			
 
				+   allocate the buffers \p located_file_name, \p located_dir_name and
			
 
				+   \p opencl_program_source.
			
 
				+*/
			
 
				 void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source);
			
 
				+
			
 
				+/**
			
 
				+   Compile the OpenCL kernel stored in the file \p source_file_name
			
 
				+   with the given options \p build_options and store the result in the
			
 
				+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
			
 
				+   \p source_file_name. The compilation is done for every OpenCL device,
			
 
				+   and the filename is suffixed with the vendor id and the device id of
			
 
				+   the OpenCL device.
			
 
				+*/
			
 
				 int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options);
			
 
				+
			
 
				+/**
			
 
				+   Compile the OpenCL kernel in the string \p opencl_program_source
			
 
				+   with the given options \p build_options and store the result in the
			
 
				+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
			
 
				+   file_name. The compilation is done for every OpenCL device, and the
			
 
				+   filename is suffixed with the vendor id and the device id of the
			
 
				+   OpenCL device.
			
 
				+*/
			
 
				 int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options);
			
 
				 
			
 
				+/**
			
 
				+   Compile the binary OpenCL kernel identified with \p kernel_id.
			
 
				+   For every OpenCL device, the binary OpenCL kernel will be loaded from
			
 
				+   the file
			
 
				+   <c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
			
 
				+*/
			
 
				 int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
			
 
				 
			
 
				+/**
			
 
				+   Compile an OpenCL source code stored in a file.
			
 
				+*/
			
 
				 int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options);
			
 
				+/**
			
 
				+   Compile an OpenCL source code stored in a string.
			
 
				+ */
			
 
				 int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options);
			
 
				+
			
 
				+/**
			
 
				+   Unload an OpenCL compiled code.
			
 
				+*/
			
 
				 int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name Loading OpenCL kernels
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Create a kernel \p kernel for device \p devid, on its computation
			
 
				+   command queue returned in \p queue, using program \p opencl_programs
			
 
				+   and name \p kernel_name.
			
 
				+*/
			
 
				 int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid);
			
 
				+
			
 
				+/**
			
 
				+   Release the given \p kernel, to be called after kernel execution.
			
 
				+*/
			
 
				 int starpu_opencl_release_kernel(cl_kernel kernel);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name OpenCL Statistics
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Collect statistics on a kernel execution.
			
 
				+   After termination of the kernels, the OpenCL codelet should call this
			
 
				+   function with the event returned by \c clEnqueueNDRangeKernel(), to
			
 
				+   let StarPU collect statistics about the kernel execution (used cycles,
			
 
				+   consumed energy).
			
 
				+*/
			
 
				 int starpu_opencl_collect_stats(cl_event event);
			
 
				 
			
 
				-int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
			
 
				+/** @} */
			
 
				+
			
 
				+/** @name OpenCL Utilities
			
 
				+    @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Return the error message in English corresponding to \p status, an OpenCL
			
 
				+   error code.
			
 
				+*/
			
 
				+const char *starpu_opencl_error_string(cl_int status);
			
 
				+
			
 
				+/**
			
 
				+   Given a valid error status, print the corresponding error message on
			
 
				+   \c stdout, along with the function name \p func, the filename
			
 
				+   \p file, the line number \p line and the message \p msg.
			
 
				+*/
			
 
				+void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_display_error() with the error
			
 
				+   \p status, the current function name, current file and line number,
			
 
				+   and an empty message.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_display_error() and abort.
			
 
				+*/
			
 
				+static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				+{
			
 
				+	starpu_opencl_display_error(func, file, line, msg, status);
			
 
				+	assert(0);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_report_error() with the error \p
			
 
				+   status, the current function name, current file and line number,
			
 
				+   and an empty message.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				 
			
 
				+/**
			
 
				+   Call the function starpu_opencl_report_error() with \p msg
			
 
				+   and \p status, the current function name, current file and line number.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
			
 
				+
			
 
				+/**
			
 
				+   Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
			
 
				+   combination of \c cl_mem_flags values.
			
 
				+*/
			
 
				 cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags);
			
 
				 
			
 
				+/**
			
 
				+   Copy \p size bytes from the given \p ptr on RAM \p src_node to the
			
 
				+   given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
			
 
				+   bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
			
 
				+   synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				 cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
			
 
				 
			
 
				+/**
			
 
				+   Copy \p size bytes asynchronously from the given \p buffer on OpenCL
			
 
				+   \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
			
 
				+   offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
			
 
				+   is synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
			
 
				 
			
 
				+/**
			
 
				+   Copy \p size bytes asynchronously from byte offset \p src_offset of \p
			
 
				+   src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
			
 
				+   OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
			
 
				+   synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				 cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
			
 
				 
			
 
				+/**
			
 
				+   Copy \p size bytes from byte offset \p src_offset of \p src on \p
			
 
				+   src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
			
 
				+   event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
			
 
				+   synchronised before returning. If not <c>NULL</c>, \p event can be
			
 
				+   used after the call to wait for this particular copy to complete. The
			
 
				+   function returns <c>-EAGAIN</c> if the asynchronous launch was
			
 
				+   successful. It returns 0 if the synchronous copy was successful, or
			
 
				+   fails otherwise.
			
 
				+*/
			
 
				 cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				 #endif /* STARPU_USE_OPENCL */
			
 
				-#endif /* __STARPU_OPENCL_H__ */
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+#endif /* __STARPU_OPENCL_H__ */
			
--- a/include/starpu_openmp.h
+++ b/include/starpu_openmp.h
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2011-2014,2016                           Inria
			
 
				  * Copyright (C) 2009-2018                                Université de Bordeaux
			
 
				- * Copyright (C) 2010-2017                                CNRS
			
 
				+ * Copyright (C) 2010-2017, 2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
 
				  *
			
@@ -21,13 +21,14 @@
 
				 #ifndef __STARPU_PERFMODEL_H__
			
 
				 #define __STARPU_PERFMODEL_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <stdio.h>
			
 
				 
			
 
				-#include <starpu_util.h>
			
 
				-#include <starpu_worker.h>
			
 
				-#include <starpu_task.h>
			
 
				-
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
@@ -205,8 +206,18 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
				 
			
 
				 extern struct starpu_perfmodel starpu_perfmodel_nop;
			
 
				 
			
 
				+/**
			
 
				+   Display statistics about the current data handles registered
			
 
				+   within StarPU. StarPU must have been configured with the configure
			
 
				+   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
			
 
				+   MemoryFeedback).
			
 
				+*/
			
 
				+void starpu_data_display_memory_stats();
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_PERFMODEL_H__ */
			
--- a/include/starpu_profiling.h
+++ b/include/starpu_profiling.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
			
 
				- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
			
 
				+ * Copyright (C) 2010,2011,2013,2015,2017,2019                 CNRS
			
 
				  * Copyright (C) 2016                                     Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,10 +19,14 @@
 
				 #ifndef __STARPU_PROFILING_H__
			
 
				 #define __STARPU_PROFILING_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <errno.h>
			
 
				 #include <time.h>
			
 
				-#include <starpu_util.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
@@ -183,4 +187,6 @@ void starpu_profiling_worker_helper_display_summary(void);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_PROFILING_H__ */
			
--- a/include/starpu_rand.h
+++ b/include/starpu_rand.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2012,2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2012,2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2012,2015,2016                           Université de Bordeaux
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				  *
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_RAND_H__
			
 
				 #define __STARPU_RAND_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdlib.h>
			
 
				 #include <starpu_config.h>
			
 
				 
			
@@ -66,4 +71,6 @@ typedef int starpu_drand48_data;
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_RAND_H__ */
			
--- a/include/starpu_scc.h
+++ b/include/starpu_scc.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2015,2017                                CNRS
			
 
				+ * Copyright (C) 2015,2017,2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Université de Bordeaux
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				  *
			
@@ -16,12 +16,15 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-
			
 
				 #ifndef __STARPU_SCC_H__
			
 
				 #define __STARPU_SCC_H__
			
 
				 
			
 
				-#include <starpu_config.h>
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				 
			
 
				+#include <starpu_config.h>
			
 
				 
			
 
				 #ifdef STARPU_USE_SCC
			
 
				 
			
@@ -33,5 +36,6 @@ starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
 
				 
			
 
				 #endif /* STARPU_USE_SCC */
			
 
				 
			
 
				+/** @} */
			
 
				 
			
 
				 #endif /* __STARPU_SCC_H__ */
			
--- a/include/starpu_sched_component.h
+++ b/include/starpu_sched_component.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2017                                     Arthur Chevalier
			
 
				  * Copyright (C) 2013,2014,2017                           Inria
			
 
				- * Copyright (C) 2014,2015,2017                           CNRS
			
 
				+ * Copyright (C) 2014,2015,2017,2019                           CNRS
			
 
				  * Copyright (C) 2014-2019                                Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Simon Archipoff
			
 
				  *
			
@@ -21,6 +21,11 @@
 
				 #ifndef __STARPU_SCHED_COMPONENT_H__
			
 
				 #define __STARPU_SCHED_COMPONENT_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
@@ -246,4 +251,6 @@ while(0)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_SCHED_COMPONENT_H__ */
			
--- a/include/starpu_sched_ctx.h
+++ b/include/starpu_sched_ctx.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010,2012-2017                           Inria
			
 
				  * Copyright (C) 2017                                     Arthur Chevalier
			
 
				- * Copyright (C) 2012-2014,2017                           CNRS
			
 
				+ * Copyright (C) 2012-2014,2017,2019                           CNRS
			
 
				  * Copyright (C) 2012,2014,2016                           Université de Bordeaux
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
 
				  *
			
@@ -21,6 +21,11 @@
 
				 #ifndef __STARPU_SCHED_CTX_H__
			
 
				 #define __STARPU_SCHED_CTX_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -184,4 +189,6 @@ void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_SCHED_CTX_H__ */
			
--- a/include/starpu_sched_ctx_hypervisor.h
+++ b/include/starpu_sched_ctx_hypervisor.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010,2012,2013,2015                      Inria
			
 
				- * Copyright (C) 2013,2017                                CNRS
			
 
				+ * Copyright (C) 2013,2017,2019                                CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -18,13 +18,16 @@
 
				 #ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__
			
 
				 #define __STARPU_SCHED_CTX_HYPERVISOR_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-
			
 
				-
			
 
				 struct starpu_sched_ctx_performance_counters
			
 
				 {
			
 
				 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
			
@@ -50,4 +53,6 @@ void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_SCHED_CTX_HYPERVISOR_H__ */
			
--- a/include/starpu_scheduler.h
+++ b/include/starpu_scheduler.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2011-2013,2015-2017                      Inria
			
 
				  * Copyright (C) 2010-2018                                Université de Bordeaux
			
 
				- * Copyright (C) 2011-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2011-2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
@@ -22,6 +22,11 @@
 
				 #ifndef __STARPU_SCHEDULER_H__
			
 
				 #define __STARPU_SCHEDULER_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -115,4 +120,6 @@ void starpu_sched_task_break(struct starpu_task *task);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_SCHEDULER_H__ */
			
--- a/include/starpu_sink.h
+++ b/include/starpu_sink.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2015,2017                                CNRS
			
 
				+ * Copyright (C) 2015,2017,2019                                CNRS
			
 
				  * Copyright (C) 2013                                     Université de Bordeaux
			
 
				  * Copyright (C) 2012                                     Inria
			
 
				  *
			
@@ -16,10 +16,16 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-
			
 
				 #ifndef __STARPU_SINK_H__
			
 
				 #define __STARPU_SINK_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 void starpu_sink_common_worker(int argc, char **argv);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_SINK_H__ */
			
--- a/include/starpu_stdlib.h
+++ b/include/starpu_stdlib.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015-2017,2019                      CNRS
			
 
				  * Copyright (C) 2017                                     Inria
			
 
				  * Copyright (C) 2010-2016,2019                           Université de Bordeaux
			
 
				  *
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_STDLIB_H__
			
 
				 #define __STARPU_STDLIB_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -35,6 +40,14 @@ extern "C"
 
				 
			
 
				 #define STARPU_MALLOC_SIMULATION_FOLDED	((1ULL)<<6)
			
 
				 
			
 
				+/**
			
 
				+   \deprecated
			
 
				+   Equivalent to starpu_malloc(). This macro is provided to avoid
			
 
				+   breaking old codes.
			
 
				+*/
			
 
				+#define starpu_data_malloc_pinned_if_possible	starpu_malloc
			
 
				+#define starpu_data_free_pinned_if_possible	starpu_free
			
 
				+
			
 
				 void starpu_malloc_set_align(size_t align);
			
 
				 
			
 
				 int starpu_malloc(void **A, size_t dim);
			
@@ -65,4 +78,6 @@ void starpu_sleep(float nb_sec);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_STDLIB_H__ */
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -21,10 +21,12 @@
 
				 #ifndef __STARPU_TASK_H__
			
 
				 #define __STARPU_TASK_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				-#include <starpu_data.h>
			
 
				-#include <starpu_util.h>
			
 
				-#include <starpu_task_bundle.h>
			
 
				 #include <errno.h>
			
 
				 #include <assert.h>
			
 
				 
			
@@ -380,4 +382,6 @@ unsigned starpu_task_get_implementation(struct starpu_task *task);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_TASK_H__ */
			
--- a/include/starpu_task_bundle.h
+++ b/include/starpu_task_bundle.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				- * Copyright (C) 2011-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2011-2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -20,6 +20,11 @@
 
				 #ifndef __STARPU_TASK_BUNDLE_H__
			
 
				 #define __STARPU_TASK_BUNDLE_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
@@ -41,4 +46,6 @@ void starpu_task_bundle_close(starpu_task_bundle_t bundle);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_TASK_BUNDLE_H__ */
			
--- a/include/starpu_task_list.h
+++ b/include/starpu_task_list.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2012,2014,2016,2017                 Université de Bordeaux
			
 
				- * Copyright (C) 2011-2014,2017,2018                      CNRS
			
 
				+ * Copyright (C) 2011-2014,2017,2018,2019                      CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -18,6 +18,11 @@
 
				 #ifndef __STARPU_TASK_LIST_H__
			
 
				 #define __STARPU_TASK_LIST_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_task.h>
			
 
				 #include <starpu_util.h>
			
 
				 
			
@@ -84,4 +89,6 @@ void starpu_task_list_move(struct starpu_task_list *ldst, struct starpu_task_lis
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_TASK_LIST_H__ */
			
--- a/include/starpu_task_util.h
+++ b/include/starpu_task_util.h
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_TASK_UTIL_H__
			
 
				 #define __STARPU_TASK_UTIL_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <string.h>
			
@@ -103,4 +108,6 @@ void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_TASK_UTIL_H__ */
			
--- a/include/starpu_thread.h
+++ b/include/starpu_thread.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2013,2015-2017                           Inria
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2019                           CNRS
			
 
				  * Copyright (C) 2010,2012-2019                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_THREAD_H__
			
 
				 #define __STARPU_THREAD_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_config.h>
			
 
				 #include <starpu_util.h>
			
 
				 #ifdef STARPU_SIMGRID
			
@@ -441,6 +446,6 @@ int starpu_sem_wait(starpu_sem_t *);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-#endif /* __STARPU_THREAD_H__ */
			
 
				-
			
 
				+/** @} */
			
 
				 
			
 
				+#endif /* __STARPU_THREAD_H__ */
			
--- a/include/starpu_thread_util.h
+++ b/include/starpu_thread_util.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012,2013                                Inria
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_THREAD_UTIL_H__
			
 
				 #define __STARPU_THREAD_UTIL_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu_util.h>
			
 
				 #include <starpu_thread.h>
			
 
				 #include <errno.h>
			
@@ -383,4 +388,6 @@ int _starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_m
 
				 } while (0)
			
 
				 #endif /* _MSC_VER */
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_THREAD_UTIL_H__ */
			
--- a/include/starpu_top.h
+++ b/include/starpu_top.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2011                                     Inria
			
 
				- * Copyright (C) 2011-2013,2017                           CNRS
			
 
				+ * Copyright (C) 2011-2013,2017,2019                           CNRS
			
 
				  * Copyright (C) 2011-2013                                Université de Bordeaux
			
 
				  * Copyright (C) 2011                                     William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony
			
 
				  *
			
@@ -20,6 +20,11 @@
 
				 #ifndef __STARPU_TOP_H__
			
 
				 #define __STARPU_TOP_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <time.h>
			
@@ -109,5 +114,6 @@ void starpu_top_debug_lock(const char *message);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-#endif /* __STARPU_TOP_H__ */
			
 
				+/** @} */
			
 
				 
			
 
				+#endif /* __STARPU_TOP_H__ */
			
--- a/include/starpu_tree.h
+++ b/include/starpu_tree.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2014                                     Inria
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2019                           CNRS
			
 
				  * Copyright (C) 2009-2014,2016                           Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_TREE_H__
			
 
				 #define __STARPU_TREE_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
 
				 {
			
@@ -49,4 +54,6 @@ void starpu_tree_free(struct starpu_tree *tree);
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_TREE_H__ */
			
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -19,6 +19,11 @@
 
				 #ifndef __STARPU_UTIL_H__
			
 
				 #define __STARPU_UTIL_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <stdint.h>
			
@@ -486,4 +491,6 @@ struct timespec
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #endif /* __STARPU_UTIL_H__ */
			
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2013-2017                                Inria
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				+ * Copyright (C) 2010-2015,2017,2019                           CNRS
			
 
				  * Copyright (C) 2009-2014,2016,2017,2019                 Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Thibaut Lambert
			
 
				  * Copyright (C) 2016                                     Uppsala University
			
@@ -21,6 +21,11 @@
 
				 #ifndef __STARPU_WORKER_H__
			
 
				 #define __STARPU_WORKER_H__
			
 
				 
			
 
				+/** @defgroup
			
 
				+ *
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				 #include <stdlib.h>
			
 
				 #include <starpu_config.h>
			
 
				 #include <starpu_thread.h>
			
@@ -35,6 +40,20 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+enum starpu_node_kind
			
 
				+{
			
 
				+	STARPU_UNUSED     = 0x00,
			
 
				+	STARPU_CPU_RAM    = 0x01,
			
 
				+	STARPU_CUDA_RAM   = 0x02,
			
 
				+	STARPU_OPENCL_RAM = 0x03,
			
 
				+	STARPU_DISK_RAM   = 0x04,
			
 
				+	STARPU_MIC_RAM    = 0x05,
			
 
				+	STARPU_SCC_RAM    = 0x06,
			
 
				+	STARPU_SCC_SHM    = 0x07,
			
 
				+	STARPU_MPI_MS_RAM = 0x08
			
 
				+
			
 
				+};
			
 
				+
			
 
				 enum starpu_worker_archtype
			
 
				 {
			
 
				 	STARPU_CPU_WORKER,
			
@@ -52,7 +71,7 @@ struct starpu_sched_ctx_iterator
 
				 	void *value;
			
 
				 	void *possible_value;
			
 
				 	char visited[STARPU_NMAXWORKERS];
			
 
				-	int possibly_parallel; 
			
 
				+	int possibly_parallel;
			
 
				 };
			
 
				 
			
 
				 enum starpu_worker_collection_type
			
@@ -178,9 +197,19 @@ hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
 
				 hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
			
 
				 #endif
			
 
				 
			
 
				+unsigned starpu_worker_get_memory_node(unsigned workerid);
			
 
				+unsigned starpu_memory_nodes_get_count(void);
			
 
				+int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
			
 
				+int starpu_memory_nodes_get_numa_count(void);
			
 
				+int starpu_memory_nodes_numa_id_to_devid(int osid);
			
 
				+int starpu_memory_nodes_numa_devid_to_id(unsigned id);
			
 
				+
			
 
				+enum starpu_node_kind starpu_node_get_kind(unsigned node);
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-#endif /* __STARPU_WORKER_H__ */
			
 
				+/** @} */
			
 
				 
			
 
				+#endif /* __STARPU_WORKER_H__ */