Browse Source

doc: start putting api documentation in public .h files instead of
doxygen files (it will be easier to ensure the documentation is
updated along with the code)

Nathalie Furmento 6 years ago
parent
commit
af93366acd
57 changed files with 4084 additions and 4586 deletions
  1. 0 13
      doc/doxygen/Makefile.am
  2. 0 81
      doc/doxygen/chapters/api/bitmap.doxy
  3. 0 103
      doc/doxygen/chapters/api/clustering_machine.doxy
  4. 0 125
      doc/doxygen/chapters/api/cuda_extensions.doxy
  5. 0 1270
      doc/doxygen/chapters/api/data_interfaces.doxy
  6. 0 443
      doc/doxygen/chapters/api/data_management.doxy
  7. 0 154
      doc/doxygen/chapters/api/data_out_of_core.doxy
  8. 0 416
      doc/doxygen/chapters/api/data_partition.doxy
  9. 0 35
      doc/doxygen/chapters/api/expert_mode.doxy
  10. 0 120
      doc/doxygen/chapters/api/fxt_support.doxy
  11. 0 56
      doc/doxygen/chapters/api/implicit_dependencies.doxy
  12. 0 60
      doc/doxygen/chapters/api/lower_bound.doxy
  13. 2 18
      doc/doxygen/chapters/api/mic_extensions.doxy
  14. 0 92
      doc/doxygen/chapters/api/multiformat_data_interface.doxy
  15. 2 234
      doc/doxygen/chapters/api/opencl_extensions.doxy
  16. 2 956
      doc/doxygen/chapters/api/openmp_runtime_support.doxy
  17. 0 59
      doc/doxygen/chapters/api/running_driver.doxy
  18. 0 2
      doc/doxygen/refman.tex
  19. 24 1
      include/starpu_bitmap.h
  20. 30 1
      include/starpu_bound.h
  21. 15 5
      include/starpu_clusters.h
  22. 28 1
      include/starpu_cublas.h
  23. 13 1
      include/starpu_cublas_v2.h
  24. 43 6
      include/starpu_cuda.h
  25. 23 1
      include/starpu_cusparse.h
  26. 429 45
      include/starpu_data.h
  27. 422 9
      include/starpu_data_filters.h
  28. 1306 152
      include/starpu_data_interfaces.h
  29. 168 26
      include/starpu_disk.h
  30. 44 1
      include/starpu_driver.h
  31. 18 2
      include/starpu_expert.h
  32. 64 1
      include/starpu_fxt.h
  33. 26 1
      include/starpu_hash.h
  34. 22 1
      include/starpu_mic.h
  35. 9 1
      include/starpu_mpi_ms.h
  36. 253 19
      include/starpu_opencl.h
  37. 945 40
      include/starpu_openmp.h
  38. 16 5
      include/starpu_perfmodel.h
  39. 8 2
      include/starpu_profiling.h
  40. 8 1
      include/starpu_rand.h
  41. 7 3
      include/starpu_scc.h
  42. 8 1
      include/starpu_sched_component.h
  43. 8 1
      include/starpu_sched_ctx.h
  44. 8 3
      include/starpu_sched_ctx_hypervisor.h
  45. 8 1
      include/starpu_scheduler.h
  46. 8 2
      include/starpu_sink.h
  47. 16 1
      include/starpu_stdlib.h
  48. 7 3
      include/starpu_task.h
  49. 8 1
      include/starpu_task_bundle.h
  50. 8 1
      include/starpu_task_list.h
  51. 7 0
      include/starpu_task_util.h
  52. 8 3
      include/starpu_thread.h
  53. 8 1
      include/starpu_thread_util.h
  54. 8 2
      include/starpu_top.h
  55. 8 1
      include/starpu_tree.h
  56. 7 0
      include/starpu_util.h
  57. 32 3
      include/starpu_worker.h

+ 0 - 13
doc/doxygen/Makefile.am

@@ -105,22 +105,12 @@ chapters =	\
 	chapters/code/disk_compute.c \
 	chapters/code/disk_compute.c \
 	chapters/code/nf_initexit.f90 \
 	chapters/code/nf_initexit.f90 \
 	chapters/api/codelet_and_tasks.doxy \
 	chapters/api/codelet_and_tasks.doxy \
-	chapters/api/cuda_extensions.doxy \
-	chapters/api/data_interfaces.doxy \
-	chapters/api/data_management.doxy \
-	chapters/api/data_partition.doxy \
-	chapters/api/data_out_of_core.doxy \
-	chapters/api/expert_mode.doxy \
 	chapters/api/explicit_dependencies.doxy \
 	chapters/api/explicit_dependencies.doxy \
 	chapters/api/fft_support.doxy \
 	chapters/api/fft_support.doxy \
-	chapters/api/fxt_support.doxy \
-	chapters/api/implicit_dependencies.doxy \
 	chapters/api/initialization.doxy \
 	chapters/api/initialization.doxy \
 	chapters/api/insert_task.doxy \
 	chapters/api/insert_task.doxy \
-	chapters/api/lower_bound.doxy \
 	chapters/api/misc_helpers.doxy \
 	chapters/api/misc_helpers.doxy \
 	chapters/api/mpi.doxy \
 	chapters/api/mpi.doxy \
-	chapters/api/multiformat_data_interface.doxy \
 	chapters/api/opencl_extensions.doxy \
 	chapters/api/opencl_extensions.doxy \
 	chapters/api/openmp_runtime_support.doxy \
 	chapters/api/openmp_runtime_support.doxy \
 	chapters/api/mic_extensions.doxy \
 	chapters/api/mic_extensions.doxy \
@@ -128,7 +118,6 @@ chapters =	\
 	chapters/api/parallel_tasks.doxy \
 	chapters/api/parallel_tasks.doxy \
 	chapters/api/performance_model.doxy \
 	chapters/api/performance_model.doxy \
 	chapters/api/profiling.doxy \
 	chapters/api/profiling.doxy \
-	chapters/api/running_driver.doxy \
 	chapters/api/scheduling_contexts.doxy \
 	chapters/api/scheduling_contexts.doxy \
 	chapters/api/scheduling_policy.doxy \
 	chapters/api/scheduling_policy.doxy \
 	chapters/api/standard_memory_library.doxy \
 	chapters/api/standard_memory_library.doxy \
@@ -138,13 +127,11 @@ chapters =	\
 	chapters/api/versioning.doxy \
 	chapters/api/versioning.doxy \
 	chapters/api/workers.doxy \
 	chapters/api/workers.doxy \
 	chapters/api/threads.doxy \
 	chapters/api/threads.doxy \
-	chapters/api/bitmap.doxy \
 	chapters/api/tree.doxy \
 	chapters/api/tree.doxy \
 	chapters/api/toolbox.doxy \
 	chapters/api/toolbox.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
 	chapters/api/modularized_scheduler.doxy \
 	chapters/api/modularized_scheduler.doxy \
-	chapters/api/clustering_machine.doxy \
 	chapters/api/interoperability.doxy
 	chapters/api/interoperability.doxy
 
 
 images = 	\
 images = 	\

+ 0 - 81
doc/doxygen/chapters/api/bitmap.doxy

@@ -1,81 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2014,2015,2017                           CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Bitmap  Bitmap
-
-\brief This section describes the bitmap facilities provided by StarPU.
-
-\struct starpu_bitmap
-\ingroup API_Bitmap
-todo
-
-\fn struct starpu_bitmap *starpu_bitmap_create(void)
-\ingroup API_Bitmap
-create an empty starpu_bitmap
-
-\fn void starpu_bitmap_destroy(struct starpu_bitmap *b)
-\ingroup API_Bitmap
-free \p b
-
-\fn void starpu_bitmap_set(struct starpu_bitmap *b, int e)
-\ingroup API_Bitmap
-set bit \p e in \p b
-
-\fn void starpu_bitmap_unset(struct starpu_bitmap *b, int e)
-\ingroup API_Bitmap
-unset bit \p e in \p b
-
-\fn void starpu_bitmap_unset_all(struct starpu_bitmap *b)
-\ingroup API_Bitmap
-unset all bits in \p b
-
-\fn int starpu_bitmap_get(struct starpu_bitmap *b, int e)
-\ingroup API_Bitmap
-return true iff bit \p e is set in \p b
-
-\fn void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c)
-\ingroup API_Bitmap
-Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c;
-
-\fn void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b)
-\ingroup API_Bitmap
-Basically compute \p a |= \p b
-
-\fn int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e)
-\ingroup API_Bitmap
-return 1 iff \p e is set in \p b1 AND \p e is set in \p b2
-
-\fn int starpu_bitmap_cardinal(struct starpu_bitmap *b)
-\ingroup API_Bitmap
-return the number of set bits in \p b
-
-\fn int starpu_bitmap_first(struct starpu_bitmap *b)
-\ingroup API_Bitmap
-return the index of the first set bit of \p b, -1 if none
-
-\fn int starpu_bitmap_last(struct starpu_bitmap *b)
-\ingroup API_Bitmap
-return the position of the last set bit of \p b, -1 if none
-
-\fn int starpu_bitmap_next(struct starpu_bitmap *b, int e)
-\ingroup API_Bitmap
-return the position of set bit right after \p e in \p b, -1 if none
-
-\fn int starpu_bitmap_has_next(struct starpu_bitmap *b, int e)
-\ingroup API_Bitmap
-todo
-
-*/

+ 0 - 103
doc/doxygen/chapters/api/clustering_machine.doxy

@@ -1,103 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2017, 2019                                     CNRS
- * Copyright (C) 2017                                     Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Clustering_Machine Clustering Machine
-
-\def STARPU_CLUSTER_MIN_NB
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_MAX_NB
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_NB
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_POLICY_NAME
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_POLICY_STRUCT
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_KEEP_HOMOGENEOUS
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_PREFERE_MIN
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_CREATE_FUNC
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_CREATE_FUNC_ARG
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_TYPE
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_AWAKE_WORKERS
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_PARTITION_ONE
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_NEW
-\ingroup API_Clustering_Machine
-TODO
-
-\def STARPU_CLUSTER_NCORES
-\ingroup API_Clustering_Machine
-TODO
-
-\enum starpu_cluster_types
-\ingroup API_Clustering_Machine
-todo
-\var starpu_cluster_types::STARPU_CLUSTER_OPENMP
-todo
-\var starpu_cluster_types::STARPU_CLUSTER_INTEL_OPENMP_MKL
-todo
-\var starpu_cluster_types::STARPU_CLUSTER_GNU_OPENMP_MKL
-todo
-
-\struct starpu_cluster_machine
-\ingroup API_Clustering_Machine
-todo
-
-\fn struct starpu_cluster_machine* starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...)
-\ingroup API_Clustering_Machine
-todo
-
-\fn int starpu_uncluster_machine(struct starpu_cluster_machine* clusters)
-\ingroup API_Clustering_Machine
-todo
-
-\fn int starpu_cluster_print(struct starpu_cluster_machine* clusters)
-\ingroup API_Clustering_Machine
-todo
-
-
-*/

+ 0 - 125
doc/doxygen/chapters/api/cuda_extensions.doxy

@@ -1,125 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014,2017                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_CUDA_Extensions CUDA Extensions
-
-\def STARPU_USE_CUDA
-\ingroup API_CUDA_Extensions
-This macro is defined when StarPU has been installed with CUDA
-support. It should be used in your code to detect the availability of
-CUDA as shown in \ref FullSourceCodeVectorScal.
-
-\def STARPU_MAXCUDADEVS
-\ingroup API_CUDA_Extensions
-This macro defines the maximum number of CUDA devices that are
-supported by StarPU.
-
-\fn cudaStream_t starpu_cuda_get_local_stream(void)
-\ingroup API_CUDA_Extensions
-Return the current worker’s CUDA stream. StarPU
-provides a stream for every CUDA device controlled by StarPU. This
-function is only provided for convenience so that programmers can
-easily use asynchronous operations within codelets without having to
-create a stream by hand. Note that the application is not forced to
-use the stream provided by starpu_cuda_get_local_stream() and may also
-create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
-allowed, but will reduce the likelihood of having all transfers
-overlapped.
-
-\fn const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid)
-\ingroup API_CUDA_Extensions
-Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker).
-
-\fn void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
-\ingroup API_CUDA_Extensions
-Report a CUDA error.
-
-\def STARPU_CUDA_REPORT_ERROR(status)
-\ingroup API_CUDA_Extensions
-Calls starpu_cuda_report_error(), passing the current function, file and line position.
-
-\fn int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)
-\ingroup API_CUDA_Extensions
-Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
-to the pointer \p dst_ptr on \p dst_node. The function first tries to
-copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
-asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
-data synchronously. The function returns <c>-EAGAIN</c> if the
-asynchronous launch was successful. It returns 0 if the synchronous
-copy was successful, or fails otherwise.
-
-\fn void starpu_cuda_set_device(unsigned devid)
-\ingroup API_CUDA_Extensions
-Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
-according to whether \p devid is among the field
-starpu_conf::cuda_opengl_interoperability.
-
-\fn void starpu_cublas_init(void)
-\ingroup API_CUDA_Extensions
-This function initializes CUBLAS on every CUDA device. The
-CUBLAS library must be initialized prior to any CUBLAS call. Calling
-starpu_cublas_init() will initialize CUBLAS on every CUDA device
-controlled by StarPU. This call blocks until CUBLAS has been properly
-initialized on every device.
-
-\fn void starpu_cublas_set_stream(void)
-\ingroup API_CUDA_Extensions
-This function sets the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
-codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
-CUDA stream. When using one thread per CUDA worker, this function does not
-do anything since the CUBLAS stream does not change, and is set once by
-starpu_cublas_init().
-
-\fn cublasHandle_t starpu_cublas_get_local_handle(void)
-\ingroup API_CUDA_Extensions
-This function returns the CUBLAS v2 handle to be used to queue CUBLAS v2
-kernels. It is properly initialized and configured for multistream by
-starpu_cublas_init().
-
-\fn void starpu_cublas_shutdown(void)
-\ingroup API_CUDA_Extensions
-This function synchronously deinitializes the CUBLAS library on
-every CUDA device.
-
-\fn void starpu_cublas_report_error(const char *func, const char *file, int line, int status)
-\ingroup API_CUDA_Extensions
-Report a cublas error.
-
-\def STARPU_CUBLAS_REPORT_ERROR(status)
-\ingroup API_CUDA_Extensions
-Calls starpu_cublas_report_error(), passing the current
-function, file and line position.
-
-\fn void starpu_cusparse_init(void)
-\ingroup API_CUDA_Extensions
-Calling starpu_cusparse_init() will initialize CUSPARSE on every CUDA device
-controlled by StarPU. This call blocks until CUSPARSE has been properly
-initialized on every device.
-
-\fn cusparseHandle_t starpu_cusparse_get_local_handle(void)
-\ingroup API_CUDA_Extensions
-This function returns the CUSPARSE handle to be used to queue CUSPARSE
-kernels. It is properly initialized and configured for multistream by
-starpu_cusparse_init().
-
-\fn void starpu_cusparse_shutdown(void)
-\ingroup API_CUDA_Extensions
-This function synchronously deinitializes the CUSPARSE library on
-every CUDA device.
-
-*/

File diff suppressed because it is too large
+ 0 - 1270
doc/doxygen/chapters/api/data_interfaces.doxy


+ 0 - 443
doc/doxygen/chapters/api/data_management.doxy

@@ -1,443 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011,2012,2017                           Inria
- * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Data_Management Data Management
-
-\brief This section describes the data management facilities provided
-by StarPU. We show how to use existing data interfaces in
-\ref API_Data_Interfaces, but developers can design their own data interfaces if
-required.
-
-\typedef starpu_data_handle_t
-\ingroup API_Data_Management
-StarPU uses ::starpu_data_handle_t as an opaque handle to
-manage a piece of data. Once a piece of data has been registered to
-StarPU, it is associated to a ::starpu_data_handle_t which keeps track
-of the state of the piece of data over the entire machine, so that we
-can maintain data consistency and locate data replicates for instance.
-
-\typedef starpu_arbiter_t
-\ingroup API_Data_Management
-This is an arbiter, which implements an advanced but centralized management of
-concurrent data accesses, see \ref ConcurrentDataAccess for the details.
-
-\enum starpu_data_access_mode
-\ingroup API_Data_Management
-This datatype describes a data access mode.
-\var starpu_data_access_mode::STARPU_NONE
-    TODO
-\var starpu_data_access_mode::STARPU_R
-    read-only mode.
-\var starpu_data_access_mode::STARPU_W
-    write-only mode.
-\var starpu_data_access_mode::STARPU_RW
-    read-write mode. This is equivalent to ::STARPU_R|::STARPU_W
-\var starpu_data_access_mode::STARPU_SCRATCH
-    A temporary buffer is allocated for the task, but StarPU does not
-    enforce data consistency---i.e. each device has its own buffer,
-    independently from each other (even for CPUs), and no data
-    transfer is ever performed. This is useful for temporary variables
-    to avoid allocating/freeing buffers inside each task. Currently,
-    no behavior is defined concerning the relation with the ::STARPU_R
-    and ::STARPU_W modes and the value provided at registration ---
-    i.e., the value of the scratch buffer is undefined at entry of the
-    codelet function.  It is being considered for future extensions at
-    least to define the initial value.  For now, data to be used in
-    ::STARPU_SCRATCH mode should be registered with node -1 and
-    a <c>NULL</c> pointer, since the value of the provided buffer is
-    simply ignored for now.
-\var starpu_data_access_mode::STARPU_REDUX
-    todo
-\var starpu_data_access_mode::STARPU_COMMUTE
-    ::STARPU_COMMUTE can be passed along
-    ::STARPU_W or ::STARPU_RW to express that StarPU can let tasks
-    commute, which is useful e.g. when bringing a contribution into
-    some data, which can be done in any order (but still require
-    sequential consistency against reads or non-commutative writes).
-\var starpu_data_access_mode::STARPU_SSEND
-    used in starpu_mpi_insert_task() to specify the data has to be
-    sent using a synchronous and non-blocking mode (see
-    starpu_mpi_issend())
-\var starpu_data_access_mode::STARPU_LOCALITY
-    used to tell the scheduler which data is the most important for
-    the task, and should thus be used to try to group tasks on the
-    same core or cache, etc. For now only the ws and lws schedulers
-    take this flag into account, and only when rebuilt with
-    USE_LOCALITY flag defined in the
-    src/sched_policies/work_stealing_policy.c source code.
-\var starpu_data_access_mode::STARPU_ACCESS_MODE_MAX
-    todo
-
-@name Basic Data Management API
-\ingroup API_Data_Management
-
-Data management is done at a high-level in StarPU: rather than
-accessing a mere list of contiguous buffers, the tasks may manipulate
-data that are described by a high-level construct which we call data
-interface.
-
-An example of data interface is the "vector" interface which describes
-a contiguous data array on a specific memory node. This interface is a
-simple structure containing the number of elements in the array, the
-size of the elements, and the address of the array in the appropriate
-address space (this address may be invalid if there is no valid copy
-of the array in the memory node). More information on the data
-interfaces provided by StarPU is given in \ref API_Data_Interfaces.
-
-When a piece of data managed by StarPU is used by a task, the task
-implementation is given a pointer to an interface describing a valid
-copy of the data that is accessible from the current processing unit.
-
-Every worker is associated to a memory node which is a logical
-abstraction of the address space from which the processing unit gets
-its data. For instance, the memory node associated to the different
-CPU workers represents main memory (RAM), the memory node associated
-to a GPU is DRAM embedded on the device. Every memory node is
-identified by a logical index which is accessible from the
-function starpu_worker_get_memory_node(). When registering a piece of
-data to StarPU, the specified memory node indicates where the piece of
-data initially resides (we also call this memory node the home node of
-a piece of data).
-
-In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
-and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
-numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
-
-\fn void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops)
-\ingroup API_Data_Management
-Register a piece of data into the handle located at the
-\p handleptr address. The \p data_interface buffer contains the initial
-description of the data in the \p home_node. The \p ops argument is a
-pointer to a structure describing the different methods used to
-manipulate this type of interface. See starpu_data_interface_ops for
-more details on this structure.
-If \p home_node is -1, StarPU will automatically allocate the memory when
-it is used for the first time in write-only mode. Once such data
-handle has been automatically allocated, it is possible to access it
-using any access mode.
-Note that StarPU supplies a set of predefined types of interface (e.g.
-vector or matrix) which can be registered by the means of helper
-functions (e.g. starpu_vector_data_register() or
-starpu_matrix_data_register()).
-
-\fn void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node)
-\ingroup API_Data_Management
-Register that a buffer for \p handle on \p node will be set. This is typically
-used by starpu_*_ptr_register helpers before setting the interface pointers for
-this node, to tell the core that it is now allocated.
-
-\fn void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc)
-\ingroup API_Data_Management
-Register a new piece of data into the handle \p handledst with the
-same interface as the handle \p handlesrc.
-
-\fn void starpu_data_unregister(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Unregister a data \p handle from StarPU. If the
-data was automatically allocated by StarPU because the home node was
--1, all automatically allocated buffers are freed. Otherwise, a valid
-copy of the data is put back into the home node in the buffer that was
-initially registered. Using a data handle that has been unregistered
-from StarPU results in an undefined behaviour. In case we do not need
-to update the value of the data in the home node, we can use
-the function starpu_data_unregister_no_coherency() instead.
-
-\fn void starpu_data_unregister_no_coherency(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-This is the same as starpu_data_unregister(), except that
-StarPU does not put back a valid copy into the home node, in the
-buffer that was initially registered.
-
-\fn void starpu_data_unregister_submit(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Destroy the data \p handle once it is no longer needed by any
-submitted task. No coherency is assumed.
-
-\fn void starpu_data_invalidate(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Destroy all replicates of the data \p handle immediately. After
-data invalidation, the first access to \p handle must be performed in
-::STARPU_W mode. Accessing an invalidated data in ::STARPU_R mode
-results in undefined behaviour.
-
-\fn void starpu_data_invalidate_submit(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Submit invalidation of the data \p handle after completion of
-previously submitted tasks.
-
-\fn void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask)
-\ingroup API_Data_Management
-Set the write-through mask of the data \p handle (and
-its children), i.e. a bitmask of nodes where the data should be always
-replicated after modification. It also prevents the data from being
-evicted from these nodes when memory gets scarce. When the data is
-modified, it is automatically transferred into those memory nodes. For
-instance a <c>1<<0</c> write-through mask means that the CUDA workers
-will commit their changes in main memory (node 0).
-
-\fn void starpu_data_set_name(starpu_data_handle_t handle, const char *name)
-\ingroup API_Data_Management
-Set the name of the data, to be shown in various profiling tools.
-
-\fn void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[])
-\ingroup API_Data_Management
-Set the coordinates of the data, to be shown in various profiling tools.
-\p dimensions is the size of the \p dims array
-This can be for instance the tile coordinates within a big matrix.
-
-\fn void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...)
-\ingroup API_Data_Management
-Set the coordinates of the data, to be shown in various profiling tools.
-\p dimensions is the number of subsequent \c int parameters.
-This can be for instance the tile coordinates within a big matrix.
-
-\fn void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag)
-\ingroup API_Data_Management
-Set whether this data should be eligible to be evicted to disk storage (1) or
-not (0). The default is 1.
-
-\fn unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Get whether this data was set to be eligible to be evicted to disk storage (1) or
-not (0).
-
-\fn int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
-\ingroup API_Data_Management
-Issue a fetch request for the data \p handle to \p node, i.e.
-requests that the data be replicated to the given node as soon as possible, so that it is
-available there for tasks. If \p async is 0, the call will
-block until the transfer is achieved, else the call will return immediately,
-after having just queued the request. In the latter case, the request will
-asynchronously wait for the completion of any task writing on the data.
-
-\fn int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
-\ingroup API_Data_Management
-Issue a prefetch request for the data \p handle to \p node, i.e.
-requests that the data be replicated to \p node when there is room for it, so that it is
-available there for tasks. If \p async is 0, the call will
-block until the transfer is achieved, else the call will return immediately,
-after having just queued the request. In the latter case, the request will
-asynchronously wait for the completion of any task writing on the data.
-
-\fn int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async)
-\ingroup API_Data_Management
-Issue an idle prefetch request for the data \p handle to \p node, i.e.
-requests that the data be replicated to \p node, so that it is
-available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
-block until the transfer is achieved, else the call will return immediately,
-after having just queued the request. In the latter case, the request will
-asynchronously wait for the completion of any task writing on the data.
-
-\fn unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node)
-\ingroup API_Data_Management
-Check whether a valid copy of \p handle is currently available on memory node \p
-node .
-
-\fn void starpu_data_wont_use(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Advise StarPU that \p handle will not be used in the near future, and is
-thus a good candidate for eviction from GPUs. StarPU will thus write its value
-back to its home node when the bus is idle, and select this data in priority
-for eviction when memory gets low.
-
-\fn starpu_data_handle_t starpu_data_lookup(const void *ptr)
-\ingroup API_Data_Management
-Return the handle corresponding to the data pointed to by the \p ptr host pointer.
-
-\fn int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node)
-\ingroup API_Data_Management
-Explicitly ask StarPU to allocate room for a piece of data on
-the specified memory \p node.
-
-\fn void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested)
-\ingroup API_Data_Management
-Query the status of \p handle on the specified \p memory_node.
-
-\fn void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important)
-\ingroup API_Data_Management
-Specify that the data \p handle can be discarded without impacting the application.
-
-\fn void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl)
-\ingroup API_Data_Management
-Set the codelets to be used for \p handle when it is accessed in the
-mode ::STARPU_REDUX. Per-worker buffers will be initialized with
-the codelet \p init_cl, and reduction between per-worker buffers will be
-done with the codelet \p redux_cl.
-
-\fn struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-todo
-
-\fn void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data)
-\ingroup API_Data_Management
-Set the field \c user_data for the \p handle to \p user_data . It can
-then be retrieved with starpu_data_get_user_data(). \p user_data can be any
-application-defined value, for instance a pointer to an object-oriented
-container for the data.
-
-\fn void *starpu_data_get_user_data(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-This retrieves the field \c user_data previously set for the \p handle.
-
-@name Access registered data from the application
-\ingroup API_Data_Management
-
-\fn int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
-\ingroup API_Data_Management
-The application must call this function prior to accessing
-registered data from main memory outside tasks. StarPU ensures that
-the application will get an up-to-date copy of \p handle in main memory
-located where the data was originally registered, and that all
-concurrent accesses (e.g. from tasks) will be consistent with the
-access mode specified with \p mode. starpu_data_release() must
-be called once the application no longer needs to access the piece of
-data. Note that implicit data dependencies are also enforced
-by starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
-tasks scheduled to work on the data, unless they have been disabled
-explicitly by calling starpu_data_set_default_sequential_consistency_flag() or
-starpu_data_set_sequential_consistency_flag(). starpu_data_acquire() is a
-blocking call, so that it cannot be called from tasks or from their
-callbacks (in that case, starpu_data_acquire() returns <c>-EDEADLK</c>). Upon
-successful completion, this function returns 0.
-
-\fn int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
-\ingroup API_Data_Management
-Asynchronous equivalent of starpu_data_acquire(). When the data
-specified in \p handle is available in the access \p mode, the \p
-callback function is executed. The application may access
-the requested data during the execution of \p callback. The \p callback
-function must call starpu_data_release() once the application no longer
-needs to access the piece of data. Note that implicit data
-dependencies are also enforced by starpu_data_acquire_cb() in case they
-are not disabled. Contrary to starpu_data_acquire(), this function is
-non-blocking and may be called from task callbacks. Upon successful
-completion, this function returns 0.
-
-\fn int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
-\ingroup API_Data_Management
-Equivalent of starpu_data_acquire_cb() with the possibility of enabling or disabling data dependencies.
-When the data specified in \p handle is available in the access
-\p mode, the \p callback function is executed. The application may access
-the requested data during the execution of this \p callback. The \p callback
-function must call starpu_data_release() once the application no longer
-needs to access the piece of data. Note that implicit data
-dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
-are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
-Similarly to starpu_data_acquire_cb(), this function is
-non-blocking and may be called from task callbacks. Upon successful
-completion, this function returns 0.
-
-\fn int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
-\ingroup API_Data_Management
-The application can call this function instead of starpu_data_acquire() so as to
-acquire the data like starpu_data_acquire(), but only if all
-previously-submitted tasks have completed, in which case starpu_data_acquire_try()
-returns 0. StarPU will have ensured that the application will get an up-to-date
-copy of \p handle in main memory located where the data was originally
-registered. starpu_data_release() must be called once the application no longer
-needs to access the piece of data.
-
-If not all previously-submitted tasks have completed, starpu_data_acquire_try()
-returns <c>-EAGAIN</c>, and starpu_data_release() must not be called.
-
-\def STARPU_ACQUIRE_NO_NODE
-\ingroup API_Data_Management
-This macro can be used to acquire data, but not require it to be available on a given node, only enforce R/W dependencies.
-This can for instance be used to wait for tasks which produce the data, but without requesting a fetch to the main memory.
-
-\def STARPU_ACQUIRE_NO_NODE_LOCK_ALL
-\ingroup API_Data_Management
-This is the same as ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance.
-This is mostly useful inside starpu only.
-
-\fn int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
-\ingroup API_Data_Management
-This is the same as starpu_data_acquire(), except that the data
-will be available on the given memory node instead of main
-memory.
-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
-used instead of an explicit node number.
-
-\fn int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
-\ingroup API_Data_Management
-This is the same as starpu_data_acquire_cb(), except that the
-data will be available on the given memory node instead of main
-memory.
-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
-used instead of an explicit node number.
-
-\fn int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
-\ingroup API_Data_Management
-This is the same as starpu_data_acquire_cb_sequential_consistency(), except that the
-data will be available on the given memory node instead of main
-memory.
-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
-explicit node number.
-
-\fn int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid)
-\ingroup API_Data_Management
-This is the same as starpu_data_acquire_on_node_cb_sequential_consistency(),
-except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
-to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
-just before the acquisition, and \e post_sync_jobid happens just after the
-release.
-
-\fn int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
-\ingroup API_Data_Management
-This is the same as starpu_data_acquire_try(), except that the
-data will be available on the given memory node instead of main
-memory.
-::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
-explicit node number.
-
-\def STARPU_DATA_ACQUIRE_CB(handle, mode, code)
-\ingroup API_Data_Management
-STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
-except that the code to be executed in a callback is directly provided
-as a macro parameter, and the data \p handle is automatically released
-after it. This permits to easily execute code which depends on the
-value of some registered data. This is non-blocking too and may be
-called from task callbacks.
-
-\fn void starpu_data_release(starpu_data_handle_t handle)
-\ingroup API_Data_Management
-Release the piece of data acquired by the
-application either by starpu_data_acquire() or by
-starpu_data_acquire_cb().
-
-\fn void starpu_data_release_on_node(starpu_data_handle_t handle, int node)
-\ingroup API_Data_Management
-This is the same as starpu_data_release(), except that the data
-will be available on the given memory \p node instead of main memory.
-The \p node parameter must be exactly the same as the corresponding \c
-starpu_data_acquire_on_node* call.
-
-\fn starpu_arbiter_t starpu_arbiter_create(void)
-\ingroup API_Data_Management
-Create a data access arbiter, see \ref ConcurrentDataAccess for the details
-
-\fn void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter)
-\ingroup API_Data_Management
-Make access to \p handle managed by \p arbiter
-
-\fn void starpu_arbiter_destroy(starpu_arbiter_t arbiter)
-\ingroup API_Data_Management
-Destroy the \p arbiter . This must only be called after all data
-assigned to it have been unregistered.
-
-*/

+ 0 - 154
doc/doxygen/chapters/api/data_out_of_core.doxy

@@ -1,154 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2013,2017                                Inria
- * Copyright (C) 2013,2015,2017                           CNRS
- * Copyright (C) 2013,2014,2017                           Université de Bordeaux
- * Copyright (C) 2013                                     Corentin Salingue
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Out_Of_Core Out Of Core
-
-\def STARPU_DISK_SIZE_MIN
-\ingroup API_Out_Of_Core
-Minimum size of a registered disk. The size of a disk is the last parameter of the function starpu_disk_register().
-
-\struct starpu_disk_ops
-\ingroup API_Out_Of_Core
-This is a set of functions to manipulate data on disk.
-
-\var void* (*starpu_disk_ops::alloc)(void *base, size_t size)
-Create a new location for data of size \p size. This returns an opaque object pointer.
-
-\var void (*starpu_disk_ops::free)(void *base, void *obj, size_t size)
-Free a data \p obj previously allocated with \c alloc.
-
-\var void* (*starpu_disk_ops::open)(void *base, void *pos, size_t size)
-Open an existing location of data, at a specific position \p pos dependent on the backend.
-
-\var void (*starpu_disk_ops::close)(void *base, void *obj, size_t size)
-Close, without deleting it, a location of data \p obj.
-
-\var int (*starpu_disk_ops::read)(void *base, void *obj, void *buf, off_t offset, size_t size)
-Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put
-into \p buf. Returns the actual number of read bytes.
-
-\var int (*starpu_disk_ops::write)(void *base, void *obj, const void *buf, off_t offset, size_t size)
-Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Returns 0 on success.
-
-\var int (*starpu_disk_ops::full_read)(void * base, void * obj, void ** ptr, size_t * size)
-Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
-
-\var int (*starpu_disk_ops::full_write)(void * base, void * obj, void * ptr, size_t size)
-Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
-\p size, so that a \c full_read will get it.
-
-\var void* (*starpu_disk_ops::plug) (void *parameters, size_t size)
-Connect a disk memory at location \p parameters with size \p size, and return a
-base as void*, which will be passed by StarPU to all other methods.
-
-\var void (*starpu_disk_ops::unplug) (void* base)
-Disconnect a disk memory \p base.
-
-\var void* (*starpu_disk_ops::async_read)(void *base, void *obj, void *buf, off_t offset, size_t size)
-Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p
-offset, and put into \p buf. Returns a void* pointer that StarPU will pass to \c
-*_request methods for testing for the completion.
-
-\var void* (*starpu_disk_ops::async_write)(void *base, void *obj, const void *buf, off_t offset, size_t size)
-Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p
-offset, from \p buf. Returns a void* pointer that StarPU will pass to \c
-*_request methods for testing for the completion.
-
-\var void * (*starpu_disk_ops::async_full_read)(void * base, void * obj, void ** ptr, size_t * size)
-Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
-
-\var void * (*starpu_disk_ops::async_full_write)(void * base, void * obj, void * ptr, size_t size)
-Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
-\p size, so that a \c full_read will get it.
-
-\var void* (*starpu_disk_ops::copy)(void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size)
-Copy from offset \p offset_src of disk object \p obj_src in \p base_src to
-offset \p offset_dst of disk object \p obj_dst in \p base_dst. Returns a void*
-pointer that StarPU will pass to \c *_request methods for testing for the
-completion.
-
-\var int (*starpu_disk_ops::bandwidth) (unsigned node)
-Measure the bandwidth and the latency for the disk \p node and save it. Returns
-1 if it could measure it.
-
-\var void (*starpu_disk_ops::wait_request)(void *async_channel)
-Wait for completion of request \p async_channel returned by a previous
-asynchronous read, write or copy.
-
-\var void (*starpu_disk_ops::test_request)(void *async_channel)
-Test for completion of request \p async_channel returned by a previous
-asynchronous read, write or copy. Returns 1 on completion, 0 otherwise.
-
-\var void (*starpu_disk_ops::free_request)(void *async_channel)
-Free the request allocated by a previous asynchronous read, write or copy.
-
-\fn int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size)
-\ingroup API_Out_Of_Core
-Register a disk memory node with a set of functions to manipulate data. The \c
-plug member of \p func will be passed \p parameter, and return a \c base which will be passed to all \p func methods. <br />
-SUCCESS: return the disk node. <br />
-FAIL: return an error code. <br />
-\p size must be at least \ref STARPU_DISK_SIZE_MIN bytes. A negative \p size means infinite size.
-
-\fn void *starpu_disk_open(unsigned node, void *pos, size_t size)
-\ingroup API_Out_Of_Core
-Open an existing file memory in a disk node. \p size is the size of the
-file. \p pos is the specific position dependent on the backend, given to the \c open
-method of the disk operations. This returns an opaque object pointer.
-
-\fn void starpu_disk_close(unsigned node, void *obj, size_t size)
-\ingroup API_Out_Of_Core
-Close an existing data opened with starpu_disk_open().
-
-\var starpu_disk_swap_node
-\ingroup API_Out_Of_Core
-This contains the node number of the disk swap, if set up through the
-\ref STARPU_DISK_SWAP variable.
-
-\var starpu_disk_stdio_ops
-\ingroup API_Out_Of_Core
-This set uses the stdio library (fwrite, fread...) to read/write on disk. <br />
-<strong>Warning: It creates one file per allocation !</strong>  <br />
-It doesn't support asynchronous transfers.
-
-\var starpu_disk_unistd_ops
-\ingroup API_Out_Of_Core
-This set uses the unistd library (write, read...) to read/write on disk. <br />
-<strong>Warning: It creates one file per allocation !</strong>  <br />
-
-\var starpu_disk_unistd_o_direct_ops
-\ingroup API_Out_Of_Core
-This set uses the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. <br />
-<strong>Warning: It creates one file per allocation !</strong>  <br />
-Only available on Linux systems.
-
-\var starpu_disk_leveldb_ops
-\ingroup API_Out_Of_Core
-This set uses the leveldb created by Google <br />
-More information at https://code.google.com/p/leveldb/ <br />
-It doesn't support asynchronous transfers.
-
-\var starpu_disk_hdf5_ops
-\ingroup API_Out_Of_Core
-This set uses the HDF5 library.<br />
-<strong>It doesn't support multiple opening from different processes. </strong> <br />
-You may only allow one process to write in the HDF5 file. <br />
-<strong>If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. </strong>
-
-*/

+ 0 - 416
doc/doxygen/chapters/api/data_partition.doxy

@@ -1,416 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
- * Copyright (C) 2009-2011,2014,2015,2017,2018-2019       Université de Bordeaux
- * Copyright (C) 2011-2013                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Data_Partition Data Partition
-
-\struct starpu_data_filter
-The filter structure describes a data partitioning operation, to be
-given to the starpu_data_partition() function.
-\ingroup API_Data_Partition
-\var void (*starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *filter, unsigned i, unsigned nparts)
-    Fill the \p child_interface structure with interface information
-    for the \p i -th child of the parent \p father_interface (among
-    \p nparts). The \p filter structure is provided, allowing to inspect the
-    starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
-    parameters.
-
-    The details of what needs to be filled in \p child_interface vary according
-    to the data interface, but generally speaking:
-    <ul>
-    <li> <c>id</c> is usually just copied over from the father, when the sub data has the same structure as the father, e.g. a subvector is a vector, a submatrix is a matrix, etc. This is however not the case for instance when dividing a BCSR matrix into its dense blocks, which then are matrices. </li>
-    <li> <c>nx</c>, <c>ny</c> and alike are usually divided by the number of subdata, depending how the subdivision is done (e.g. nx division vs ny division for vertical matrix division vs horizontal matrix division). </li>
-    <li> <c>ld</c> for matrix interfaces are usually just copied over: the leading dimension (ld) usually does not change. </li>
-    <li> <c>elemsize</c> is usually just copied over. </li>
-    <li> <c>ptr</c>, the pointer to the data, has to be computed according to \p i and the father's <c>ptr</c>, so as to point to the start of the sub data. This should however be done only if the father has <c>ptr</c> different from NULL: in the OpenCL case notably, the <c>dev_handle</c> and <c>offset</c> fields are used instead. </li>
-    <li> <c>dev_handle</c> should be just copied over from the parent. </li>
-    <li> <c>offset</c> has to be computed according to \p i and the father's <c>offset</c>, so as to provide the offset of the start of the sub data. This is notably used for the OpenCL case. </li>
-    </ul>
-\var unsigned starpu_data_filter::nchildren
-    Number of parts to partition the data into.
-\var unsigned (*starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
-    Return the number of children. This can be used instead of
-    starpu_data_filter::nchildren when the number of children depends
-    on the actual data (e.g. the number of blocks in a sparse matrix).
-\var struct starpu_data_interface_ops *(*starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id)
-    In case the resulting children use a different data interface,
-    this function returns which interface is used by child number \p
-    id.
-\var unsigned starpu_data_filter::filter_arg
-    Additional parameter for the filter function
-\var void *starpu_data_filter::filter_arg_ptr
-    Additional pointer parameter for the filter
-    function, such as the sizes of the different parts.
-
-@name Basic API
-\ingroup API_Data_Partition
-
-\fn void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
-\ingroup API_Data_Partition
-Request the partitioning of \p initial_handle into several subdata
-according to the filter \p f.
-
-Here is an example of how to use the function.
-\code{.c}
-struct starpu_data_filter f =
-{
-        .filter_func = starpu_matrix_filter_block,
-        .nchildren = nslicesx
-};
-starpu_data_partition(A_handle, &f);
-\endcode
-
-\fn void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node)
-\ingroup API_Data_Partition
-Unapply the filter which has been applied to \p root_data, thus
-unpartitioning the data. The pieces of data are collected back into
-one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
-Tasks working on the partitioned data will be waited for
-by starpu_data_unpartition().
-
-Here is an example of how to use the function.
-\code{.c}
-starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
-\endcode
-
-\fn int starpu_data_get_nb_children(starpu_data_handle_t handle)
-\ingroup API_Data_Partition
-Return the number of children \p handle has been partitioned into.
-
-\fn starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
-\ingroup API_Data_Partition
-Return the \p i -th child of the given \p handle, which must have been
-partitioned beforehand.
-
-\fn starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... )
-\ingroup API_Data_Partition
-After partitioning a StarPU data by applying a filter,
-starpu_data_get_sub_data() can be used to get handles for each of the
-data portions. \p root_data is the parent data that was partitioned.
-\p depth is the number of filters to traverse (in case several filters
-have been applied, to e.g. partition in row blocks, and then in column
-blocks), and the subsequent parameters are the indexes. The function
-returns a handle to the subdata.
-
-Here is an example of how to use the function.
-\code{.c}
-h = starpu_data_get_sub_data(A_handle, 1, taskx);
-\endcode
-
-\fn starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa)
-\ingroup API_Data_Partition
-This function is similar to starpu_data_get_sub_data() but uses a
-va_list for the parameter list.
-
-\fn void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...)
-\ingroup API_Data_Partition
-Apply \p nfilters filters to the handle designated by
-\p root_data recursively. \p nfilters pointers to variables of the type
-starpu_data_filter should be given.
-
-\fn void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa)
-\ingroup API_Data_Partition
-Apply \p nfilters filters to the handle designated by
-\p root_data recursively. It uses a va_list of pointers to variables of
-the type starpu_data_filter.
-
-@name Asynchronous API
-\ingroup API_Data_Partition
-
-\fn void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children)
-\ingroup API_Data_Partition
-Plan to partition \p initial_handle into several subdata according to
-the filter \p f.
-The handles are returned into the \p children array, which has to be
-the same size as the number of parts described in \p f. These handles
-are not immediately usable, starpu_data_partition_submit() has to be
-called to submit the actual partitioning.
-
-Here is an example of how to use the function:
-
-\code{.c}
-starpu_data_handle_t children[nslicesx];
-struct starpu_data_filter f =
-{
-        .filter_func = starpu_matrix_filter_block,
-        .nchildren = nslicesx
-};
-starpu_data_partition_plan(A_handle, &f, children);
-\endcode
-
-\fn void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
-\ingroup API_Data_Partition
-Submit the actual partitioning of \p initial_handle into the \p nparts
-\p children handles. This call is asynchronous, it only submits that the
-partitioning should be done, so that the \p children handles can now be used to
-submit tasks, and \p initial_handle can not be used to submit tasks any more (to
-guarantee coherency).
-
-For instance,
-
-\code{.c}
-starpu_data_partition_submit(A_handle, nslicesx, children);
-\endcode
-
-\fn void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
-\ingroup API_Data_Partition
-This is the same as starpu_data_partition_submit(), but it does not invalidate \p
-initial_handle. This allows to continue using it, but the application has to be
-careful not to write to \p initial_handle or \p children handles, only read from
-them, since the coherency is otherwise not guaranteed.  This thus allows to
-submit various tasks which concurrently read from various partitions of the data.
-
-When the application wants to write to \p initial_handle again, it should call
-starpu_data_unpartition_submit(), which will properly add dependencies between the
-reads on the \p children and the writes to be submitted.
-
-If instead the application wants to write to \p children handles, it should
-call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
-dependencies between the reads on the \p initial_handle and the writes to be
-submitted.
-
-\fn void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
-\ingroup API_Data_Partition
-This assumes that a partitioning of \p initial_handle has already been submitted
-in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
-that partitioning into read-write mode for the \p children, by invalidating \p
-initial_handle, and adding the necessary dependencies.
-
-\fn void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency)
-\ingroup API_Data_Partition
-Similar to starpu_data_partition_submit() but also allows to
-specify the coherency to be used for the main data \p initial_handle
-through the parameter \p sequential_consistency.
-
-\fn void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg)
-\ingroup API_Data_Partition
-Similar to starpu_data_unpartition_submit_sequential_consistency() but
-allows to specify a callback function for the unpartitioning task.
-
-\fn void starpu_data_partition_not_automatic(starpu_data_handle_t handle)
-\ingroup API_Data_Partition
-Disable the automatic partitioning of the data \p handle for which an
-asynchronous plan has previously been submitted.
-
-\fn void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
-\ingroup API_Data_Partition
-This assumes that \p initial_handle is partitioned into \p children, and submits
-an unpartitioning of it, i.e. submitting a gathering of the pieces on the
-requested \p gathering_node memory node, and submitting an invalidation of the
-children.
-
-\p gathering_node can be set to -1 to let the runtime decide which memory node
-should be used to gather the pieces.
-
-This call is asynchronous, it only submits that the unpartitioning should be
-done, so that the \p children handles should not be used to submit tasks any
-more, and \p initial_handle can now be used again to submit tasks.
-
-\fn void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
-\ingroup API_Data_Partition
-This assumes that \p initial_handle is partitioned into \p children, and submits
-just a readonly unpartitioning of it, i.e. submitting a gathering of the pieces
-on the requested \p gathering_node memory node. It does not invalidate the
-children. This brings \p initial_handle and \p children handles to the same
-state as obtained with starpu_data_partition_readonly_submit().
-
-\p gathering_node can be set to -1 to let the runtime decide which memory node
-should be used to gather the pieces.
-
-\fn void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency)
-\ingroup API_Data_Partition
-Similar to starpu_data_unpartition_submit() but also allows to
-specify the coherency to be used for the main data \p initial_handle
-through the parameter \p sequential_consistency.
-
-\fn void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
-\ingroup API_Data_Partition
-This should be used to clear the partition planning established between \p
-root_data and \p children with starpu_data_partition_plan(). This will notably
-submit an unregistration of all the \p children, which can thus not be used any more
-afterwards.
-
-@name Predefined Vector Filter Functions
-\ingroup API_Data_Partition
-
-This section gives a partial list of the predefined partitioning
-functions for vector data. Examples on how to use them are shown in
-\ref PartitioningData. The complete list can be found in the file
-<c>starpu_data_filters.h</c>.
-
-\fn void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Return in \p child_interface the \p id th element of the vector
-represented by \p father_interface once partitioned in \p nparts chunks of
-equal size.
-
-\fn void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Return in \p child_interface the \p id th element of the vector
-represented by \p father_interface once partitioned in \p nparts chunks of
-equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
-of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
-of \p f must be the shadow size cast into \c void*.
-
-<b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
-enforced for the shadowed parts. A usage example is available in
-examples/filters/shadow.c
-
-\fn void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Return in \p child_interface the \p id th element of the vector
-represented by \p father_interface once partitioned into \p nparts chunks
-according to the <c>filter_arg_ptr</c> field of \p f. The
-<c>filter_arg_ptr</c> field must point to an array of \p nparts long
-elements, each of which specifies the number of elements in each chunk
-of the partition.
-
-\fn void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Return in \p child_interface the \p id th element of the vector
-represented by \p father_interface once partitioned into \p nparts chunks
-according to the <c>filter_arg_ptr</c> field of \p f. The
-<c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
-elements, each of which specifies the number of elements in each chunk
-of the partition.
-
-\fn void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Return in \p child_interface the \p id th element of the vector
-represented by \p father_interface once partitioned in <c>2</c> chunks of
-equal size, ignoring \p nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
-
-@name Predefined Matrix Filter Functions
-\ingroup API_Data_Partition
-
-This section gives a partial list of the predefined partitioning
-functions for matrix data. Examples on how to use them are shown in
-\ref PartitioningData. The complete list can be found in the file
-<c>starpu_data_filters.h</c>.
-
-\fn void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a dense Matrix along the x dimension, thus
-getting (x/\p nparts ,y) matrices. If \p nparts does not divide x, the
-last submatrix contains the remainder.
-
-\fn void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a dense Matrix along the x dimension, with a
-shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
-nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
-the last submatrix contains the remainder.
-
-<b>IMPORTANT</b>: This can
-only be used for read-only access, as no coherency is enforced for the
-shadowed parts. A usage example is available in
-examples/filters/shadow2d.c
-
-\fn void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a dense Matrix along the y dimension, thus
-getting (x,y/\p nparts) matrices. If \p nparts does not divide y, the
-last submatrix contains the remainder.
-
-\fn void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a dense Matrix along the y dimension, with a
-shadow border <c>filter_arg_ptr</c>, thus getting
-(x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
-divide y-2*shadow, the last submatrix contains the remainder.
-
-<b>IMPORTANT</b>: This can only be used for read-only access, as no
-coherency is enforced for the shadowed parts. A usage example is
-available in examples/filters/shadow2d.c
-
-@name Predefined Block Filter Functions
-\ingroup API_Data_Partition
-
-This section gives a partial list of the predefined partitioning
-functions for block data. Examples on how to use them are shown in
-\ref PartitioningData. The complete list can be found in the file
-<c>starpu_data_filters.h</c>. A usage example is available in
-examples/filters/shadow3d.c
-
-\fn void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the X dimension, thus getting
-(x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
-submatrix contains the remainder.
-
-\fn void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the X dimension, with a
-shadow border <c>filter_arg_ptr</c>, thus getting
-((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
-divide x-2*shadow, the last submatrix contains the remainder.
-
-<b>IMPORTANT</b>:
-This can only be used for read-only access, as no coherency is
-enforced for the shadowed parts.
-
-\fn void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the Y dimension, thus getting
-(x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
-submatrix contains the remainder.
-
-\fn void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the Y dimension, with a
-shadow border <c>filter_arg_ptr</c>, thus getting
-(x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
-divide y-2*shadow, the last submatrix contains the remainder.
-
-<b>IMPORTANT</b>:
-This can only be used for read-only access, as no coherency is
-enforced for the shadowed parts.
-
-\fn void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the Z dimension, thus getting
-(x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
-submatrix contains the remainder.
-
-\fn void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block along the Z dimension, with a
-shadow border <c>filter_arg_ptr</c>, thus getting
-(x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
-divide z-2*shadow, the last submatrix contains the remainder.
-
-<b>IMPORTANT</b>:
-This can only be used for read-only access, as no coherency is
-enforced for the shadowed parts.
-
-@name Predefined BCSR Filter Functions
-\ingroup API_Data_Partition
-
-This section gives a partial list of the predefined partitioning
-functions for BCSR data. Examples on how to use them are shown in
-\ref PartitioningData. The complete list can be found in the file
-<c>starpu_data_filters.h</c>.
-
-\fn void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block-sparse matrix into dense matrices.
-
-\fn void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
-\ingroup API_Data_Partition
-Partition a block-sparse matrix into vertical block-sparse matrices.
-
-*/

+ 0 - 35
doc/doxygen/chapters/api/expert_mode.doxy

@@ -1,35 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Expert_Mode Expert Mode
-
-\fn void starpu_wake_all_blocked_workers(void)
-\ingroup API_Expert_Mode
-Wake all the workers, so they can inspect data requests and task
-submissions again.
-
-\fn int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
-\ingroup API_Expert_Mode
-Register a progression hook, to be called when workers are idle.
-
-\fn void starpu_progression_hook_deregister(int hook_id)
-\ingroup API_Expert_Mode
-Unregister a given progression hook.
-
-*/
-

+ 0 - 120
doc/doxygen/chapters/api/fxt_support.doxy

@@ -1,120 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_FxT_Support FxT Support
-
-\struct starpu_fxt_codelet_event
-todo
-\ingroup API_FxT_Support
-\var char starpu_fxt_codelet_event::symbol[256]
-    name of the codelet
-\var int starpu_fxt_codelet_event::workerid
-    todo
-\var char starpu_fxt_codelet_event::perfmodel_archname[256]
-    todo
-\var uint32_t starpu_fxt_codelet_event::hash
-    todo
-\var size_t starpu_fxt_codelet_event::size
-    todo
-\var float starpu_fxt_codelet_event::time
-    todo
-
-\struct starpu_fxt_options
-todo
-\ingroup API_FxT_Support
-\var unsigned starpu_fxt_options::per_task_colour
-    todo
-\var unsigned starpu_fxt_options::no_counter
-    todo
-\var unsigned starpu_fxt_options::no_bus
-    todo
-\var unsigned starpu_fxt_options::ninputfiles
-    todo
-\var char *starpu_fxt_options::filenames[STARPU_FXT_MAX_FILES]
-    todo
-\var char *starpu_fxt_options::out_paje_path
-    todo
-\var char *starpu_fxt_options::distrib_time_path
-    todo
-\var char *starpu_fxt_options::activity_path
-    todo
-\var char *starpu_fxt_options::dag_path
-    todo
-\var char *starpu_fxt_options::file_prefix
-    In case we are going to gather multiple traces (e.g. in the case of
-    MPI processes), we may need to prefix the name of the containers.
-\var uint64_t starpu_fxt_options::file_offset
-    In case we are going to gather multiple traces (e.g. in the case of
-    MPI processes), we may need to prefix the name of the containers.
-\var int starpu_fxt_options::file_rank
-    In case we are going to gather multiple traces (e.g. in the case of
-    MPI processes), we may need to prefix the name of the containers.
-\var char starpu_fxt_options::worker_names[STARPU_NMAXWORKERS][256]
-    Output parameters
-\var struct starpu_perfmodel_arch starpu_fxt_options::worker_archtypes[STARPU_NMAXWORKERS]
-    Output parameters
-\var int starpu_fxt_options::nworkers
-    Output parameters
-\var struct starpu_fxt_codelet_event **starpu_fxt_options::dumped_codelets
-    In case we want to dump the list of codelets to an external tool
-\var long starpu_fxt_options::dumped_codelets_count
-    In case we want to dump the list of codelets to an external tool
-
-\fn void starpu_fxt_options_init(struct starpu_fxt_options *options)
-\ingroup API_FxT_Support
-todo
-
-\fn void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
-\ingroup API_FxT_Support
-todo
-
-\fn void starpu_fxt_start_profiling(void)
-\ingroup API_FxT_Support
-Start recording the trace. The trace is by default started from
-starpu_init() call, but can be paused by using
-starpu_fxt_stop_profiling(), in which case
-starpu_fxt_start_profiling() should be called to resume recording
-events.
-
-\fn void starpu_fxt_stop_profiling(void)
-\ingroup API_FxT_Support
-Stop recording the trace. The trace is by default stopped when calling
-starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
-stop it earlier. starpu_fxt_start_profiling() can then be called to
-start recording it again, etc.
-
-\fn void starpu_fxt_autostart_profiling(int autostart)
-\ingroup API_FxT_Support
-Determine whether profiling should be started by starpu_init(), or only when
-starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
-prevent it.
-
-\fn void starpu_fxt_write_data_trace(char *filename_in)
-\ingroup API_FxT_Support
-todo
-
-\fn void starpu_fxt_trace_user_event(unsigned long code)
-\ingroup API_FxT_Support
-Add an event in the execution trace if FxT is enabled.
-
-\fn void starpu_fxt_trace_user_event_string(const char *s)
-\ingroup API_FxT_Support
-Add a string event in the execution trace if FxT is enabled.
-
-*/
-

+ 0 - 56
doc/doxygen/chapters/api/implicit_dependencies.doxy

@@ -1,56 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2017                                CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Implicit_Data_Dependencies Implicit Data Dependencies
-
-\brief In this section, we describe how StarPU makes it possible to
-insert implicit task dependencies in order to enforce sequential data
-consistency. When this data consistency is enabled on a specific data
-handle, any data access will appear as sequentially consistent from
-the application. For instance, if the application submits two tasks
-that access the same piece of data in read-only mode, and then a third
-task that accesses it in write mode, dependencies will be added between
-the two first tasks and the third one. Implicit data dependencies are
-also inserted in the case of data accesses from the application.
-
-\fn void starpu_data_set_default_sequential_consistency_flag(unsigned flag)
-\ingroup API_Implicit_Data_Dependencies
-Set the default sequential consistency flag. If a non-zero
-value is passed, a sequential data consistency will be enforced for
-all handles registered after this function call, otherwise it is
-disabled. By default, StarPU enables sequential data consistency. It
-is also possible to select the data consistency mode of a specific
-data handle with the function
-starpu_data_set_sequential_consistency_flag().
-
-\fn unsigned starpu_data_get_default_sequential_consistency_flag(void)
-\ingroup API_Implicit_Data_Dependencies
-Return the default sequential consistency flag
-
-\fn void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag)
-\ingroup API_Implicit_Data_Dependencies
-Set the data consistency mode associated to a data handle. The
-consistency mode set using this function has the priority over the
-default mode which can be set with
-starpu_data_set_default_sequential_consistency_flag().
-
-\fn unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle)
-\ingroup API_Implicit_Data_Dependencies
-Get the data consistency mode associated to the data handle \p handle
-
-*/

+ 0 - 60
doc/doxygen/chapters/api/lower_bound.doxy

@@ -1,60 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
-
-\brief Compute theoretical upper computation efficiency bound
-corresponding to some actual execution.
-
-\fn void starpu_bound_start(int deps, int prio)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Start recording tasks (resets stats). \p deps tells whether
-dependencies should be recorded too (this is quite expensive)
-
-\fn void starpu_bound_stop(void)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Stop recording tasks
-
-\fn void starpu_bound_print_dot(FILE *output)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Emit the DAG that was recorded on \p output.
-
-\fn void starpu_bound_compute(double *res, double *integer_res, int integer)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Get theoretical upper bound (in ms) (needs glpk support
-detected by configure script). It returns 0 if some performance models
-are not calibrated.
-
-\fn void starpu_bound_print_lp(FILE *output)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Emit the Linear Programming system on \p output for the recorded
-tasks, in the lp format
-
-\fn void starpu_bound_print_mps(FILE *output)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Emit the Linear Programming system on \p output for the recorded
-tasks, in the mps format
-
-\fn void starpu_bound_print(FILE *output, int integer)
-\ingroup API_Theoretical_Lower_Bound_on_Execution_Time
-Emit on \p output the statistics of actual execution vs theoretical upper bound.
-\p integer allows choosing between integer solving (which takes a
-long time but is correct), and relaxed solving (which provides an
-approximate solution).
-
-*/

+ 2 - 18
doc/doxygen/chapters/api/mic_extensions.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  *
  *
@@ -16,7 +16,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-/*! \defgroup API_MIC_Extensions MIC Extensions
+/*! \ingroup API_MIC_Extensions
 
 
 \def STARPU_USE_MIC
 \def STARPU_USE_MIC
 \ingroup API_MIC_Extensions
 \ingroup API_MIC_Extensions
@@ -27,20 +27,4 @@ It should be used in your code to detect the availability of MIC.
 \ingroup API_MIC_Extensions
 \ingroup API_MIC_Extensions
 Define the maximum number of MIC devices that are supported by StarPU.
 Define the maximum number of MIC devices that are supported by StarPU.
 
 
-\typedef starpu_mic_func_symbol_t
-\ingroup API_MIC_Extensions
-Type for MIC function symbols
-
-\fn int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
-\ingroup API_MIC_Extensions
-Initiate a lookup on each MIC device to find the address of the
-function named \p func_name, store it in the global array kernels
-and return the index in the array through \p symbol.
-
-\fn starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
-\ingroup API_MIC_Extensions
-If successful, return the pointer to the function defined by \p symbol on
-the device linked to the called device. This can for instance be used
-in a starpu_mic_func_t implementation.
-
 */
 */

+ 0 - 92
doc/doxygen/chapters/api/multiformat_data_interface.doxy

@@ -1,92 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Multiformat_Data_Interface Multiformat Data Interface
-
-\struct starpu_multiformat_data_interface_ops
-\ingroup API_Multiformat_Data_Interface
-The different fields are:
-\var size_t starpu_multiformat_data_interface_ops::cpu_elemsize
-        the size of each element on CPUs
-\var size_t starpu_multiformat_data_interface_ops::opencl_elemsize
-        the size of each element on OpenCL devices
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_opencl_cl
-        pointer to a codelet which converts from CPU to OpenCL
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::opencl_to_cpu_cl
-        pointer to a codelet which converts from OpenCL to CPU
-\var size_t starpu_multiformat_data_interface_ops::cuda_elemsize
-        the size of each element on CUDA devices
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_cuda_cl
-        pointer to a codelet which converts from CPU to CUDA
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cuda_to_cpu_cl
-        pointer to a codelet which converts from CUDA to CPU
-\var size_t starpu_multiformat_data_interface_ops::mic_elemsize
-        the size of each element on MIC devices
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::cpu_to_mic_cl
-        pointer to a codelet which converts from CPU to MIC
-\var struct starpu_codelet *starpu_multiformat_data_interface_ops::mic_to_cpu_cl
-        pointer to a codelet which converts from MIC to CPU
-
-\struct starpu_multiformat_interface
-todo
-\ingroup API_Multiformat_Data_Interface
-\var enum starpu_data_interface_id starpu_multiformat_interface::id
-    todo
-\var void *starpu_multiformat_interface::cpu_ptr
-    todo
-\var void *starpu_multiformat_interface::cuda_ptr
-    todo
-\var void *starpu_multiformat_interface::opencl_ptr
-    todo
-\var void *starpu_multiformat_interface::mic_ptr
-    todo
-\var uint32_t starpu_multiformat_interface::nx
-    todo
-\var struct starpu_multiformat_data_interface_ops *starpu_multiformat_interface::ops
-    todo
-
-\fn void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops)
-\ingroup API_Multiformat_Data_Interface
-Register a piece of data that can be represented in different
-ways, depending upon the processing unit that manipulates it. It
-allows the programmer, for instance, to use an array of structures
-when working on a CPU, and a structure of arrays when working on a
-GPU. \p nobjects is the number of elements in the data. \p format_ops
-describes the format.
-
-\def STARPU_MULTIFORMAT_GET_CPU_PTR(interface)
-\ingroup API_Multiformat_Data_Interface
-Return the local pointer to the data with CPU format.
-
-\def STARPU_MULTIFORMAT_GET_CUDA_PTR(interface)
-\ingroup API_Multiformat_Data_Interface
-Return the local pointer to the data with CUDA format.
-
-\def STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface)
-\ingroup API_Multiformat_Data_Interface
-Return the local pointer to the data with OpenCL format.
-
-\def STARPU_MULTIFORMAT_GET_MIC_PTR(interface)
-\ingroup API_Multiformat_Data_Interface
-Return the local pointer to the data with MIC format.
-
-\def STARPU_MULTIFORMAT_GET_NX(interface)
-\ingroup API_Multiformat_Data_Interface
-Return the number of elements in the data.
-
-*/

+ 2 - 234
doc/doxygen/chapters/api/opencl_extensions.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
  * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  *
  *
@@ -16,7 +16,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-/*! \defgroup API_OpenCL_Extensions OpenCL Extensions
+/*! \ingroup API_OpenCL_Extensions
 
 
 \def STARPU_USE_OPENCL
 \def STARPU_USE_OPENCL
 \ingroup API_OpenCL_Extensions
 \ingroup API_OpenCL_Extensions
@@ -34,236 +34,4 @@ supported by StarPU.
 Define the directory in which the OpenCL codelets of the
 Define the directory in which the OpenCL codelets of the
 applications provided with StarPU have been installed.
 applications provided with StarPU have been installed.
 
 
-\struct starpu_opencl_program
-\ingroup API_OpenCL_Extensions
-Store the OpenCL programs as compiled for the different OpenCL
-devices.
-\var cl_program starpu_opencl_program::programs[STARPU_MAXOPENCLDEVS]
-    Store each program for each OpenCL device.
-
-@name Writing OpenCL kernels
-\ingroup API_OpenCL_Extensions
-
-\fn void starpu_opencl_get_context(int devid, cl_context *context)
-\ingroup API_OpenCL_Extensions
-Return the OpenCL context of the device designated by \p devid
-in \p context.
-
-\fn void starpu_opencl_get_device(int devid, cl_device_id *device)
-\ingroup API_OpenCL_Extensions
-Return the cl_device_id corresponding to \p devid in \p device.
-
-\fn void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
-\ingroup API_OpenCL_Extensions
-Return the command queue of the device designated by \p devid
-into \p queue.
-
-\fn void starpu_opencl_get_current_context(cl_context *context)
-\ingroup API_OpenCL_Extensions
-Return the context of the current worker.
-
-\fn void starpu_opencl_get_current_queue(cl_command_queue *queue)
-\ingroup API_OpenCL_Extensions
-Return the computation kernel command queue of the current
-worker.
-
-\fn int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...)
-\ingroup API_OpenCL_Extensions
-Set the arguments of a given kernel. The list of arguments
-must be given as <c>(size_t size_of_the_argument, cl_mem *
-pointer_to_the_argument)</c>. The last argument must be 0. Return the
-number of arguments that were successfully set. In case of failure,
-return the id of the argument that could not be set and \p err is set to
-the error returned by OpenCL. Otherwise, return the number of
-arguments that were set.
-
-Here is an example:
-\code{.c}
-int n;
-cl_int err;
-cl_kernel kernel;
-n = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
-if (n != 2)
-   fprintf(stderr, "Error : %d\n", err);
-\endcode
-
-@name Compiling OpenCL kernels
-\ingroup API_OpenCL_Extensions
-
-Source codes for OpenCL kernels can be stored in a file or in a
-string. StarPU provides functions to build the program executable for
-each available OpenCL device as a cl_program object. This program
-executable can then be loaded within a specific queue as explained in
-the next section. These are only helpers; applications can also fill a
-starpu_opencl_program array by hand for more advanced use (e.g.
-different programs on the different OpenCL devices, for relocation
-purpose for instance).
-
-\fn int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options)
-\ingroup API_OpenCL_Extensions
-Compile an OpenCL source code stored in a file.
-
-\fn int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options)
-\ingroup API_OpenCL_Extensions
-Compile an OpenCL source code stored in a string.
-
-\fn int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs)
-\ingroup API_OpenCL_Extensions
-Unload an OpenCL compiled code.
-
-\fn void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source)
-\ingroup API_OpenCL_Extensions
-Store the contents of the file \p source_file_name in the buffer
-\p opencl_program_source. The file \p source_file_name can be located in the
-current directory, or in the directory specified by the environment
-variable \ref STARPU_OPENCL_PROGRAM_DIR, or
-in the directory <c>share/starpu/opencl</c> of the installation
-directory of StarPU, or in the source directory of StarPU. When the
-file is found, \p located_file_name is the full name of the file as it
-has been located on the system, \p located_dir_name the directory
-where it has been located. Otherwise, they are both set to the empty
-string.
-
-\fn void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source)
-\ingroup API_OpenCL_Extensions
-Similar to function starpu_opencl_load_program_source() but allocate the buffers \p located_file_name, \p located_dir_name and \p opencl_program_source.
-
-\fn int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options)
-\ingroup API_OpenCL_Extensions
-Compile the OpenCL kernel stored in the file \p source_file_name
-with the given options \p build_options and store the result in the
-directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
-\p source_file_name. The compilation is done for every OpenCL device,
-and the filename is suffixed with the vendor id and the device id of
-the OpenCL device.
-
-\fn int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options)
-\ingroup API_OpenCL_Extensions
-Compile the OpenCL kernel in the string \p opencl_program_source
-with the given options \p build_options and store the result in the
-directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
-file_name. The compilation is done for every OpenCL device, and the
-filename is suffixed with the vendor id and the device id of the
-OpenCL device.
-
-\fn int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs)
-\ingroup API_OpenCL_Extensions
-Compile the binary OpenCL kernel identified with \p kernel_id.
-For every OpenCL device, the binary OpenCL kernel will be loaded from
-the file
-<c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
-
-@name Loading OpenCL kernels
-\ingroup API_OpenCL_Extensions
-
-\fn int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid)
-\ingroup API_OpenCL_Extensions
-Create a kernel \p kernel for device \p devid, on its computation
-command queue returned in \p queue, using program \p opencl_programs
-and name \p kernel_name.
-
-\fn int starpu_opencl_release_kernel(cl_kernel kernel)
-\ingroup API_OpenCL_Extensions
-Release the given \p kernel, to be called after kernel execution.
-
-@name OpenCL statistics
-
-\fn int starpu_opencl_collect_stats(cl_event event)
-\ingroup API_OpenCL_Extensions
-Collect statistics on a kernel execution.
-After termination of the kernels, the OpenCL codelet should call this
-function with the event returned by \c clEnqueueNDRangeKernel(), to
-let StarPU collect statistics about the kernel execution (used cycles,
-consumed energy).
-
-@name OpenCL utilities
-\ingroup API_OpenCL_Extensions
-
-\fn const char *starpu_opencl_error_string(cl_int status)
-\ingroup API_OpenCL_Extensions
-Return the error message in English corresponding to \p status, an OpenCL
-error code.
-
-\fn void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status)
-\ingroup API_OpenCL_Extensions
-Given a valid error status, print the corresponding error message on
-\c stdout, along with the function name \p func, the filename
-\p file, the line number \p line and the message \p msg.
-
-\def STARPU_OPENCL_DISPLAY_ERROR(status)
-\ingroup API_OpenCL_Extensions
-Call the function starpu_opencl_display_error() with the error
-\p status, the current function name, current file and line number,
-and an empty message.
-
-\fn void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
-\ingroup API_OpenCL_Extensions
-Call the function starpu_opencl_display_error() and abort.
-
-\def STARPU_OPENCL_REPORT_ERROR(status)
-\ingroup API_OpenCL_Extensions
-Call the function starpu_opencl_report_error() with the error \p
-status, the current function name, current file and line number,
-and an empty message.
-
-\def STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status)
-\ingroup API_OpenCL_Extensions
-Call the function starpu_opencl_report_error() with \p msg
-and \p status, the current function name, current file and line number.
-
-\fn cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags)
-\ingroup API_OpenCL_Extensions
-Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
-combination of \c cl_mem_flags values.
-
-\fn cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
-\ingroup API_OpenCL_Extensions
-Copy \p size bytes from the given \p ptr on RAM \p src_node to the
-given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
-bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
-synchronous, i.e. the queue is synchronised before returning. If not
-<c>NULL</c>, \p event can be used after the call to wait for this
-particular copy to complete. This function returns <c>CL_SUCCESS</c>
-if the copy was successful, or a valid OpenCL error code otherwise.
-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
-asynchronous launch was successful, or to 0 if \p event was
-<c>NULL</c>.
-
-\fn cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
-\ingroup API_OpenCL_Extensions
-Copy \p size bytes asynchronously from the given \p buffer on OpenCL
-\p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
-offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
-is synchronous, i.e. the queue is synchronised before returning. If not
-<c>NULL</c>, \p event can be used after the call to wait for this
-particular copy to complete. This function returns <c>CL_SUCCESS</c>
-if the copy was successful, or a valid OpenCL error code otherwise.
-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
-asynchronous launch was successful, or to 0 if \p event was
-<c>NULL</c>.
-
-\fn cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret)
-\ingroup API_OpenCL_Extensions
-Copy \p size bytes asynchronously from byte offset \p src_offset of \p
-src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
-OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
-synchronous, i.e. the queue is synchronised before returning. If not
-<c>NULL</c>, \p event can be used after the call to wait for this
-particular copy to complete. This function returns <c>CL_SUCCESS</c>
-if the copy was successful, or a valid OpenCL error code otherwise.
-The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
-asynchronous launch was successful, or to 0 if \p event was
-<c>NULL</c>.
-
-\fn cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event)
-\ingroup API_OpenCL_Extensions
-Copy \p size bytes from byte offset \p src_offset of \p src on \p
-src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
-event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
-synchronised before returning. If not <c>NULL</c>, \p event can be
-used after the call to wait for this particular copy to complete. The
-function returns <c>-EAGAIN</c> if the asynchronous launch was
-successful. It returns 0 if the synchronous copy was successful, or
-fails otherwise.
-
 */
 */

+ 2 - 956
doc/doxygen/chapters/api/openmp_runtime_support.doxy

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2014,2015,2017                           CNRS
+ * Copyright (C) 2014,2015,2017,2019                      CNRS
  * Copyright (C) 2014,2016                                Inria
  * Copyright (C) 2014,2016                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,201 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-/*! \defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
-
-\brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
-
-
-\struct starpu_omp_lock_t
-\ingroup API_OpenMP_Runtime_Support
-Opaque Simple Lock object (\ref SimpleLock) for inter-task synchronization operations.
-
-\sa starpu_omp_init_lock()
-\sa starpu_omp_destroy_lock()
-\sa starpu_omp_set_lock()
-\sa starpu_omp_unset_lock()
-\sa starpu_omp_test_lock()
-
-\var starpu_omp_lock_t::internal
-Is an opaque pointer for internal use.
-
-
-\struct starpu_omp_nest_lock_t
-\ingroup API_OpenMP_Runtime_Support
-Opaque Nestable Lock object (\ref NestableLock) for inter-task synchronization operations.
-
-\sa starpu_omp_init_nest_lock()
-\sa starpu_omp_destroy_nest_lock()
-\sa starpu_omp_set_nest_lock()
-\sa starpu_omp_unset_nest_lock()
-\sa starpu_omp_test_nest_lock()
-\var starpu_omp_nest_lock_t::internal
-Is an opaque pointer for internal use.
-
-
-\enum starpu_omp_sched_value
-\ingroup API_OpenMP_Runtime_Support
-Set of constants for selecting the for loop iteration scheduling algorithm
-(\ref OMPFor) as defined by the OpenMP specification.
-
-\var starpu_omp_sched_value::starpu_omp_sched_undefined
-\ingroup API_OpenMP_Runtime_Support
-Undefined iteration scheduling algorithm.
-
-\var starpu_omp_sched_value::starpu_omp_sched_static
-\ingroup API_OpenMP_Runtime_Support
-\b Static iteration scheduling algorithm.
-
-\var starpu_omp_sched_value::starpu_omp_sched_dynamic
-\ingroup API_OpenMP_Runtime_Support
-\b Dynamic iteration scheduling algorithm.
-
-\var starpu_omp_sched_value::starpu_omp_sched_guided
-\ingroup API_OpenMP_Runtime_Support
-\b Guided iteration scheduling algorithm.
-
-\var starpu_omp_sched_value::starpu_omp_sched_auto
-\ingroup API_OpenMP_Runtime_Support
-\b Automatically chosen iteration scheduling algorithm.
-
-\var starpu_omp_sched_value::starpu_omp_sched_runtime
-\ingroup API_OpenMP_Runtime_Support
-Choice of iteration scheduling algorithm deferred at \b runtime.
-
-\sa starpu_omp_for()
-\sa starpu_omp_for_inline_first()
-\sa starpu_omp_for_inline_next()
-\sa starpu_omp_for_alt()
-\sa starpu_omp_for_inline_first_alt()
-\sa starpu_omp_for_inline_next_alt()
-
-
-\enum starpu_omp_proc_bind_value
-\ingroup API_OpenMP_Runtime_Support
-Set of constants for selecting the processor binding method, as defined in the
-OpenMP specification.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_undefined
-\ingroup API_OpenMP_Runtime_Support
-Undefined processor binding method.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_false
-\ingroup API_OpenMP_Runtime_Support
-Team threads may be moved between places at any time.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_true
-\ingroup API_OpenMP_Runtime_Support
-Team threads may not be moved between places.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_master
-\ingroup API_OpenMP_Runtime_Support
-Assign every thread in the team to the same place as the \b master thread.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_close
-\ingroup API_OpenMP_Runtime_Support
-Assign every thread in the team to a place \b close to the parent thread.
-
-\var starpu_omp_proc_bind_value::starpu_omp_proc_bind_spread
-\ingroup API_OpenMP_Runtime_Support
-Assign team threads as a sparse distribution over the selected places.
-
-\sa starpu_omp_get_proc_bind()
-
-
-\struct starpu_omp_parallel_region_attr
-\ingroup API_OpenMP_Runtime_Support
-Set of attributes used for creating a new parallel region.
-
-\sa starpu_omp_parallel_region()
-
-\var struct starpu_codelet starpu_omp_parallel_region_attr::cl
-
-Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the parallel region
-implicit tasks. The codelet must provide a CPU implementation function.
-
-\var starpu_data_handle_t *starpu_omp_parallel_region_attr::handles
-
-Is an array of zero or more ::starpu_data_handle_t data handles to be passed to
-the parallel region implicit tasks.
-
-\var void *starpu_omp_parallel_region_attr::cl_arg
-
-Is an optional pointer to an inline argument to be passed to the region implicit tasks.
-
-\var size_t starpu_omp_parallel_region_attr::cl_arg_size
-
-Is the size of the optional inline argument to be passed to the region implicit tasks, or 0 if unused.
-
-\var unsigned starpu_omp_parallel_region_attr::cl_arg_free
-
-Is a boolean indicating whether the optional inline argument should be automatically freed (true), or not (false).
-
-\var int starpu_omp_parallel_region_attr::if_clause
-
-Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
-omp parallel</c> is true or false.
-
-\var int starpu_omp_parallel_region_attr::num_threads
-
-Is an integer indicating the requested number of threads in the team of the
-newly created parallel region, or 0 to let the runtime choose the number of
-threads alone. This attribute may be ignored by the runtime system if the
-requested number of threads is higher than the number of threads that the
-runtime can create.
-
-\struct starpu_omp_task_region_attr
-\ingroup API_OpenMP_Runtime_Support
-Set of attributes used for creating a new task region.
-
-\sa starpu_omp_task_region()
-
-\var struct starpu_codelet starpu_omp_task_region_attr::cl
-
-Is a ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the task region
-explicit task. The codelet must provide a CPU implementation function or an
-accelerator implementation for offloaded target regions.
-
-\var starpu_data_handle_t *starpu_omp_task_region_attr::handles
-
-Is an array of zero or more ::starpu_data_handle_t data handles to be passed to
-the task region explicit tasks.
-
-\var void *starpu_omp_task_region_attr::cl_arg
-
-Is an optional pointer to an inline argument to be passed to the region implicit tasks.
-
-\var size_t starpu_omp_task_region_attr::cl_arg_size
-
-Is the size of the optional inline argument to be passed to the region implicit
-tasks, or 0 if unused.
-
-\var unsigned starpu_omp_task_region_attr::cl_arg_free
-
-Is a boolean indicating whether the optional inline argument should be
-automatically freed (true), or not (false).
-
-\var int starpu_omp_task_region_attr::if_clause
-
-Is a boolean indicating whether the \b if clause of the corresponding <c>pragma
-omp task</c> is true or false.
-
-\var int starpu_omp_task_region_attr::final_clause
-
-Is a boolean indicating whether the \b final clause of the corresponding <c>pragma
-omp task</c> is true or false.
-
-\var int starpu_omp_task_region_attr::untied_clause
-
-Is a boolean indicating whether the \b untied clause of the corresponding <c>pragma
-omp task</c> is true or false.
-
-\var int starpu_omp_task_region_attr::mergeable_clause
-
-Is a boolean indicating whether the \b mergeable clause of the corresponding <c>pragma
-omp task</c> is true or false.
-
-@name Initialisation
-\ingroup API_OpenMP_Runtime_Support
+/*! \ingroup API_OpenMP_Runtime_Support
 
 
 \def STARPU_OPENMP
 \def STARPU_OPENMP
 \ingroup API_OpenMP_Runtime_Support
 \ingroup API_OpenMP_Runtime_Support
@@ -217,764 +23,4 @@ This macro is defined when StarPU has been installed with OpenMP Runtime
 support. It should be used in your code to detect the availability of
 support. It should be used in your code to detect the availability of
 the runtime support for OpenMP.
 the runtime support for OpenMP.
 
 
-\fn int starpu_omp_init(void)
-\ingroup API_OpenMP_Runtime_Support
-Initializes StarPU and its OpenMP Runtime support.
-
-\fn void starpu_omp_shutdown(void)
-\ingroup API_OpenMP_Runtime_Support
-Shutdown StarPU and its OpenMP Runtime support.
-
-@name Parallel
-\anchor ORS_Parallel
-\ingroup API_OpenMP_Runtime_Support
-
-\fn void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr)
-\ingroup API_OpenMP_Runtime_Support
-Generates and launches an OpenMP parallel region and returns after its
-completion. \p attr specifies the attributes for the generated parallel region.
-If this function is called from inside another, generating, parallel region, the
-generated parallel region is nested within the generating parallel region.
-
-This function can be used to implement <c>\#pragma omp parallel</c>.
-
-\fn void starpu_omp_master(void (*f)(void *arg), void *arg)
-\ingroup API_OpenMP_Runtime_Support
-Executes a function only on the master thread of the OpenMP
-parallel region it is called from. When called from a thread that is not the
-master of the parallel region it is called from, this function does nothing. \p
-f is the function to be called. \p arg is an argument passed to function \p f.
-
-This function can be used to implement <c>\#pragma omp master</c>.
-
-\fn int starpu_omp_master_inline(void)
-\ingroup API_OpenMP_Runtime_Support
-Determines whether the calling thread is the master of the OpenMP parallel region
-it is called from or not.
-
-This function can be used to implement <c>\#pragma omp master</c> without code
-outlining.
-\return <c>!0</c> if called by the region's master thread.
-\return <c>0</c> if not called by the region's master thread.
-
-@name Synchronization
-\anchor ORS_Synchronization
-\ingroup API_OpenMP_Runtime_Support
-
-\fn void starpu_omp_barrier(void)
-\ingroup API_OpenMP_Runtime_Support
-Waits until each participating thread of the innermost OpenMP parallel region
-has reached the barrier and each explicit OpenMP task bound to this region has
-completed its execution.
-
-This function can be used to implement <c>\#pragma omp barrier</c>.
-
-\fn void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name)
-\ingroup API_OpenMP_Runtime_Support
-Waits until no other thread is executing within the context of the selected
-critical section, then proceeds to the exclusive execution of a function within
-the critical section. \p f is the function to be executed in the critical
-section. \p arg is an argument passed to function \p f. \p name is the name of
-the selected critical section. If <c>name == NULL</c>, the selected critical
-section is the unique anonymous critical section.
-
-This function can be used to implement <c>\#pragma omp critical</c>.
-
-\fn void starpu_omp_critical_inline_begin(const char *name)
-\ingroup API_OpenMP_Runtime_Support
-Waits until execution can proceed exclusively within the context of the
-selected critical section. \p name is the name of the selected critical
-section. If <c>name == NULL</c>, the selected critical section is the unique
-anonymous critical section.
-
-This function together with #starpu_omp_critical_inline_end can be used to
-implement <c>\#pragma omp critical</c> without code outlining.
-
-\fn void starpu_omp_critical_inline_end(const char *name)
-\ingroup API_OpenMP_Runtime_Support
-Ends the exclusive execution within the context of the selected critical
-section. \p name is the name of the selected critical section. If
-<c>name==NULL</c>, the selected critical section is the unique anonymous
-critical section.
-
-This function together with #starpu_omp_critical_inline_begin can be used to
-implement <c>\#pragma omp critical</c> without code outlining.
-
-@name Worksharing
-\anchor ORS_Worksharing
-\ingroup API_OpenMP_Runtime_Support
-
-\fn void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait)
-\ingroup API_OpenMP_Runtime_Support
-Ensures that a single participating thread of the innermost OpenMP parallel
-region executes a function. \p f is the function to be executed by a single
-thread. \p arg is an argument passed to function \p f. \p nowait is a flag
-indicating whether an implicit barrier is requested after the single section
-(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
-
-This function can be used to implement <c>\#pragma omp single</c>.
-
-\fn int starpu_omp_single_inline(void)
-\ingroup API_OpenMP_Runtime_Support
-Decides whether the current thread is elected to run the following single
-section among the participating threads of the innermost OpenMP parallel
-region.
-
-This function can be used to implement <c>\#pragma omp single</c> without code
-outlining.
-\return <c>!0</c> if the calling thread has won the election.
-\return <c>0</c> if the calling thread has lost the election.
-
-\fn void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size)
-\ingroup API_OpenMP_Runtime_Support
-This function executes \p f on a single task of the current parallel region
-task, and then broadcasts the contents of the memory block pointed to by the
-copyprivate pointer \p data and of size \p data_size to the corresponding \p
-data pointed memory blocks of all the other participating region tasks. This
-function can be used to implement <c>\#pragma omp single</c> with a copyprivate
-clause.
-
-\sa starpu_omp_single_copyprivate_inline
-\sa starpu_omp_single_copyprivate_inline_begin
-\sa starpu_omp_single_copyprivate_inline_end
-
-\fn void *starpu_omp_single_copyprivate_inline_begin(void *data)
-\ingroup API_OpenMP_Runtime_Support
-This function elects one task among the tasks of the current parallel region
-task to execute the following single section, and then broadcasts the
-copyprivate pointer \p data to all the other participating region tasks. This
-function can be used to implement <c>\#pragma omp single</c> with a copyprivate
-clause without code outlining.
-
-\sa starpu_omp_single_copyprivate_inline
-\sa starpu_omp_single_copyprivate_inline_end
-
-\fn void starpu_omp_single_copyprivate_inline_end(void)
-\ingroup API_OpenMP_Runtime_Support
-This function completes the execution of a single section and returns the
-broadcast copyprivate pointer for tasks that lost the election and <c>NULL</c> for
-the task that won the election. This function can be used to implement
-<c>\#pragma omp single</c> with a copyprivate clause without code outlining.
-
-\return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section.
-\return <c>NULL</c> for the task that won the election and executed the code of the single section.
-
-\sa starpu_omp_single_copyprivate_inline
-\sa starpu_omp_single_copyprivate_inline_begin
-
-\fn void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
-\ingroup API_OpenMP_Runtime_Support
-Executes a parallel loop together with the other threads participating in the
-innermost parallel region. \p f is the function to be executed iteratively. \p
-arg is an argument passed to function \p f. \p nb_iterations is the number of
-iterations to be performed by the parallel loop. \p chunk is the number of
-consecutive iterations that should be assigned to the same thread when
-scheduling the loop workshares, it follows the semantics of the \c modifier
-argument in OpenMP <c>\#pragma omp for</c> specification. \p schedule is the
-scheduling mode according to the OpenMP specification. \p ordered is a flag
-indicating whether the loop region may contain an ordered section
-(<c>ordered==!0</c>) or not (<c>ordered==0</c>). \p nowait is a flag
-indicating whether an implicit barrier is requested after the for section
-(<c>nowait==0</c>) or not (<c>nowait==!0</c>).
-
-The function \p f will be called with arguments \p _first_i, the first iteration
-to perform, \p _nb_i, the number of consecutive iterations to perform before
-returning, \p arg, the free \p arg argument.
-
-This function can be used to implement <c>\#pragma omp for</c>.
-
-\fn int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
-\ingroup API_OpenMP_Runtime_Support
-Decides whether the current thread should start to execute a parallel loop
-section. See #starpu_omp_for for the argument description.
-
-This function together with #starpu_omp_for_inline_next can be used to
-implement <c>\#pragma omp for</c> without code outlining.
-
-\return <c>!0</c> if the calling thread participates in the loop region and
-should execute a first chunk of iterations. In that case, \p *_first_i will be
-set to the first iteration of the chunk to perform and \p *_nb_i will be set to
-the number of iterations of the chunk to perform.
-
-\return <c>0</c> if the calling thread does not participate in the loop region
-because all the available iterations have been assigned to the other threads of
-the parallel region.
-
-\sa starpu_omp_for
-
-\fn int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
-\ingroup API_OpenMP_Runtime_Support
-Decides whether the current thread should continue to execute a parallel loop
-section. See #starpu_omp_for for the argument description.
-
-This function together with #starpu_omp_for_inline_first can be used to
-implement <c>\#pragma omp for</c> without code outlining.
-
-\return <c>!0</c> if the calling thread should execute a next chunk of
-iterations. In that case, \p *_first_i will be set to the first iteration of the
-chunk to perform and \p *_nb_i will be set to the number of iterations of the
-chunk to perform.
-
-\return <c>0</c> if the calling thread does not participate anymore in the loop
-region because all the available iterations have been assigned to the other
-threads of the parallel region.
-
-\sa starpu_omp_for
-
-\fn void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
-\ingroup API_OpenMP_Runtime_Support
-Alternative implementation of a parallel loop. This function differs from
-#starpu_omp_for in the expected arguments of the loop function \c f.
-
-The function \p f will be called with arguments \p _begin_i, the first iteration
-to perform, \p _end_i, the first iteration not to perform before
-returning, \p arg, the free \p arg argument.
-
-This function can be used to implement <c>\#pragma omp for</c>.
-
-\sa starpu_omp_for
-
-\fn int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
-\ingroup API_OpenMP_Runtime_Support
-Inline version of the alternative implementation of a parallel loop.
-
-This function together with #starpu_omp_for_inline_next_alt can be used to
-implement <c>\#pragma omp for</c> without code outlining.
-
-\sa starpu_omp_for
-\sa starpu_omp_for_alt
-\sa starpu_omp_for_inline_first
-
-\fn int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
-\ingroup API_OpenMP_Runtime_Support
-Inline version of the alternative implementation of a parallel loop.
-
-This function together with #starpu_omp_for_inline_first_alt can be used to
-implement <c>\#pragma omp for</c> without code outlining.
-
-\sa starpu_omp_for
-\sa starpu_omp_for_alt
-\sa starpu_omp_for_inline_next
-
-\fn void starpu_omp_ordered(void (*f)(void *arg), void *arg)
-\ingroup API_OpenMP_Runtime_Support
-Ensures that a function is sequentially executed once for each iteration in
-order within a parallel loop, by the thread that owns the iteration. \p f is the
-function to be executed by the thread that owns the current iteration. \p arg is
-an argument passed to function \p f.
-
-This function can be used to implement <c>\#pragma omp ordered</c>.
-
-\fn void starpu_omp_ordered_inline_begin(void)
-\ingroup API_OpenMP_Runtime_Support
-Waits until all the iterations of a parallel loop below the iteration owned by
-the current thread have been executed.
-
-This function together with #starpu_omp_ordered_inline_end can be used to
-implement <c>\#pragma omp ordered</c> without code outlining.
-
-\fn void starpu_omp_ordered_inline_end(void)
-\ingroup API_OpenMP_Runtime_Support
-Notifies that the ordered section for the current iteration has been completed.
-
-This function together with #starpu_omp_ordered_inline_begin can be used to
-implement <c>\#pragma omp ordered</c> without code outlining.
-
-\fn void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait)
-\ingroup API_OpenMP_Runtime_Support
-Ensures that each function of a given array of functions is executed by one and
-only one thread. \p nb_sections is the number of functions in the array \p
-section_f. \p section_f is the array of functions to be executed as sections. \p
-section_arg is an array of arguments to be passed to the corresponding function.
-\p nowait is a flag indicating whether an implicit barrier is requested after
-the execution of all the sections (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
-
-This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
-
-\fn void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait)
-\ingroup API_OpenMP_Runtime_Support
-Alternative implementation of sections. This function differs from
-#starpu_omp_sections in that all the sections are combined within a single
-function in this version. \p section_f is the function implementing the combined
-sections.
-
-The function \p section_f will be called with arguments \p section_num, the
-section number to be executed, \p arg, the entry of \p section_arg corresponding
-to this section.
-
-This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
-
-\sa starpu_omp_sections
-
-@name Task
-\anchor ORS_Task
-\ingroup API_OpenMP_Runtime_Support
-
-\fn void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr)
-\ingroup API_OpenMP_Runtime_Support
-Generates an explicit child task. The execution of the generated task is
-asynchronous with respect to the calling code unless specified otherwise.
-\p attr specifies the attributes for the generated task region.
-
-This function can be used to implement <c>\#pragma omp task</c>.
-
-\fn void starpu_omp_taskwait(void)
-\ingroup API_OpenMP_Runtime_Support
-Waits for the completion of the tasks generated by the current task. This
-function does not wait for the descendants of the tasks generated by the current
-task.
-
-This function can be used to implement <c>\#pragma omp taskwait</c>.
-
-\fn void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
-\ingroup API_OpenMP_Runtime_Support
-Launches a function and waits for the completion of every descendant task
-generated during the execution of the function.
-
-This function can be used to implement <c>\#pragma omp taskgroup</c>.
-
-\sa starpu_omp_taskgroup_inline_begin
-\sa starpu_omp_taskgroup_inline_end
-
-\fn void starpu_omp_taskgroup_inline_begin(void)
-\ingroup API_OpenMP_Runtime_Support
-Launches a function and gets ready to wait for the completion of every descendant task
-generated during the dynamic scope of the taskgroup.
-
-This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
-
-\sa starpu_omp_taskgroup
-\sa starpu_omp_taskgroup_inline_end
-
-\fn void starpu_omp_taskgroup_inline_end(void)
-\ingroup API_OpenMP_Runtime_Support
-Waits for the completion of every descendant task
-generated during the dynamic scope of the taskgroup.
-
-This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
-
-\sa starpu_omp_taskgroup
-\sa starpu_omp_taskgroup_inline_begin
-
-
-@name API
-\anchor ORS_API
-\ingroup API_OpenMP_Runtime_Support
-
-\fn void starpu_omp_set_num_threads(int threads)
-\ingroup API_OpenMP_Runtime_Support
-This function sets ICVS nthreads_var for the parallel regions to be created
-with the current region.
-
-Note: The StarPU OpenMP runtime support currently ignores
-this setting for nested parallel regions.
-
-\sa starpu_omp_get_num_threads
-\sa starpu_omp_get_thread_num
-\sa starpu_omp_get_max_threads
-\sa starpu_omp_get_num_procs
-
-\fn int starpu_omp_get_num_threads()
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of threads of the current region.
-
-\return the number of threads of the current region.
-
-\sa starpu_omp_set_num_threads
-\sa starpu_omp_get_thread_num
-\sa starpu_omp_get_max_threads
-\sa starpu_omp_get_num_procs
-
-\fn int starpu_omp_get_thread_num()
-\ingroup API_OpenMP_Runtime_Support
-This function returns the rank of the current thread among the threads
-of the current region.
-
-\return the rank of the current thread in the current region.
-
-\sa starpu_omp_set_num_threads
-\sa starpu_omp_get_num_threads
-\sa starpu_omp_get_max_threads
-\sa starpu_omp_get_num_procs
-
-\fn int starpu_omp_get_max_threads()
-\ingroup API_OpenMP_Runtime_Support
-This function returns the maximum number of threads that can be used to
-create a region from the current region.
-
-\return the maximum number of threads that can be used to create a region from the current region.
-
-\sa starpu_omp_set_num_threads
-\sa starpu_omp_get_num_threads
-\sa starpu_omp_get_thread_num
-\sa starpu_omp_get_num_procs
-
-\fn int starpu_omp_get_num_procs(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of StarPU CPU workers.
-
-\return the number of StarPU CPU workers.
-
-\sa starpu_omp_set_num_threads
-\sa starpu_omp_get_num_threads
-\sa starpu_omp_get_thread_num
-\sa starpu_omp_get_max_threads
-
-\fn int starpu_omp_in_parallel(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns whether it is called from the scope of a parallel region or not.
-
-\return <c>!0</c> if called from a parallel region scope.
-\return <c>0</c> otherwise.
-
-\fn void starpu_omp_set_dynamic(int dynamic_threads)
-\ingroup API_OpenMP_Runtime_Support
-This function enables (1) or disables (0) dynamically adjusting the number of parallel threads.
-
-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
-
-\sa starpu_omp_get_dynamic
-
-\fn int starpu_omp_get_dynamic(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the state of dynamic thread number adjustment.
-
-\return <c>!0</c> if dynamic thread number adjustment is enabled.
-\return <c>0</c> otherwise.
-
-\sa starpu_omp_set_dynamic
-
-\fn void starpu_omp_set_nested(int nested)
-\ingroup API_OpenMP_Runtime_Support
-This function enables (1) or disables (0) nested parallel regions.
-
-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
-
-\sa starpu_omp_get_nested
-\sa starpu_omp_get_max_active_levels
-\sa starpu_omp_set_max_active_levels
-\sa starpu_omp_get_level
-\sa starpu_omp_get_active_level
-
-\fn int starpu_omp_get_nested(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns whether nested parallel sections are enabled or not.
-
-\return <c>!0</c> if nested parallel sections are enabled.
-\return <c>0</c> otherwise.
-
-\sa starpu_omp_set_nested
-\sa starpu_omp_get_max_active_levels
-\sa starpu_omp_set_max_active_levels
-\sa starpu_omp_get_level
-\sa starpu_omp_get_active_level
-
-\fn int starpu_omp_get_cancellation(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the state of the cancel ICVS var.
-
-\fn void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier)
-\ingroup API_OpenMP_Runtime_Support
-This function sets the default scheduling kind for upcoming loops within the
-current parallel section. \p kind is the scheduler kind, \p modifier
-complements the scheduler kind with information such as the chunk size,
-in accordance with the OpenMP specification.
-
-\sa starpu_omp_get_schedule
-
-\fn void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the current selected default loop scheduler.
-
-\return the kind and the modifier of the current default loop scheduler.
-
-\sa starpu_omp_set_schedule
-
-\fn int starpu_omp_get_thread_limit(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of StarPU CPU workers.
-
-\return the number of StarPU CPU workers.
-
-\fn void starpu_omp_set_max_active_levels(int max_levels)
-\ingroup API_OpenMP_Runtime_Support
-This function sets the maximum number of allowed active parallel section levels.
-
-Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assumes \p max_levels equals <c>1</c> instead.
-
-\sa starpu_omp_set_nested
-\sa starpu_omp_get_nested
-\sa starpu_omp_get_max_active_levels
-\sa starpu_omp_get_level
-\sa starpu_omp_get_active_level
-
-\fn int starpu_omp_get_max_active_levels(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the current maximum number of allowed active parallel section levels.
-
-\return the current maximum number of allowed active parallel section levels.
-
-\sa starpu_omp_set_nested
-\sa starpu_omp_get_nested
-\sa starpu_omp_set_max_active_levels
-\sa starpu_omp_get_level
-\sa starpu_omp_get_active_level
-
-\fn int starpu_omp_get_level(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the nesting level of the current parallel section.
-
-\return the nesting level of the current parallel section.
-
-\sa starpu_omp_set_nested
-\sa starpu_omp_get_nested
-\sa starpu_omp_get_max_active_levels
-\sa starpu_omp_set_max_active_levels
-\sa starpu_omp_get_active_level
-
-\fn int starpu_omp_get_ancestor_thread_num(int level)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of the ancestor of the current parallel section.
-
-\return the number of the ancestor of the current parallel section.
-
-\fn int starpu_omp_get_team_size(int level)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the size of the team of the current parallel section.
-
-\return the size of the team of the current parallel section.
-
-\fn int starpu_omp_get_active_level(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the nesting level of the current innermost active parallel section.
-
-\return the nesting level of the current innermost active parallel section.
-
-\sa starpu_omp_set_nested
-\sa starpu_omp_get_nested
-\sa starpu_omp_get_max_active_levels
-\sa starpu_omp_set_max_active_levels
-\sa starpu_omp_get_level
-
-\fn int starpu_omp_in_final(void)
-\ingroup API_OpenMP_Runtime_Support
-This function checks whether the current task is final or not.
-
-\return <c>!0</c> if called from a final task.
-\return <c>0</c> otherwise.
-
-\fn enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the proc_bind setting of the current parallel region.
-
-\return the proc_bind setting of the current parallel region.
-
-\fn void starpu_omp_set_default_device(int device_num)
-\ingroup API_OpenMP_Runtime_Support
-This function sets the number of the device to use as default.
-
-Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
-
-\sa starpu_omp_get_default_device
-\sa starpu_omp_is_initial_device
-
-\fn int starpu_omp_get_default_device(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of the device used as default.
-
-\return the number of the device used as default.
-
-\sa starpu_omp_set_default_device
-\sa starpu_omp_is_initial_device
-
-\fn int starpu_omp_get_num_devices(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of the devices.
-
-\return the number of the devices.
-
-\fn int starpu_omp_get_num_teams(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the number of teams in the current teams region.
-
-\return the number of teams in the current teams region.
-
-\sa starpu_omp_get_team_num
-
-\fn int starpu_omp_get_team_num(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the team number of the calling thread.
-
-\return the team number of the calling thread.
-
-\sa starpu_omp_get_num_teams
-
-\fn int starpu_omp_is_initial_device(void)
-\ingroup API_OpenMP_Runtime_Support
-This function checks whether the current device is the initial device or not.
-
-\return <c>!0</c> if called from the host device.
-\return <c>0</c> otherwise.
-
-\sa starpu_omp_set_default_device
-\sa starpu_omp_get_default_device
-
-\fn int starpu_omp_get_max_task_priority
-\ingroup API_OpenMP_Runtime_Support
-The omp_get_max_task_priority routine returns the maximum value that can be
-specified in the priority clause.
-
-\fn void starpu_omp_init_lock(starpu_omp_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function initializes an opaque lock object.
-
-\sa starpu_omp_destroy_lock
-\sa starpu_omp_set_lock
-\sa starpu_omp_unset_lock
-\sa starpu_omp_test_lock
-
-\fn void starpu_omp_destroy_lock(starpu_omp_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function destroys an opaque lock object.
-
-\sa starpu_omp_init_lock
-\sa starpu_omp_set_lock
-\sa starpu_omp_unset_lock
-\sa starpu_omp_test_lock
-
-\fn void starpu_omp_set_lock(starpu_omp_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function locks an opaque lock object. If the lock is already locked, the
-function will block until it succeeds in exclusively acquiring the lock.
-
-\sa starpu_omp_init_lock
-\sa starpu_omp_destroy_lock
-\sa starpu_omp_unset_lock
-\sa starpu_omp_test_lock
-
-\fn void starpu_omp_unset_lock(starpu_omp_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function unlocks a previously locked lock object. The behaviour of this
-function is unspecified if it is called on an unlocked lock object.
-
-\sa starpu_omp_init_lock
-\sa starpu_omp_destroy_lock
-\sa starpu_omp_set_lock
-\sa starpu_omp_test_lock
-
-\fn int starpu_omp_test_lock(starpu_omp_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function makes a non-blocking attempt to lock a lock object and returns whether
-it succeeded or not.
-
-\return <c>!0</c> if the function succeeded in acquiring the lock.
-\return <c>0</c> if the lock was already locked.
-
-\sa starpu_omp_init_lock
-\sa starpu_omp_destroy_lock
-\sa starpu_omp_set_lock
-\sa starpu_omp_unset_lock
-
-\fn void starpu_omp_init_nest_lock(starpu_omp_nest_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function initializes an opaque lock object supporting nested locking operations.
-
-\sa starpu_omp_destroy_nest_lock
-\sa starpu_omp_set_nest_lock
-\sa starpu_omp_unset_nest_lock
-\sa starpu_omp_test_nest_lock
-
-\fn void starpu_omp_destroy_nest_lock(starpu_omp_nest_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function destroys an opaque lock object supporting nested locking operations.
-
-\sa starpu_omp_init_nest_lock
-\sa starpu_omp_set_nest_lock
-\sa starpu_omp_unset_nest_lock
-\sa starpu_omp_test_nest_lock
-
-\fn void starpu_omp_set_nest_lock(starpu_omp_nest_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function locks an opaque lock object supporting nested locking operations.
-If the lock is already locked by another task, the function will block until
-it succeeds in exclusively acquiring the lock. If the lock is already taken by
-the current task, the function will increase the nested locking level of the
-lock object.
-
-\sa starpu_omp_init_nest_lock
-\sa starpu_omp_destroy_nest_lock
-\sa starpu_omp_unset_nest_lock
-\sa starpu_omp_test_nest_lock
-
-\fn void starpu_omp_unset_nest_lock(starpu_omp_nest_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function unlocks a previously locked lock object supporting nested locking
-operations. If the lock has been locked multiple times in nested fashion, the
-nested locking level is decreased and the lock remains locked. Otherwise, if
-the lock has only been locked once, it becomes unlocked. The behaviour of this
-function is unspecified if it is called on an unlocked lock object. The
-behaviour of this function is unspecified if it is called from a different task
-than the one that locked the lock object.
-
-\sa starpu_omp_init_nest_lock
-\sa starpu_omp_destroy_nest_lock
-\sa starpu_omp_set_nest_lock
-\sa starpu_omp_test_nest_lock
-
-\fn int starpu_omp_test_nest_lock(starpu_omp_nest_lock_t *lock)
-\ingroup API_OpenMP_Runtime_Support
-This function makes a non-blocking attempt to lock an opaque lock object supporting
-nested locking operations and returns whether it succeeded or not. If the lock
-is already locked by another task, the function will return without having
-acquired the lock. If the lock is already taken by the current task, the
-function will increase the nested locking level of the lock object.
-
-\return <c>!0</c> if the function succeeded in acquiring the lock.
-\return <c>0</c> if the lock was already locked.
-
-\sa starpu_omp_init_nest_lock
-\sa starpu_omp_destroy_nest_lock
-\sa starpu_omp_set_nest_lock
-\sa starpu_omp_unset_nest_lock
-
-\fn void starpu_omp_atomic_fallback_inline_begin(void)
-\ingroup API_OpenMP_Runtime_Support
-This function implements the entry point of a fallback global atomic region. It
-blocks until it succeeds in acquiring exclusive access to the global atomic
-region.
-
-\sa starpu_omp_atomic_fallback_inline_end
-
-\fn void starpu_omp_atomic_fallback_inline_end(void)
-\ingroup API_OpenMP_Runtime_Support
-This function implements the exit point of a fallback global atomic region. It
-releases the exclusive access to the global atomic region.
-
-\sa starpu_omp_atomic_fallback_inline_begin
-
-\fn double starpu_omp_get_wtime(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the elapsed wallclock time in seconds.
-
-\return the elapsed wallclock time in seconds.
-
-\sa starpu_omp_get_wtick
-
-\fn double starpu_omp_get_wtick(void)
-\ingroup API_OpenMP_Runtime_Support
-This function returns the precision of the time used by \p starpu_omp_get_wtime.
-
-\return the precision of the time used by \p starpu_omp_get_wtime.
-
-\sa starpu_omp_get_wtime
-
-\fn void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base)
-\ingroup API_OpenMP_Runtime_Support
-This function enables setting additional vector metadata needed by the OpenMP Runtime Support.
-
-\p handle is the vector data handle.
-\p slice_base is the base of an array slice, expressed in number of vector elements from the array base.
-
-\sa STARPU_VECTOR_GET_SLICE_BASE
-
 */
 */

+ 0 - 59
doc/doxygen/chapters/api/running_driver.doxy

@@ -1,59 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Running_Drivers Running Drivers
-
-\struct starpu_driver
-structure for a driver
-\ingroup API_Running_Drivers
-\var enum starpu_worker_archtype starpu_driver::type
-    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
-    and ::STARPU_OPENCL_WORKER are currently supported.
-\var union starpu_driver::id
-    Identifier of the driver.
-
-\fn int starpu_driver_run(struct starpu_driver *d)
-\ingroup API_Running_Drivers
-Initialize the given driver, run it until it receives a request to
-terminate, deinitialize it and return 0 on success. Return
-<c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
-(::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
-
-This is the same as using the following functions: calling
-starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
-and finally starpu_driver_deinit().
-
-\fn int starpu_driver_init(struct starpu_driver *d)
-\ingroup API_Running_Drivers
-Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
-if starpu_driver::type is not a valid ::starpu_worker_archtype.
-
-\fn int starpu_driver_run_once(struct starpu_driver *d)
-\ingroup API_Running_Drivers
-Run the driver once, then return 0 on success, <c>-EINVAL</c> if starpu_driver::type is not a valid ::starpu_worker_archtype.
-
-\fn int starpu_driver_deinit(struct starpu_driver *d)
-\ingroup API_Running_Drivers
-Deinitialize the given driver. Return 0 on success, <c>-EINVAL</c> if
-starpu_driver::type is not a valid ::starpu_worker_archtype.
-
-\fn void starpu_drivers_request_termination(void)
-\ingroup API_Running_Drivers
-Notify all running drivers that they should terminate.
-
-*/

+ 0 - 2
doc/doxygen/refman.tex

@@ -232,11 +232,9 @@ Documentation License”.
 \input{group__API__Data__Interfaces}
 \input{group__API__Data__Interfaces}
 \input{group__API__Data__Partition}
 \input{group__API__Data__Partition}
 \input{group__API__Out__Of__Core}
 \input{group__API__Out__Of__Core}
-\input{group__API__Multiformat__Data__Interface}
 \input{group__API__Codelet__And__Tasks}
 \input{group__API__Codelet__And__Tasks}
 \input{group__API__Insert__Task}
 \input{group__API__Insert__Task}
 \input{group__API__Explicit__Dependencies}
 \input{group__API__Explicit__Dependencies}
-\input{group__API__Implicit__Data__Dependencies}
 \input{group__API__Performance__Model}
 \input{group__API__Performance__Model}
 \input{group__API__Profiling}
 \input{group__API__Profiling}
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
 \input{group__API__Theoretical__Lower__Bound__on__Execution__Time}

+ 24 - 1
include/starpu_bitmap.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2013-2015,2017                           CNRS
+ * Copyright (C) 2013-2015,2017,2019                      CNRS
  * Copyright (C) 2013,2016                                Université de Bordeaux
  * Copyright (C) 2013,2016                                Université de Bordeaux
  * Copyright (C) 2013                                     Simon Archipoff
  * Copyright (C) 2013                                     Simon Archipoff
  *
  *
@@ -19,31 +19,54 @@
 #ifndef __STARPU_BITMAP_H__
 #ifndef __STARPU_BITMAP_H__
 #define __STARPU_BITMAP_H__
 #define __STARPU_BITMAP_H__
 
 
+/** @defgroup API_Bitmap Bitmap
+
+    @brief This is the interface for the bitmap utilities provided by StarPU.
+
+    @{
+ */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
 
 
+/** create an empty starpu_bitmap */
 struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
 struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
+/** free \p b */
 void starpu_bitmap_destroy(struct starpu_bitmap *b);
 void starpu_bitmap_destroy(struct starpu_bitmap *b);
 
 
+/** set bit \p e in \p b */
 void starpu_bitmap_set(struct starpu_bitmap *b, int e);
 void starpu_bitmap_set(struct starpu_bitmap *b, int e);
+/** unset bit \p e in \p b */
 void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
 void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
+/** unset all bits in \p b */
 void starpu_bitmap_unset_all(struct starpu_bitmap *b);
 void starpu_bitmap_unset_all(struct starpu_bitmap *b);
 
 
+/** return true iff bit \p e is set in \p b */
 int starpu_bitmap_get(struct starpu_bitmap *b, int e);
 int starpu_bitmap_get(struct starpu_bitmap *b, int e);
+/** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */
 void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
 void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
+/** Basically compute \p a |= \p b */
 void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
 void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
+/** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */
 int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
 int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
+/** return the number of set bits in \p b */
 int starpu_bitmap_cardinal(struct starpu_bitmap *b);
 int starpu_bitmap_cardinal(struct starpu_bitmap *b);
 
 
+/** return the index of the first set bit of \p b, -1 if none */
 int starpu_bitmap_first(struct starpu_bitmap *b);
 int starpu_bitmap_first(struct starpu_bitmap *b);
+/** return the position of the last set bit of \p b, -1 if none */
 int starpu_bitmap_last(struct starpu_bitmap *b);
 int starpu_bitmap_last(struct starpu_bitmap *b);
+/** return the position of the set bit right after \p e in \p b, -1 if none */
 int starpu_bitmap_next(struct starpu_bitmap *b, int e);
 int starpu_bitmap_next(struct starpu_bitmap *b, int e);
+/** todo */
 int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
 int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif
 #endif

+ 30 - 1
include/starpu_bound.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011,2013,2017                           CNRS
+ * Copyright (C) 2011,2013,2017,2019                      CNRS
  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,13 @@
 #ifndef __STARPU_BOUND_H__
 #ifndef __STARPU_BOUND_H__
 #define __STARPU_BOUND_H__
 #define __STARPU_BOUND_H__
 
 
+/** @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
+
+    @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
+
+    @{
+ */
+
 #include <stdio.h>
 #include <stdio.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -25,19 +32,41 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/** Start recording tasks (resets stats). \p deps tells whether dependencies should be recorded too (this is quite expensive) */
 void starpu_bound_start(int deps, int prio);
 void starpu_bound_start(int deps, int prio);
+/** Stop recording tasks */
 void starpu_bound_stop(void);
 void starpu_bound_stop(void);
 
 
+/** Emit the DAG that was recorded on \p output. */
 void starpu_bound_print_dot(FILE *output);
 void starpu_bound_print_dot(FILE *output);
 
 
+/** Get theoretical upper bound (in ms) (needs glpk support
+    detected by configure script). It returns 0 if some performance models
+    are not calibrated.
+*/
 void starpu_bound_compute(double *res, double *integer_res, int integer);
 void starpu_bound_compute(double *res, double *integer_res, int integer);
 
 
+/** Emit the Linear Programming system on \p output for the recorded
+    tasks, in the lp format
+*/
 void starpu_bound_print_lp(FILE *output);
 void starpu_bound_print_lp(FILE *output);
+
+/** Emit the Linear Programming system on \p output for the recorded
+    tasks, in the mps format
+*/
 void starpu_bound_print_mps(FILE *output);
 void starpu_bound_print_mps(FILE *output);
+
+/** Emit on \p output the statistics of actual execution vs theoretical upper bound.
+    \p integer permits to choose between integer solving (which takes a
+    long time but is correct), and relaxed solving (which provides an
+    approximate solution).
+*/
 void starpu_bound_print(FILE *output, int integer);
 void starpu_bound_print(FILE *output, int integer);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_BOUND_H__ */
 #endif /* __STARPU_BOUND_H__ */

+ 15 - 5
include/starpu_clusters.h

@@ -19,6 +19,11 @@
 #ifndef __STARPU_CLUSTERS_UTIL_H__
 #ifndef __STARPU_CLUSTERS_UTIL_H__
 #define __STARPU_CLUSTERS_UTIL_H__
 #define __STARPU_CLUSTERS_UTIL_H__
 
 
+/** @defgroup API_Clustering_Machine Clustering Machine
+
+    @{
+ */
+
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
 
 
 #include <hwloc.h>
 #include <hwloc.h>
@@ -43,14 +48,16 @@ extern "C"
 #define STARPU_CLUSTER_NEW			(13<<STARPU_MODE_SHIFT)
 #define STARPU_CLUSTER_NEW			(13<<STARPU_MODE_SHIFT)
 #define STARPU_CLUSTER_NCORES			(14<<STARPU_MODE_SHIFT)
 #define STARPU_CLUSTER_NCORES			(14<<STARPU_MODE_SHIFT)
 
 
-/* These represent the default available functions to enforce cluster
- * use by the sub-runtime */
+/**
+   These represent the default available functions to enforce cluster
+   use by the sub-runtime
+*/
 enum starpu_cluster_types
 enum starpu_cluster_types
 {
 {
-	STARPU_CLUSTER_OPENMP,
-	STARPU_CLUSTER_INTEL_OPENMP_MKL,
+	STARPU_CLUSTER_OPENMP, /**< todo */
+	STARPU_CLUSTER_INTEL_OPENMP_MKL,  /**< todo */
 #ifdef STARPU_MKL
 #ifdef STARPU_MKL
-	STARPU_CLUSTER_GNU_OPENMP_MKL,
+	STARPU_CLUSTER_GNU_OPENMP_MKL,  /**< todo */
 #endif
 #endif
 };
 };
 
 
@@ -72,4 +79,7 @@ void starpu_gnu_openmp_mkl_prologue(void*);
 #endif
 #endif
 
 
 #endif
 #endif
+
+/** @} */
+
 #endif /* __STARPU_CLUSTERS_UTIL_H__ */
 #endif /* __STARPU_CLUSTERS_UTIL_H__ */

+ 28 - 1
include/starpu_cublas.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,17 +18,44 @@
 #ifndef __STARPU_CUBLAS_H__
 #ifndef __STARPU_CUBLAS_H__
 #define __STARPU_CUBLAS_H__
 #define __STARPU_CUBLAS_H__
 
 
+/** @ingroup API_CUDA_Extensions
+
+    @{
+ */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Initialize CUBLAS on every CUDA device. The
+   CUBLAS library must be initialized prior to any CUBLAS call. Calling
+   starpu_cublas_init() will initialize CUBLAS on every CUDA device
+   controlled by StarPU. This call blocks until CUBLAS has been properly
+   initialized on every device.
+*/
 void starpu_cublas_init(void);
 void starpu_cublas_init(void);
+
+/**
+   Set the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
+   codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
+   CUDA stream. When using one thread per CUDA worker, this function does not
+   do anything since the CUBLAS stream does not change, and is set once by
+   starpu_cublas_init().
+*/
 void starpu_cublas_set_stream(void);
 void starpu_cublas_set_stream(void);
+
+/**
+   Synchronously deinitialize the CUBLAS library on
+   every CUDA device.
+*/
 void starpu_cublas_shutdown(void);
 void starpu_cublas_shutdown(void);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_CUBLAS_H__ */
 #endif /* __STARPU_CUBLAS_H__ */

+ 13 - 1
include/starpu_cublas_v2.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2017                           CNRS
+ * Copyright (C) 2010-2013,2017,2019                      CNRS
  * Copyright (C) 2010-2012,2017                           Université de Bordeaux
  * Copyright (C) 2010-2012,2017                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,11 @@
 #ifndef __STARPU_CUBLAS_V2_H__
 #ifndef __STARPU_CUBLAS_V2_H__
 #define __STARPU_CUBLAS_V2_H__
 #define __STARPU_CUBLAS_V2_H__
 
 
+/** @ingroup API_CUDA_Extensions
+
+    @{
+ */
+
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 
 
 #include <cublas_v2.h>
 #include <cublas_v2.h>
@@ -27,6 +32,11 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Return the CUBLAS handle to be used to queue CUBLAS
+   kernels. It is properly initialized and configured for multistream by
+   starpu_cublas_init().
+*/
 cublasHandle_t starpu_cublas_get_local_handle(void);
 cublasHandle_t starpu_cublas_get_local_handle(void);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -35,4 +45,6 @@ cublasHandle_t starpu_cublas_get_local_handle(void);
 
 
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_CUBLAS_V2_H__ */
 #endif /* __STARPU_CUBLAS_V2_H__ */

+ 43 - 6
include/starpu_cuda.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2012,2014                           Université de Bordeaux
  * Copyright (C) 2010-2012,2014                           Université de Bordeaux
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,11 @@
 #ifndef __STARPU_CUDA_H__
 #ifndef __STARPU_CUDA_H__
 #define __STARPU_CUDA_H__
 #define __STARPU_CUDA_H__
 
 
+/** @defgroup API_CUDA_Extensions CUDA Extensions
+
+    @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 
 
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
@@ -31,20 +36,50 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/** Report a CUBLAS error. */
 void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
 void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
-#define STARPU_CUBLAS_REPORT_ERROR(status) \
-	starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
 
 
+/** Calls starpu_cublas_report_error(), passing the current function, file and line position.*/
+#define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
+
+/** Report a CUDA error. */
 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
-#define STARPU_CUDA_REPORT_ERROR(status) \
-	starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
 
 
+/** Calls starpu_cuda_report_error(), passing the current function, file and line position.*/
+#define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
+
+/**
+    Return the current worker’s CUDA stream. StarPU
+    provides a stream for every CUDA device controlled by StarPU. This
+    function is only provided for convenience so that programmers can
+    easily use asynchronous operations within codelets without having to
+    create a stream by hand. Note that the application is not forced to
+    use the stream provided by starpu_cuda_get_local_stream() and may also
+    create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
+    allowed, but will reduce the likelihood of having all transfers
+    overlapped.
+*/
 cudaStream_t starpu_cuda_get_local_stream(void);
 cudaStream_t starpu_cuda_get_local_stream(void);
 
 
+/** Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker). */
 const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
 const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
 
 
+/**
+    Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
+    to the pointer \p dst_ptr on \p dst_node. The function first tries to
+    copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
+    asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
+    data synchronously. The function returns <c>-EAGAIN</c> if the
+    asynchronous launch was successful. It returns 0 if the synchronous
+    copy was successful, or fails otherwise.
+*/
 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
 
 
+/**
+    Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
+    according to whether \p devid is among the field
+    starpu_conf::cuda_opengl_interoperability.
+*/
 void starpu_cuda_set_device(unsigned devid);
 void starpu_cuda_set_device(unsigned devid);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -52,5 +87,7 @@ void starpu_cuda_set_device(unsigned devid);
 #endif
 #endif
 
 
 #endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */
 #endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */
-#endif /* __STARPU_CUDA_H__ */
 
 
+/** @} */
+
+#endif /* __STARPU_CUDA_H__ */

+ 23 - 1
include/starpu_cusparse.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
  * Copyright (C) 2010-2014,2017                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,11 @@
 #ifndef __STARPU_CUSPARSE_H__
 #ifndef __STARPU_CUSPARSE_H__
 #define __STARPU_CUSPARSE_H__
 #define __STARPU_CUSPARSE_H__
 
 
+/** @ingroup API_CUDA_Extensions
+
+    @{
+ */
+
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 #include <cusparse.h>
 #include <cusparse.h>
 #endif
 #endif
@@ -27,10 +32,25 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Initialize CUSPARSE on every CUDA device
+   controlled by StarPU. This call blocks until CUSPARSE has been properly
+   initialized on every device.
+*/
 void starpu_cusparse_init(void);
 void starpu_cusparse_init(void);
+
+/**
+   Synchronously deinitialize the CUSPARSE library on
+   every CUDA device.
+*/
 void starpu_cusparse_shutdown(void);
 void starpu_cusparse_shutdown(void);
 
 
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+/**
+   Return the CUSPARSE handle to be used to queue CUSPARSE
+   kernels. It is properly initialized and configured for multistream by
+   starpu_cusparse_init().
+*/
 cusparseHandle_t starpu_cusparse_get_local_handle(void);
 cusparseHandle_t starpu_cusparse_get_local_handle(void);
 #endif
 #endif
 
 
@@ -38,4 +58,6 @@ cusparseHandle_t starpu_cusparse_get_local_handle(void);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_CUSPARSE_H__ */
 #endif /* __STARPU_CUSPARSE_H__ */

+ 429 - 45
include/starpu_data.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2009-2019                                Université de Bordeaux
  * Copyright (C) 2009-2019                                Université de Bordeaux
  * Copyright (C) 2011-2013,2016,2017                      Inria
  * Copyright (C) 2011-2013,2016,2017                      Inria
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2019                           CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,15 @@
 #ifndef __STARPU_DATA_H__
 #ifndef __STARPU_DATA_H__
 #define __STARPU_DATA_H__
 #define __STARPU_DATA_H__
 
 
+/** @defgroup API_Data_Management Data Management
+
+    @brief Data management facilities provided by StarPU. We show how
+    to use existing data interfaces in \ref API_Data_Interfaces, but
+    developers can design their own data interfaces if required.
+
+    @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -26,60 +35,302 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   This macro is used when the RAM memory node is specified.
+*/
+#define STARPU_MAIN_RAM 0
+
 struct _starpu_data_state;
 struct _starpu_data_state;
+/**
+   StarPU uses ::starpu_data_handle_t as an opaque handle to manage a
+   piece of data. Once a piece of data has been registered to StarPU,
+   it is associated to a ::starpu_data_handle_t which keeps track of
+   the state of the piece of data over the entire machine, so that we
+   can maintain data consistency and locate data replicates for
+   instance.
+*/
 typedef struct _starpu_data_state* starpu_data_handle_t;
 typedef struct _starpu_data_state* starpu_data_handle_t;
 
 
-/* Note: when adding a flag here, update _starpu_detect_implicit_data_deps_with_handle */
+/**
+    Describe a StarPU data access mode
+
+    Note: when adding a flag here, update
+    _starpu_detect_implicit_data_deps_with_handle
+
+    Note: other STARPU_* values in include/starpu_task_util.h
+ */
 enum starpu_data_access_mode
 enum starpu_data_access_mode
 {
 {
-	STARPU_NONE=0,
-	STARPU_R=(1<<0),
-	STARPU_W=(1<<1),
-	STARPU_RW=(STARPU_R|STARPU_W),
-	STARPU_SCRATCH=(1<<2),
-	STARPU_REDUX=(1<<3),
-	STARPU_COMMUTE=(1<<4),
-	STARPU_SSEND=(1<<5),
-	STARPU_LOCALITY=(1<<6),
-	STARPU_ACCESS_MODE_MAX=(1<<7)
-	/* Note: other STARPU_* values in include/starpu_task_util.h */
+	STARPU_NONE=0, /**< todo */
+	STARPU_R=(1<<0), /**< read-only mode */
+	STARPU_W=(1<<1), /**< write-only mode */
+	STARPU_RW=(STARPU_R|STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W  */
+	STARPU_SCRATCH=(1<<2), /**< A temporary buffer is allocated
+				  for the task, but StarPU does not
+				  enforce data consistency---i.e. each
+				  device has its own buffer,
+				  independently from each other (even
+				  for CPUs), and no data transfer is
+				  ever performed. This is useful for
+				  temporary variables to avoid
+				  allocating/freeing buffers inside
+				  each task. Currently, no behavior is
+				  defined concerning the relation with
+				  the ::STARPU_R and ::STARPU_W modes
+				  and the value provided at
+				  registration --- i.e., the value of
+				  the scratch buffer is undefined at
+				  entry of the codelet function.  It
+				  is being considered for future
+				  extensions at least to define the
+				  initial value.  For now, data to be
+				  used in ::STARPU_SCRATCH mode should
+				  be registered with node -1 and a
+				  <c>NULL</c> pointer, since the value
+				  of the provided buffer is simply
+				  ignored for now.
+			       */
+	STARPU_REDUX=(1<<3), /**< todo */
+	STARPU_COMMUTE=(1<<4), /**<  ::STARPU_COMMUTE can be passed
+				  along ::STARPU_W or ::STARPU_RW to
+				  express that StarPU can let tasks
+				  commute, which is useful e.g. when
+				  bringing a contribution into some
+				  data, which can be done in any order
+				  (but still requires sequential
+				  consistency against reads or
+				  non-commutative writes).
+			       */
+	STARPU_SSEND=(1<<5), /**< used in starpu_mpi_insert_task() to
+				specify the data has to be sent using
+				a synchronous and non-blocking mode
+				(see starpu_mpi_issend())
+			     */
+	STARPU_LOCALITY=(1<<6), /**< used to tell the scheduler which
+				   data is the most important for the
+				   task, and should thus be used to
+				   try to group tasks on the same core
+				   or cache, etc. For now only the ws
+				   and lws schedulers take this flag
+				   into account, and only when rebuilt
+				   with \c USE_LOCALITY flag defined in
+				   the
+				   src/sched_policies/work_stealing_policy.c
+				   source code.
+				*/
+	STARPU_ACCESS_MODE_MAX=(1<<7) /**< todo */
 };
 };
 
 
+/**
+   Describe a data handle along with an access mode.
+*/
 struct starpu_data_descr
 struct starpu_data_descr
 {
 {
-	starpu_data_handle_t handle;
-	enum starpu_data_access_mode mode;
+	starpu_data_handle_t handle; /**< data */
+	enum starpu_data_access_mode mode; /**< access mode */
 };
 };
 
 
 struct starpu_data_interface_ops;
 struct starpu_data_interface_ops;
 
 
+/** Set the name of the data, to be shown in various profiling tools. */
 void starpu_data_set_name(starpu_data_handle_t handle, const char *name);
 void starpu_data_set_name(starpu_data_handle_t handle, const char *name);
+
+/**
+   Set the coordinates of the data, to be shown in various profiling
+   tools. \p dimensions is the size of the \p dims array. This can be
+   for instance the tile coordinates within a big matrix.
+*/
 void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[]);
 void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[]);
+
+/**
+   Set the coordinates of the data, to be shown in various profiling
+   tools. \p dimensions is the number of subsequent \c int parameters.
+   This can be for instance the tile coordinates within a big matrix.
+*/
 void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...);
 void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...);
 
 
+/**
+   Unregister a data \p handle from StarPU. If the data was
+   automatically allocated by StarPU because the home node was -1, all
+   automatically allocated buffers are freed. Otherwise, a valid copy
+   of the data is put back into the home node in the buffer that was
+   initially registered. Using a data handle that has been
+   unregistered from StarPU results in an undefined behaviour. In case
+   we do not need to update the value of the data in the home node, we
+   can use the function starpu_data_unregister_no_coherency() instead.
+*/
 void starpu_data_unregister(starpu_data_handle_t handle);
 void starpu_data_unregister(starpu_data_handle_t handle);
+
+/**
+    Similar to starpu_data_unregister(), except that StarPU does not
+    put back a valid copy into the home node, in the buffer that was
+    initially registered.
+*/
 void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
 void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
+
+/**
+   Destroy the data \p handle once it is no longer needed by any
+   submitted task. No coherency is assumed.
+*/
 void starpu_data_unregister_submit(starpu_data_handle_t handle);
 void starpu_data_unregister_submit(starpu_data_handle_t handle);
+
+/**
+   Destroy all replicates of the data \p handle immediately. After
+   data invalidation, the first access to \p handle must be performed
+   in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R
+   mode results in undefined behaviour.
+*/
 void starpu_data_invalidate(starpu_data_handle_t handle);
 void starpu_data_invalidate(starpu_data_handle_t handle);
+
+/**
+   Submit invalidation of the data \p handle after completion of
+   previously submitted tasks.
+*/
 void starpu_data_invalidate_submit(starpu_data_handle_t handle);
 void starpu_data_invalidate_submit(starpu_data_handle_t handle);
 
 
+/**
+   Specify that the data \p handle can be discarded without impacting
+   the application.
+*/
 void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
 void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
 
 
+/** @name Access registered data from the application
+ * @{
+ */
+
+/**
+   This macro can be used to acquire data, but not require it to be
+   available on a given node, only enforce R/W dependencies. This can
+   for instance be used to wait for tasks which produce the data, but
+   without requesting a fetch to the main memory.
+*/
 #define STARPU_ACQUIRE_NO_NODE -1
 #define STARPU_ACQUIRE_NO_NODE -1
+
+/**
+   Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all
+   nodes, preventing them from being evicted for instance. This is
+   mostly useful inside StarPU only.
+*/
 #define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2
 #define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2
+
+/**
+   The application must call this function prior to accessing
+   registered data from main memory outside tasks. StarPU ensures that
+   the application will get an up-to-date copy of \p handle in main
+   memory located where the data was originally registered, and that
+   all concurrent accesses (e.g. from tasks) will be consistent with
+   the access mode specified with \p mode. starpu_data_release() must
+   be called once the application no longer needs to access the piece
+   of data. Note that implicit data dependencies are also enforced by
+   starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
+   tasks scheduled to work on the data, unless they have been disabled
+   explicitly by calling
+   starpu_data_set_default_sequential_consistency_flag() or
+   starpu_data_set_sequential_consistency_flag().
+   starpu_data_acquire() is a blocking call, so that it cannot be
+   called from tasks or from their callbacks (in that case,
+   starpu_data_acquire() returns <c>-EDEADLK</c>). Upon successful
+   completion, this function returns 0.
+*/
 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
+
+/**
+   Similar to starpu_data_acquire(), except that the data will be
+   available on the given memory node instead of main memory.
+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can
+   be used instead of an explicit node number.
+*/
 int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
 int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
+
+/**
+   Asynchronous equivalent of starpu_data_acquire(). When the data
+   specified in \p handle is available in the access \p mode, the \p
+   callback function is executed. The application may access
+   the requested data during the execution of \p callback. The \p callback
+   function must call starpu_data_release() once the application no longer
+   needs to access the piece of data. Note that implicit data
+   dependencies are also enforced by starpu_data_acquire_cb() in case they
+   are not disabled. Contrary to starpu_data_acquire(), this function is
+   non-blocking and may be called from task callbacks. Upon successful
+   completion, this function returns 0.
+*/
 int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
 int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
+
+/**
+   Similar to starpu_data_acquire_cb(), except that the
+   data will be available on the given memory node instead of main
+   memory.
+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
+   used instead of an explicit node number.
+*/
 int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
 int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
+
+/**
+   Similar to starpu_data_acquire_cb() with the possibility of
+   enabling or disabling data dependencies.
+   When the data specified in \p handle is available in the access
+   \p mode, the \p callback function is executed. The application may access
+   the requested data during the execution of this \p callback. The \p callback
+   function must call starpu_data_release() once the application no longer
+   needs to access the piece of data. Note that implicit data
+   dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
+   are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
+   Similarly to starpu_data_acquire_cb(), this function is
+   non-blocking and may be called from task callbacks. Upon successful
+   completion, this function returns 0.
+*/
 int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
 int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
+
+/**
+   Similar to starpu_data_acquire_cb_sequential_consistency(), except that the
+   data will be available on the given memory node instead of main
+   memory.
+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
+   explicit node number.
+*/
 int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
 int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
+
 int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick);
 int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick);
+
+/**
+   Similar to starpu_data_acquire_on_node_cb_sequential_consistency(),
+   except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
+   to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
+   just before the acquisition, and \e post_sync_jobid happens just after the
+   release.
+*/
 int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid);
 int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid);
 
 
+/**
+   The application can call this function instead of starpu_data_acquire() so as to
+   acquire the data like starpu_data_acquire(), but only if all
+   previously-submitted tasks have completed, in which case starpu_data_acquire_try()
+   returns 0. StarPU will have ensured that the application will get an up-to-date
+   copy of \p handle in main memory located where the data was originally
+   registered. starpu_data_release() must be called once the application no longer
+   needs to access the piece of data.
+*/
 int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
 int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
+
+/**
+   Similar to starpu_data_acquire_try(), except that the
+   data will be available on the given memory node instead of main
+   memory.
+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
+   explicit node number.
+*/
 int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
 int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
 
 
 #ifdef __GCC__
 #ifdef __GCC__
+
+/**
+   STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
+   except that the code to be executed in a callback is directly provided
+   as a macro parameter, and the data \p handle is automatically released
+   after it. This makes it easy to execute code which depends on the
+   value of some registered data. This is non-blocking too and may be
+   called from task callbacks.
+*/
 #  define STARPU_DATA_ACQUIRE_CB(handle, mode, code) do \
 #  define STARPU_DATA_ACQUIRE_CB(handle, mode, code) do \
 	{ \						\
 	{ \						\
 		void callback(void *arg)		\
 		void callback(void *arg)		\
@@ -92,70 +343,181 @@ int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum
 	while(0)
 	while(0)
 #endif
 #endif
 
 
+/**
+   Release the piece of data acquired by the
+   application either by starpu_data_acquire() or by
+   starpu_data_acquire_cb().
+*/
 void starpu_data_release(starpu_data_handle_t handle);
 void starpu_data_release(starpu_data_handle_t handle);
+
+/**
+   Similar to starpu_data_release(), except that the data
+   will be available on the given memory \p node instead of main memory.
+   The \p node parameter must be exactly the same as the corresponding \c
+   starpu_data_acquire_on_node* call.
+*/
 void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
 void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
 
 
+/** @} */
+
+/**
+   This is an arbiter, which implements an advanced but centralized
+   management of concurrent data accesses, see \ref
+   ConcurrentDataAccess for the details.
+*/
 typedef struct starpu_arbiter *starpu_arbiter_t;
 typedef struct starpu_arbiter *starpu_arbiter_t;
+
+/**
+   Create a data access arbiter, see \ref ConcurrentDataAccess for the
+   details
+*/
 starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC;
 starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC;
-void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
-void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
 
 
-void starpu_data_display_memory_stats();
+/**
+   Make access to \p handle managed by \p arbiter
+*/
+void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
 
 
-#define starpu_data_malloc_pinned_if_possible	starpu_malloc
-#define starpu_data_free_pinned_if_possible	starpu_free
+/**
+   Destroy the \p arbiter . This must only be called after all data
+   assigned to it have been unregistered.
+*/
+void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
 
 
+/**
+   Explicitly ask StarPU to allocate room for a piece of data on
+   the specified memory \p node.
+*/
 int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
 int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
 
 
+/**
+   Issue a fetch request for the data \p handle to \p node, i.e.
+   requests that the data be replicated to the given node as soon as possible, so that it is
+   available there for tasks. If \p async is 0, the call will
+   block until the transfer is achieved, else the call will return immediately,
+   after having just queued the request. In the latter case, the request will
+   asynchronously wait for the completion of any task writing on the
+   data.
+*/
 int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
 int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
+
+/**
+   Issue a prefetch request for the data \p handle to \p node, i.e.
+   requests that the data be replicated to \p node when there is room for it, so that it is
+   available there for tasks. If \p async is 0, the call will
+   block until the transfer is achieved, else the call will return immediately,
+   after having just queued the request. In the latter case, the request will
+   asynchronously wait for the completion of any task writing on the
+   data.
+*/
 int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
 int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
+
 int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
 int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
+
+/**
+   Issue an idle prefetch request for the data \p handle to \p node, i.e.
+   requests that the data be replicated to \p node, so that it is
+   available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
+   block until the transfer is achieved, else the call will return immediately,
+   after having just queued the request. In the latter case, the request will
+   asynchronously wait for the completion of any task writing on the data.
+*/
 int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
 int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
 int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
 int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
 
 
+/**
+   Check whether a valid copy of \p handle is currently available on
+   memory node \p node.
+*/
 unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
 unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
 
 
+/**
+   Advise StarPU that \p handle will not be used in the near future, and is
+   thus a good candidate for eviction from GPUs. StarPU will thus write its value
+   back to its home node when the bus is idle, and select this data in priority
+   for eviction when memory gets low.
+*/
 void starpu_data_wont_use(starpu_data_handle_t handle);
 void starpu_data_wont_use(starpu_data_handle_t handle);
 
 
-#define STARPU_MAIN_RAM 0
-
-enum starpu_node_kind
-{
-	STARPU_UNUSED     = 0x00,
-	STARPU_CPU_RAM    = 0x01,
-	STARPU_CUDA_RAM   = 0x02,
-	STARPU_OPENCL_RAM = 0x03,
-	STARPU_DISK_RAM   = 0x04,
-	STARPU_MIC_RAM    = 0x05,
-	STARPU_SCC_RAM    = 0x06,
-	STARPU_SCC_SHM    = 0x07,
-	STARPU_MPI_MS_RAM = 0x08
-
-};
-
-unsigned starpu_worker_get_memory_node(unsigned workerid);
-unsigned starpu_memory_nodes_get_count(void);
-int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
-int starpu_memory_nodes_get_numa_count(void);
-int starpu_memory_nodes_numa_id_to_devid(int osid);
-int starpu_memory_nodes_numa_devid_to_id(unsigned id);
-
-enum starpu_node_kind starpu_node_get_kind(unsigned node);
-
+/**
+   Set the write-through mask of the data \p handle (and
+   its children), i.e. a bitmask of nodes where the data should be always
+   replicated after modification. It also prevents the data from being
+   evicted from these nodes when memory gets scarce. When the data is
+   modified, it is automatically transferred into those memory nodes. For
+   instance a <c>1<<0</c> write-through mask means that the CUDA workers
+   will commit their changes in main memory (node 0).
+*/
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
 
 
+/** @name Implicit Data Dependencies
+    In this section, we describe how StarPU makes it possible to
+    insert implicit task dependencies in order to enforce sequential data
+    consistency. When this data consistency is enabled on a specific data
+    handle, any data access will appear as sequentially consistent from
+    the application. For instance, if the application submits two tasks
+    that access the same piece of data in read-only mode, and then a third
+    task that accesses it in write mode, dependencies will be added between
+    the first two tasks and the third one. Implicit data dependencies are
+    also inserted in the case of data accesses from the application.
+    @{
+*/
+
+/**
+   Set the data consistency mode associated to a data handle. The
+   consistency mode set using this function has the priority over the
+   default mode which can be set with
+   starpu_data_set_default_sequential_consistency_flag().
+*/
 void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag);
 void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag);
+
+/**
+   Get the data consistency mode associated to the data handle \p handle
+*/
 unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle);
 unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle);
+
+/**
+   Return the default sequential consistency flag
+*/
 unsigned starpu_data_get_default_sequential_consistency_flag(void);
 unsigned starpu_data_get_default_sequential_consistency_flag(void);
+
+/**
+   Set the default sequential consistency flag. If a non-zero
+   value is passed, a sequential data consistency will be enforced for
+   all handles registered after this function call, otherwise it is
+   disabled. By default, StarPU enables sequential data consistency. It
+   is also possible to select the data consistency mode of a specific
+   data handle with the function
+   starpu_data_set_sequential_consistency_flag().
+*/
 void starpu_data_set_default_sequential_consistency_flag(unsigned flag);
 void starpu_data_set_default_sequential_consistency_flag(unsigned flag);
 
 
+/** @} */
+
+/**
+   Set whether this data should be eligible to be evicted to disk
+   storage (1) or not (0). The default is 1.
+*/
 void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag);
 void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag);
+/**
+   Get whether this data was set to be eligible to be evicted to disk
+   storage (1) or not (0).
+*/
 unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle);
 unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle);
 
 
+/**
+   Query the status of \p handle on the specified \p memory_node.
+*/
 void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested);
 void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested);
 
 
 struct starpu_codelet;
 struct starpu_codelet;
 
 
+/**
+   Set the codelets to be used for \p handle when it is accessed in the
+   mode ::STARPU_REDUX. Per-worker buffers will be initialized with
+   the codelet \p init_cl, and reduction between per-worker buffers will be
+   done with the codelet \p redux_cl.
+*/
 void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
 void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
 
 
 struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle);
 struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle);
@@ -164,13 +526,35 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsi
 
 
 void starpu_memchunk_tidy(unsigned memory_node);
 void starpu_memchunk_tidy(unsigned memory_node);
 
 
+/**
+   Set the field \c user_data for the \p handle to \p user_data . It can
+   then be retrieved with starpu_data_get_user_data(). \p user_data can be any
+   application-defined value, for instance a pointer to an object-oriented
+   container for the data.
+*/
 void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
 void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
+
+/**
+   Retrieve the field \c user_data previously set for the \p handle.
+*/
 void *starpu_data_get_user_data(starpu_data_handle_t handle);
 void *starpu_data_get_user_data(starpu_data_handle_t handle);
 
 
+/**
+   Copy the content of \p src_handle into \p dst_handle. The parameter \p
+   asynchronous indicates whether the function should block or not. In
+   the case of an asynchronous call, it is possible to synchronize with
+   the termination of this operation either by the means of implicit
+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
+   \p callback_func is not <c>NULL</c>, this callback function is executed after
+   the handle has been copied, and it is given the pointer \p
+   callback_arg as argument.
+*/
 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_DATA_H__ */
 #endif /* __STARPU_DATA_H__ */

+ 422 - 9
include/starpu_data_filters.h

@@ -21,6 +21,11 @@
 #ifndef __STARPU_DATA_FILTERS_H__
 #ifndef __STARPU_DATA_FILTERS_H__
 #define __STARPU_DATA_FILTERS_H__
 #define __STARPU_DATA_FILTERS_H__
 
 
+/** @defgroup API_Data_Partition Data Partition
+
+    @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 #include <stdarg.h>
 #include <stdarg.h>
 
 
@@ -31,65 +36,473 @@ extern "C"
 
 
 struct starpu_data_interface_ops;
 struct starpu_data_interface_ops;
 
 
+/** Describe a data partitioning operation, to be given to starpu_data_partition() */
 struct starpu_data_filter
 struct starpu_data_filter
 {
 {
+	/**
+	   Fill the \p child_interface structure with interface information
+	   for the \p i -th child of the parent \p father_interface (among
+	   \p nparts). The \p filter structure is provided, allowing to inspect the
+	   starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
+	   parameters.
+	   The details of what needs to be filled in \p child_interface vary according
+	   to the data interface, but generally speaking:
+	   <ul>
+	   <li> <c>id</c> is usually just copied over from the father,
+	   when the sub data has the same structure as the father,
+	   e.g. a subvector is a vector, a submatrix is a matrix, etc.
+	   This is however not the case for instance when dividing a
+	   BCSR matrix into its dense blocks, which then are matrices.
+	   </li>
+	   <li> <c>nx</c>, <c>ny</c> and alike are usually divided by
+	   the number of subdata, depending how the subdivision is
+	   done (e.g. nx division vs ny division for vertical matrix
+	   division vs horizontal matrix division). </li>
+	   <li> <c>ld</c> for matrix interfaces are usually just
+	   copied over: the leading dimension (ld) usually does not
+	   change. </li>
+	   <li> <c>elemsize</c> is usually just copied over. </li>
+	   <li> <c>ptr</c>, the pointer to the data, has to be
+	   computed according to \p i and the father's <c>ptr</c>, so
+	   as to point to the start of the sub data. This should
+	   however be done only if the father has <c>ptr</c> different
+	   from NULL: in the OpenCL case notably, the
+	   <c>dev_handle</c> and <c>offset</c> fields are used
+	   instead. </li>
+	   <li> <c>dev_handle</c> should be just copied over from the
+	   parent. </li>
+	   <li> <c>offset</c> has to be computed according to \p i and
+	   the father's <c>offset</c>, so as to provide the offset of
+	   the start of the sub data. This is notably used for the
+	   OpenCL case. </li>
+	   </ul>
+	*/
 	void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts);
 	void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts);
-	unsigned nchildren;
+	unsigned nchildren; /**< Number of parts to partition the data into. */
+	/**
+	   Return the number of children. This can be used instead of
+	   starpu_data_filter::nchildren when the number of children depends
+	   on the actual data (e.g. the number of blocks in a sparse
+	   matrix).
+	*/
 	unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle);
 	unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle);
+	/**
+	   When children use different data interfaces,
+	   return which interface is used by child number \p id.
+	*/
 	struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id);
 	struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id);
-	unsigned filter_arg;
+	unsigned filter_arg; /**< Additional parameter for the filter function */
+	/**
+	   Additional pointer parameter for
+	   the filter function, such as the
+	   sizes of the different parts. */
 	void *filter_arg_ptr;
 	void *filter_arg_ptr;
 };
 };
 
 
+/** @name Basic API
+ *
+ * @{
+ */
+
+/**
+    Request the partitioning of \p initial_handle into several subdata
+    according to the filter \p f.
+    Here is an example of how to use the function.
+    \code{.c}
+    struct starpu_data_filter f =
+    {
+      .filter_func = starpu_matrix_filter_block,
+      .nchildren = nslicesx
+    };
+    starpu_data_partition(A_handle, &f);
+    \endcode
+*/
 void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
 void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
+
+/**
+   Unapply the filter which has been applied to \p root_data, thus
+   unpartitioning the data. The pieces of data are collected back into
+   one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
+   Tasks working on the partitioned data will be waited for
+   by starpu_data_unpartition().
+
+   Here is an example of how to use the function.
+   \code{.c}
+   starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+   \endcode
+*/
 void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
 void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
 
 
+/**
+   Return the \p i -th child of the given \p handle, which must have
+   been partitioned beforehand.
+*/
+starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
+
+/**
+   Return the number of children \p handle has been partitioned into.
+*/
+int starpu_data_get_nb_children(starpu_data_handle_t handle);
+
+/**
+   After partitioning a StarPU data by applying a filter,
+   starpu_data_get_sub_data() can be used to get handles for each of the
+   data portions. \p root_data is the parent data that was partitioned.
+   \p depth is the number of filters to traverse (in case several filters
+   have been applied, to e.g. partition in row blocks, and then in column
+   blocks), and the subsequent parameters are the indexes. The function
+   returns a handle to the subdata.
+
+   Here is an example of how to use the function.
+   \code{.c}
+   h = starpu_data_get_sub_data(A_handle, 1, taskx);
+   \endcode
+*/
+starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
+
+/**
+   Similar to starpu_data_get_sub_data() but use a \c va_list for the
+   parameter list.
+*/
+starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
+
+/**
+   Apply \p nfilters filters to the handle designated by \p
+   root_data recursively. \p nfilters pointers to variables of the
+   type starpu_data_filter should be given.
+*/
+void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
+
+/**
+   Apply \p nfilters filters to the handle designated by
+   \p root_data recursively. Use a \c va_list of pointers to
+   variables of the type starpu_data_filter.
+*/
+void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
+
+/** @} */
+
+/** @name Asynchronous API
+ *
+ * @{
+ */
+
+/**
+   Plan to partition \p initial_handle into several subdata according to
+   the filter \p f.
+   The handles are returned into the \p children array, which has to be
+   the same size as the number of parts described in \p f. These handles
+   are not immediately usable, starpu_data_partition_submit() has to be
+   called to submit the actual partitioning.
+
+   Here is an example of how to use the function:
+   \code{.c}
+   starpu_data_handle_t children[nslicesx];
+   struct starpu_data_filter f =
+   {
+     .filter_func = starpu_matrix_filter_block,
+     .nchildren = nslicesx
+   };
+   starpu_data_partition_plan(A_handle, &f, children);
+   \endcode
+*/
 void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children);
 void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children);
+
+/**
+   Submit the actual partitioning of \p initial_handle into the \p nparts
+   \p children handles. This call is asynchronous, it only submits that the
+   partitioning should be done, so that the \p children handles can now be used to
+   submit tasks, and \p initial_handle can not be used to submit tasks any more (to
+   guarantee coherency).
+   For instance,
+   \code{.c}
+   starpu_data_partition_submit(A_handle, nslicesx, children);
+   \endcode
+*/
 void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
 void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
+
+/**
+   Similar to starpu_data_partition_submit(), but do not invalidate \p
+   initial_handle. This allows to continue using it, but the application has to be
+   careful not to write to \p initial_handle or \p children handles, only read from
+   them, since the coherency is otherwise not guaranteed.  This thus allows to
+   submit various tasks which concurrently read from various partitions of the data.
+
+   When the application wants to write to \p initial_handle again, it should call
+   starpu_data_unpartition_submit(), which will properly add dependencies between the
+   reads on the \p children and the writes to be submitted.
+
+   If instead the application wants to write to \p children handles, it should
+   call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
+   dependencies between the reads on the \p initial_handle and the writes to be
+   submitted.
+*/
 void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
 void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
+
+/**
+   Assume that a partitioning of \p initial_handle has already been submitted
+   in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
+   that partitioning into read-write mode for the \p children, by invalidating \p
+   initial_handle, and adding the necessary dependencies.
+*/
 void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
 void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
+
+/**
+   Assuming that \p initial_handle is partitioned into \p children,
+   submit an unpartitioning of \p initial_handle, i.e. submit a
+   gathering of the pieces on the requested \p gathering_node memory
+   node, and submit an invalidation of the children.
+ */
 void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
 void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
+
 void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int gathering_node);
 void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int gathering_node);
+
+/**
+   Similar to starpu_data_partition_submit(), but do not invalidate \p
+   initial_handle. This allows to continue using it, but the application has to be
+   careful not to write to \p initial_handle or \p children handles, only read from
+   them, since the coherency is otherwise not guaranteed.  This thus allows to
+   submit various tasks which concurrently read from various
+   partitions of the data.
+*/
 void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
 void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
+
+/**
+   Clear the partition planning established between \p root_data and
+   \p children with starpu_data_partition_plan(). This will notably
+   submit the unregistration of all the \p children, which can thus not be
+   used any more afterwards.
+*/
 void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
 void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
 
 
+/**
+   Similar to starpu_data_unpartition_submit_sequential_consistency()
+   but allow to specify a callback function for the unpartitioning task.
+*/
 void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg);
 void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg);
+
+/**
+   Similar to starpu_data_partition_submit() but also allow to specify
+   the coherency to be used for the main data \p initial_handle
+   through the parameter \p sequential_consistency.
+*/
 void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
 void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
+
+/**
+   Similar to starpu_data_unpartition_submit() but also allow to specify
+   the coherency to be used for the main data \p initial_handle
+   through the parameter \p sequential_consistency.
+*/
 void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
 void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
+
+/**
+   Disable the automatic partitioning of the data \p handle for which
+   an asynchronous plan has previously been submitted.
+*/
 void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
 void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
 
 
-int starpu_data_get_nb_children(starpu_data_handle_t handle);
-starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
+/** @} */
 
 
-starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
-starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
-
-void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
-void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
+/** @name Predefined BCSR Filter Functions
+ * Predefined partitioning functions for BCSR data. Examples on how to
+ * use them are shown in \ref PartitioningData.
+ * @{
+ */
 
 
+/**
+   Partition a block-sparse matrix into dense matrices.
+ */
 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/** @} */
+
+/** @name Predefined CSR Filter Functions
+ * Predefined partitioning functions for CSR data. Examples on how to
+ * use them are shown in \ref PartitioningData.
+ * @{
+ */
+
+/**
+   Partition a block-sparse matrix into vertical block-sparse matrices.
+ */
 void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
 
+/** @} */
+
+/** @name Predefined Matrix Filter Functions
+ * Predefined partitioning functions for matrix
+ * data. Examples on how to use them are shown in \ref
+ * PartitioningData.
+ * @{
+ */
+
+/**
+   Partition a dense Matrix along the x dimension, thus getting (x/\p
+   nparts ,y) matrices. If \p nparts does not divide x, the last
+   submatrix contains the remainder.
+ */
 void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a dense Matrix along the x dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
+   nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
+   the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>: This can
+   only be used for read-only access, as no coherency is enforced for the
+   shadowed parts. A usage example is available in
+   examples/filters/shadow2d.c
+ */
 void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a dense Matrix along the y dimension, thus getting
+   (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
+   submatrix contains the remainder.
+ */
 void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a dense Matrix along the y dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
+   divide y-2*shadow, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>: This can only be used for read-only access, as no
+   coherency is enforced for the shadowed parts. A usage example is
+   available in examples/filters/shadow2d.c
+*/
 void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
 
+/** @} */
+
+/** @name Predefined Vector Filter Functions
+ * Predefined partitioning functions for vector
+ * data. Examples on how to use them are shown in \ref
+ * PartitioningData.
+ * @{
+ */
+
+/**
+   Return in \p child_interface the \p id th element of the vector
+   represented by \p father_interface once partitioned in \p nparts chunks of
+   equal size.
+ */
 void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Return in \p child_interface the \p id th element of the vector
+   represented by \p father_interface once partitioned in \p nparts chunks of
+   equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
+   of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
+   of \p f must be the shadow size cast into \c void*.
+
+   <b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts. A usage example is available in
+   examples/filters/shadow.c
+*/
 void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Return in \p child_interface the \p id th element of the vector
+   represented by \p father_interface once partitioned into \p nparts chunks
+   according to the <c>filter_arg_ptr</c> field of \p f. The
+   <c>filter_arg_ptr</c> field must point to an array of \p nparts long
+   elements, each of which specifies the number of elements in each chunk
+   of the partition.
+ */
 void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Return in \p child_interface the \p id th element of the vector
+   represented by \p father_interface once partitioned into \p nparts chunks
+   according to the <c>filter_arg_ptr</c> field of \p f. The
+   <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
+   elements, each of which specifies the number of elements in each chunk
+   of the partition.
+ */
 void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Return in \p child_interface the \p id th element of the vector
+   represented by \p father_interface once partitioned in <c>2</c> chunks of
+   equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
+ */
 void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
 
+/** @} */
+
+/** @name Predefined Block Filter Functions
+ * Predefined partitioning functions for block data. Examples on how
+ * to use them are shown in \ref PartitioningData. An example is
+ * available in \c examples/filters/shadow3d.c
+ * @{
+ */
+
+/**
+   Partition a block along the X dimension, thus getting
+   (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
+   submatrix contains the remainder.
+ */
 void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a block along the X dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
+   divide x, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+*/
 void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a block along the Y dimension, thus getting
+   (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
+   submatrix contains the remainder.
+ */
 void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a block along the Y dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
+   divide y, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+ */
 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a block along the Z dimension, thus getting
+   (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
+   submatrix contains the remainder.
+ */
 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a block along the Z dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
+   divide z, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+ */
 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
 
+/** @} */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif
 #endif

File diff suppressed because it is too large
+ 1306 - 152
include/starpu_data_interfaces.h


+ 168 - 26
include/starpu_disk.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2013,2017                                Inria
  * Copyright (C) 2013,2017                                Inria
- * Copyright (C) 2013,2014,2017                           CNRS
+ * Copyright (C) 2013,2014,2017,2019                           CNRS
  * Copyright (C) 2013,2014,2017                           Université de Bordeaux
  * Copyright (C) 2013,2014,2017                           Université de Bordeaux
  * Copyright (C) 2013                                     Corentin Salingue
  * Copyright (C) 2013                                     Corentin Salingue
  *
  *
@@ -20,58 +20,200 @@
 #ifndef __STARPU_DISK_H__
 #ifndef __STARPU_DISK_H__
 #define __STARPU_DISK_H__
 #define __STARPU_DISK_H__
 
 
+/** @defgroup API_Out_Of_Core Out Of Core
+    @{
+ */
+
 #include <sys/types.h>
 #include <sys/types.h>
 #include <starpu_config.h>
 #include <starpu_config.h>
 
 
-/* list of functions to use on disk */
+/** Set of functions to manipulate data on disk. */
 struct starpu_disk_ops
 struct starpu_disk_ops
 {
 {
-	 void *  (*plug)   (void *parameter, starpu_ssize_t size);
-	 void    (*unplug) (void *base);
+	/**
+	   Connect a disk memory at location \p parameter with size \p size, and return a
+	   base as void*, which will be passed by StarPU to all other methods.
+	*/
+	void *  (*plug)   (void *parameter, starpu_ssize_t size);
+	/**
+	   Disconnect a disk memory \p base.
+	*/
+	void    (*unplug) (void *base);
+
+	/**
+	   Measure the bandwidth and the latency for the disk \p node and save them. Return
+	   1 if it could measure it.
+	*/
+	int    (*bandwidth)    (unsigned node, void *base);
+
+	/**
+	   Create a new location for data of size \p size. Return an opaque object pointer.
+	*/
+	void *  (*alloc)  (void *base, size_t size);
+
+	/**
+	   Free a data \p obj previously allocated with starpu_disk_ops::alloc.
+	*/
+	void    (*free)   (void *base, void *obj, size_t size);
+
+	/**
+	   Open an existing location of data, at a specific position \p pos dependent on the backend.
+	*/
+	void *  (*open)   (void *base, void *pos, size_t size);
+	/**
+	   Close, without deleting it, a location of data \p obj.
+	*/
+	void    (*close)  (void *base, void *obj, size_t size);
+
+	/**
+	   Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put
+	   into \p buf. Return the actual number of read bytes.
+	*/
+	int     (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
+	/**
+	   Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return 0 on success.
+	*/
+	int     (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size);
+
+	/**
+	   Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size.
+	*/
+	int	(*full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
+	/**
+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
+	   \p size, so that a \c full_read will get it.
+	*/
+	int 	(*full_write)   (void * base, void * obj, void * ptr, size_t size);
+
+	/**
+	   Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p
+	   offset, from \p buf. Return a void* pointer that StarPU will pass to \c
+	   xxx_request methods for testing for the completion.
+	*/
+	void *  (*async_write)  (void *base, void *obj, void *buf, off_t offset, size_t size);
+	/**
+	   Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p
+	   offset, and put into \p buf. Return a void* pointer that StarPU will pass to \c
+	   xxx_request methods for testing for the completion.
+	*/
+	void *  (*async_read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
+
+	/**
+	   Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size
+	*/
+	void *	(*async_full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
+	/**
+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
+	   \p size, so that a starpu_disk_ops::full_read will get it.
+	*/
+	void *	(*async_full_write)   (void * base, void * obj, void * ptr, size_t size);
+
+	/**
+	   Copy from offset \p offset_src of disk object \p obj_src in \p base_src to
+	   offset \p offset_dst of disk object \p obj_dst in \p base_dst. Return a void*
+	   pointer that StarPU will pass to \c xxx_request methods for testing for the
+	   completion.
+	*/
+	void *  (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
+
+	/**
+	   Wait for completion of request \p async_channel returned by a previous
+	   asynchronous read, write or copy.
+	*/
+	void   (*wait_request) (void * async_channel);
+	/**
+	   Test for completion of request \p async_channel returned by a previous
+	   asynchronous read, write or copy. Return 1 on completion, 0 otherwise.
+	*/
+	int    (*test_request) (void * async_channel);
+
+	/**
+	   Free the request allocated by a previous asynchronous read, write or copy.
+	*/
+	void   (*free_request)(void * async_channel);
 
 
-	 int    (*bandwidth)    (unsigned node, void *base);
+	/* TODO: readv, writev, read2d, write2d, etc. */
+};
 
 
-	 void *  (*alloc)  (void *base, size_t size);
-	 void    (*free)   (void *base, void *obj, size_t size);
+/**
+   Use the stdio library (fwrite, fread...) to read/write on disk.
 
 
-	 void *  (*open)   (void *base, void *pos, size_t size);     /* open an existing file */
-	 void    (*close)  (void *base, void *obj, size_t size);
+   <strong>Warning: It creates one file per allocation !</strong>
 
 
-	 int     (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
-	 int     (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size);
+   Does not support asynchronous transfers.
+*/
+extern struct starpu_disk_ops starpu_disk_stdio_ops;
 
 
-	 int	(*full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
-	 int 	(*full_write)   (void * base, void * obj, void * ptr, size_t size);
+/**
+   Use the HDF5 library.
 
 
-	 void *  (*async_write)  (void *base, void *obj, void *buf, off_t offset, size_t size);
-	 void *  (*async_read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
+   <strong>It doesn't support multiple opening from different processes. </strong>
 
 
-	 void *	(*async_full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
-	 void *	(*async_full_write)   (void * base, void * obj, void * ptr, size_t size);
+   You may only allow one process to write in the HDF5 file.
 
 
-	 void *  (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
-	 void   (*wait_request) (void * async_channel);
-	 int    (*test_request) (void * async_channel);
-	 void   (*free_request)(void * async_channel);
+   <strong>If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. </strong>
+*/
+extern struct starpu_disk_ops starpu_disk_hdf5_ops;
 
 
-	/* TODO: readv, writev, read2d, write2d, etc. */
-};
+/**
+   Use the unistd library (write, read...) to read/write on disk.
 
 
-/* Posix functions to use disk memory */
-extern struct starpu_disk_ops starpu_disk_stdio_ops;
-extern struct starpu_disk_ops starpu_disk_hdf5_ops;
+   <strong>Warning: It creates one file per allocation !</strong>
+*/
 extern struct starpu_disk_ops starpu_disk_unistd_ops;
 extern struct starpu_disk_ops starpu_disk_unistd_ops;
+
+/**
+   Use the unistd library (write, read...) to read/write on disk with the O_DIRECT flag.
+
+   <strong>Warning: It creates one file per allocation !</strong>
+
+   Only available on Linux systems.
+*/
 extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
 extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
+
+/**
+   Use the leveldb created by Google. More information at https://code.google.com/p/leveldb/
+   Does not support asynchronous transfers.
+*/
 extern struct starpu_disk_ops starpu_disk_leveldb_ops;
 extern struct starpu_disk_ops starpu_disk_leveldb_ops;
 
 
+/**
+   Close an existing data object opened with starpu_disk_open().
+*/
 void starpu_disk_close(unsigned node, void *obj, size_t size);
 void starpu_disk_close(unsigned node, void *obj, size_t size);
 
 
+/**
+   Open an existing file in a disk memory node. \p size is the size of
+   the file. \p pos is the backend-specific position, given to the
+   \c open method of the disk operations. Return an opaque object
+   pointer.
+*/
 void *starpu_disk_open(unsigned node, void *pos, size_t size);
 void *starpu_disk_open(unsigned node, void *pos, size_t size);
 
 
+/**
+   Register a disk memory node with a set of functions to manipulate
+   data. The \c plug member of \p func will be passed \p parameter,
+   and will return a \c base which will be passed to all \p func methods.
+   <br />
+   SUCCESS: return the disk node. <br />
+   FAIL: return an error code. <br />
+   \p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size
+   being negative means infinite size.
+*/
 int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size);
 int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size);
 
 
+/**
+   Minimum size of a registered disk. The size of a disk is the last
+   parameter of the function starpu_disk_register().
+*/
 #define STARPU_DISK_SIZE_MIN (16*1024*1024)
 #define STARPU_DISK_SIZE_MIN (16*1024*1024)
 
 
+/**
+   Contain the node number of the disk swap, if set up through the
+   \ref STARPU_DISK_SWAP variable.
+*/
 extern int starpu_disk_swap_node;
 extern int starpu_disk_swap_node;
 
 
+/** @} */
+
 #endif /* __STARPU_DISK_H__ */
 #endif /* __STARPU_DISK_H__ */

+ 44 - 1
include/starpu_driver.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
  * Copyright (C) 2009-2014                                Université de Bordeaux
  * Copyright (C) 2009-2014                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,11 @@
 #ifndef __STARPU_DRIVER_H__
 #ifndef __STARPU_DRIVER_H__
 #define __STARPU_DRIVER_H__
 #define __STARPU_DRIVER_H__
 
 
+/** @defgroup API_Running_Drivers Running Drivers
+ *
+ * @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
 #include <starpu_opencl.h>
 #include <starpu_opencl.h>
@@ -28,9 +33,17 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/** structure for a driver */
 struct starpu_driver
 struct starpu_driver
 {
 {
+	/**
+	    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
+	    and ::STARPU_OPENCL_WORKER are currently supported.
+	*/
 	enum starpu_worker_archtype type;
 	enum starpu_worker_archtype type;
+	/**
+	   Identifier of the driver.
+	*/
 	union
 	union
 	{
 	{
 		unsigned cpu_id;
 		unsigned cpu_id;
@@ -43,15 +56,45 @@ struct starpu_driver
 	} id;
 	} id;
 };
 };
 
 
+/**
+   Initialize the given driver, run it until it receives a request to
+   terminate, deinitialize it and return 0 on success. Return
+   <c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
+   (::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
+
+   This is the same as using the following functions: calling
+   starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
+   and finally starpu_driver_deinit().
+*/
 int starpu_driver_run(struct starpu_driver *d);
 int starpu_driver_run(struct starpu_driver *d);
+
+/**
+   Notify all running drivers that they should terminate.
+*/
 void starpu_drivers_request_termination(void);
 void starpu_drivers_request_termination(void);
 
 
+/**
+   Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
+   if starpu_driver::type is not a valid ::starpu_worker_archtype.
+*/
 int starpu_driver_init(struct starpu_driver *d);
 int starpu_driver_init(struct starpu_driver *d);
+
+/**
+   Run the driver once, then return 0 on success, <c>-EINVAL</c> if
+   starpu_driver::type is not a valid ::starpu_worker_archtype.
+*/
 int starpu_driver_run_once(struct starpu_driver *d);
 int starpu_driver_run_once(struct starpu_driver *d);
+
+/**
+   Deinitialize the given driver. Return 0 on success, <c>-EINVAL</c> if
+   starpu_driver::type is not a valid ::starpu_worker_archtype.
+*/
 int starpu_driver_deinit(struct starpu_driver *d);
 int starpu_driver_deinit(struct starpu_driver *d);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_DRIVER_H__ */
 #endif /* __STARPU_DRIVER_H__ */

+ 18 - 2
include/starpu_expert.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010,2011,2015,2017                      CNRS
+ * Copyright (C) 2010,2011,2015,2017,2019                      CNRS
  * Copyright (C) 2010,2011                                Université de Bordeaux
  * Copyright (C) 2010,2011                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,20 +18,36 @@
 #ifndef __STARPU_EXPERT_H__
 #ifndef __STARPU_EXPERT_H__
 #define __STARPU_EXPERT_H__
 #define __STARPU_EXPERT_H__
 
 
-#include <starpu.h>
+/** @defgroup API_Expert_Mode Expert Mode
+ *
+ * @{
+ */
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Wake all the workers, so they can inspect data requests and task
+   submissions again.
+*/
 void starpu_wake_all_blocked_workers(void);
 void starpu_wake_all_blocked_workers(void);
 
 
+/**
+   Register a progression hook, to be called when workers are idle.
+*/
 int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg);
 int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg);
+
+/**
+   Unregister a given progression hook.
+*/
 void starpu_progression_hook_deregister(int hook_id);
 void starpu_progression_hook_deregister(int hook_id);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_EXPERT_H__ */
 #endif /* __STARPU_EXPERT_H__ */

+ 64 - 1
include/starpu_fxt.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012,2013,2016                           Inria
  * Copyright (C) 2012,2013,2016                           Inria
  * Copyright (C) 2013                                     Joris Pablo
  * Copyright (C) 2013                                     Joris Pablo
- * Copyright (C) 2010-2015,2017,2018                      CNRS
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  * Copyright (C) 2010,2011,2013-2018                      Université de Bordeaux
  * Copyright (C) 2010,2011,2013-2018                      Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
@@ -21,6 +21,11 @@
 #ifndef __STARPU_FXT_H__
 #ifndef __STARPU_FXT_H__
 #define __STARPU_FXT_H__
 #define __STARPU_FXT_H__
 
 
+/** @defgroup API_FxT_Support FxT Support
+ *
+ * @{
+ */
+
 #include <starpu_perfmodel.h>
 #include <starpu_perfmodel.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -63,29 +68,87 @@ struct starpu_fxt_options
 	char *anim_path;
 	char *anim_path;
 	char *states_path;
 	char *states_path;
 
 
+	/**
+	   In case we are going to gather multiple traces (e.g in the case of
+	   MPI processes), we may need to prefix the name of the containers.
+	*/
 	char *file_prefix;
 	char *file_prefix;
+	/**
+	   In case we are going to gather multiple traces (e.g in the case of
+	   MPI processes), we may need to prefix the name of the containers.
+	*/
 	uint64_t file_offset;
 	uint64_t file_offset;
+	/**
+	   In case we are going to gather multiple traces (e.g in the case of
+	   MPI processes), we may need to prefix the name of the containers.
+	*/
 	int file_rank;
 	int file_rank;
 
 
+	/**
+	   Output parameters
+	*/
 	char worker_names[STARPU_NMAXWORKERS][256];
 	char worker_names[STARPU_NMAXWORKERS][256];
+	/**
+	   Output parameters
+	*/
 	struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS];
 	struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS];
+	/**
+	   Output parameters
+	*/
 	int nworkers;
 	int nworkers;
 
 
+	/**
+	   In case we want to dump the list of codelets to an external tool
+	*/
 	struct starpu_fxt_codelet_event **dumped_codelets;
 	struct starpu_fxt_codelet_event **dumped_codelets;
+	/**
+	   In case we want to dump the list of codelets to an external tool
+	*/
 	long dumped_codelets_count;
 	long dumped_codelets_count;
 };
 };
 
 
 void starpu_fxt_options_init(struct starpu_fxt_options *options);
 void starpu_fxt_options_init(struct starpu_fxt_options *options);
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
+
+/**
+   Determine whether profiling should be started by starpu_init(), or only when
+   starpu_fxt_start_profiling() is called. \p autostart should be 1 to start
+   profiling from starpu_init(), or 0 to wait for starpu_fxt_start_profiling().
+*/
 void starpu_fxt_autostart_profiling(int autostart);
 void starpu_fxt_autostart_profiling(int autostart);
+
+/**
+   Start recording the trace. The trace is by default started from
+   starpu_init() call, but can be paused by using
+   starpu_fxt_stop_profiling(), in which case
+   starpu_fxt_start_profiling() should be called to resume recording
+   events.
+*/
 void starpu_fxt_start_profiling(void);
 void starpu_fxt_start_profiling(void);
+
+/**
+   Stop recording the trace. The trace is by default stopped when calling
+   starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
+   stop it earlier. starpu_fxt_start_profiling() can then be called to
+   start recording it again, etc.
+*/
 void starpu_fxt_stop_profiling(void);
 void starpu_fxt_stop_profiling(void);
 void starpu_fxt_write_data_trace(char *filename_in);
 void starpu_fxt_write_data_trace(char *filename_in);
+
+/**
+   Add an event in the execution trace if FxT is enabled.
+*/
 void starpu_fxt_trace_user_event(unsigned long code);
 void starpu_fxt_trace_user_event(unsigned long code);
+
+/**
+   Add a string event in the execution trace if FxT is enabled.
+*/
 void starpu_fxt_trace_user_event_string(const char *s);
 void starpu_fxt_trace_user_event_string(const char *s);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_FXT_H__ */
 #endif /* __STARPU_FXT_H__ */

+ 26 - 1
include/starpu_hash.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
- * Copyright (C) 2010,2012,2013,2015,2017                 CNRS
+ * Copyright (C) 2010,2012,2013,2015,2017,2019                 CNRS
  * Copyright (C) 2009-2014                                Université de Bordeaux
  * Copyright (C) 2009-2014                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,11 @@
 #ifndef __STARPU_HASH_H__
 #ifndef __STARPU_HASH_H__
 #define __STARPU_HASH_H__
 #define __STARPU_HASH_H__
 
 
+/** @ingroup API_Data_Interfaces
+ *
+ * @{
+ */
+
 #include <stdint.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <stddef.h>
 
 
@@ -27,14 +32,34 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Compute the CRC of a byte buffer seeded by the \p inputcrc
+   <em>current state</em>. The return value should be considered as the new
+   <em>current state</em> for future CRC computation. This is used for computing
+   data size footprint.
+*/
 uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
 uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
 
 
+/**
+   Compute the CRC of a 32bit number seeded by the \p inputcrc
+   <em>current state</em>. The return value should be considered as the new
+   <em>current state</em> for future CRC computation. This is used for computing
+   data size footprint.
+*/
 uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
 uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
 
 
+/**
+   Compute the CRC of a string seeded by the \p inputcrc <em>current
+   state</em>. The return value should be considered as the new <em>current
+   state</em> for future CRC computation. This is used for computing data
+   size footprint.
+*/
 uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
 uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_HASH_H__ */
 #endif /* __STARPU_HASH_H__ */

+ 22 - 1
include/starpu_mic.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2015,2017                                CNRS
+ * Copyright (C) 2015,2017,2019                                CNRS
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  *
  *
@@ -19,6 +19,11 @@
 #ifndef __STARPU_MIC_H__
 #ifndef __STARPU_MIC_H__
 #define __STARPU_MIC_H__
 #define __STARPU_MIC_H__
 
 
+/** @defgroup API_MIC_Extensions MIC Extensions
+ *
+ * @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 
 
 #ifdef STARPU_USE_MIC
 #ifdef STARPU_USE_MIC
@@ -28,10 +33,23 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+/**
+   Type for MIC function symbols
+*/
 typedef void *starpu_mic_func_symbol_t;
 typedef void *starpu_mic_func_symbol_t;
 
 
+/**
+   Initiate a lookup on each MIC device to find the address of the
+   function named \p func_name, store it in the global array kernels
+   and return the index in the array through \p symbol.
+*/
 int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
 int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
 
 
+/**
+   If successful, return the pointer to the function defined by \p symbol on
+   the device linked to the called device. This can for instance be used
+   in a starpu_mic_func_t implementation.
+*/
 starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
 starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -39,4 +57,7 @@ starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
 #endif
 #endif
 
 
 #endif /* STARPU_USE_MIC */
 #endif /* STARPU_USE_MIC */
+
+/** @} */
+
 #endif /* __STARPU_MIC_H__ */
 #endif /* __STARPU_MIC_H__ */

+ 9 - 1
include/starpu_mpi_ms.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2016,2017                                Inria
  * Copyright (C) 2016,2017                                Inria
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2019                                     CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,11 @@
 #ifndef __STARPU_MPI_MS_H__
 #ifndef __STARPU_MPI_MS_H__
 #define __STARPU_MPI_MS_H__
 #define __STARPU_MPI_MS_H__
 
 
+/** @defgroup API_Master_Slave Master Slave Extension
+ *
+ * @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
@@ -38,4 +43,7 @@ starpu_mpi_ms_kernel_t starpu_mpi_ms_get_kernel(starpu_mpi_ms_func_symbol_t symb
 #endif
 #endif
 
 
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
+
+/** @} */
+
 #endif /* __STARPU_MPI_MS_H__ */
 #endif /* __STARPU_MPI_MS_H__ */

+ 253 - 19
include/starpu_opencl.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010-2014,2018                           Université de Bordeaux
  * Copyright (C) 2010-2014,2018                           Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2017,2019                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,11 @@
 #ifndef __STARPU_OPENCL_H__
 #ifndef __STARPU_OPENCL_H__
 #define __STARPU_OPENCL_H__
 #define __STARPU_OPENCL_H__
 
 
+/** @defgroup API_OpenCL_Extensions OpenCL Extensions
+ *
+ * @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 #ifndef CL_TARGET_OPENCL_VERSION
 #ifndef CL_TARGET_OPENCL_VERSION
@@ -36,65 +41,294 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
-const char *starpu_opencl_error_string(cl_int status);
-void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
-#define STARPU_OPENCL_DISPLAY_ERROR(status) \
-	starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
-
-static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
-{
-	starpu_opencl_display_error(func, file, line, msg, status);
-	assert(0);
-}
-#define STARPU_OPENCL_REPORT_ERROR(status)			\
-	starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
-
-#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status)			\
-	starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
-
+/**
+   Store the OpenCL programs as compiled for the different OpenCL
+   devices.
+*/
 struct starpu_opencl_program
 struct starpu_opencl_program
 {
 {
+	/** Store each program for each OpenCL device. */
 	cl_program programs[STARPU_MAXOPENCLDEVS];
 	cl_program programs[STARPU_MAXOPENCLDEVS];
 };
 };
 
 
+/** @name Writing OpenCL kernels
+    @{
+ */
+
+/**
+   Return the OpenCL context of the device designated by \p devid
+   in \p context.
+*/
 void starpu_opencl_get_context(int devid, cl_context *context);
 void starpu_opencl_get_context(int devid, cl_context *context);
+
+/**
+   Return the cl_device_id corresponding to \p devid in \p device.
+*/
 void starpu_opencl_get_device(int devid, cl_device_id *device);
 void starpu_opencl_get_device(int devid, cl_device_id *device);
+
+/**
+   Return the command queue of the device designated by \p devid
+   into \p queue.
+*/
 void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
 void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
+
+/**
+   Return the context of the current worker.
+*/
 void starpu_opencl_get_current_context(cl_context *context);
 void starpu_opencl_get_current_context(cl_context *context);
+
+/**
+   Return the computation kernel command queue of the current
+   worker.
+*/
 void starpu_opencl_get_current_queue(cl_command_queue *queue);
 void starpu_opencl_get_current_queue(cl_command_queue *queue);
 
 
+/**
+   Set the arguments of a given kernel. The list of arguments
+   must be given as <c>(size_t size_of_the_argument, cl_mem *
+   pointer_to_the_argument)</c>. The last argument must be 0. Return the
+   number of arguments that were successfully set. In case of failure,
+   return the id of the argument that could not be set, and set \p err to
+   the error returned by OpenCL.
+
+   Here is an example:
+   \code{.c}
+   int n;
+   cl_int err;
+   cl_kernel kernel;
+   n = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
+   if (n != 2) fprintf(stderr, "Error : %d\n", err);
+   \endcode
+*/
+int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
+
+/** @} */
+
+/** @name Compiling OpenCL kernels
+    Source codes for OpenCL kernels can be stored in a file or in a
+    string. StarPU provides functions to build the program executable for
+    each available OpenCL device as a cl_program object. This program
+    executable can then be loaded within a specific queue as explained in
+    the next section. These are only helpers; applications can also fill a
+    starpu_opencl_program array by hand for more advanced use (e.g.
+    different programs on the different OpenCL devices, for relocation
+    purpose for instance).
+    @{
+ */
+
+/**
+   Store the contents of the file \p source_file_name in the buffer
+   \p opencl_program_source. The file \p source_file_name can be located in the
+   current directory, or in the directory specified by the environment
+   variable \ref STARPU_OPENCL_PROGRAM_DIR, or
+   in the directory <c>share/starpu/opencl</c> of the installation
+   directory of StarPU, or in the source directory of StarPU. When the
+   file is found, \p located_file_name is the full name of the file as it
+   has been located on the system, \p located_dir_name the directory
+   where it has been located. Otherwise, they are both set to the empty
+   string.
+*/
 void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
 void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
+
+/**
+   Similar to function starpu_opencl_load_program_source() but
+   allocate the buffers \p located_file_name, \p located_dir_name and
+   \p opencl_program_source.
+*/
 void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source);
 void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source);
+
+/**
+   Compile the OpenCL kernel stored in the file \p source_file_name
+   with the given options \p build_options and store the result in the
+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
+   \p source_file_name. The compilation is done for every OpenCL device,
+   and the filename is suffixed with the vendor id and the device id of
+   the OpenCL device.
+*/
 int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options);
 int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options);
+
+/**
+   Compile the OpenCL kernel in the string \p opencl_program_source
+   with the given options \p build_options and store the result in the
+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
+   file_name. The compilation is done for every OpenCL device, and the
+   filename is suffixed with the vendor id and the device id of the
+   OpenCL device.
+*/
 int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options);
 int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options);
 
 
+/**
+   Compile the binary OpenCL kernel identified with \p kernel_id.
+   For every OpenCL device, the binary OpenCL kernel will be loaded from
+   the file
+   <c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
+*/
 int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
 int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
 
 
+/**
+   Compile an OpenCL source code stored in a file.
+*/
 int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options);
 int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options);
+/**
+   Compile an OpenCL source code stored in a string.
+ */
 int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options);
 int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options);
+
+/**
+   Unload an OpenCL compiled code.
+*/
 int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
 int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
 
 
+/** @} */
+
+/** @name Loading OpenCL kernels
+    @{
+ */
+
+/**
+   Create a kernel \p kernel for device \p devid, on its computation
+   command queue returned in \p queue, using program \p opencl_programs
+   and name \p kernel_name.
+*/
 int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid);
 int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid);
+
+/**
+   Release the given \p kernel, to be called after kernel execution.
+*/
 int starpu_opencl_release_kernel(cl_kernel kernel);
 int starpu_opencl_release_kernel(cl_kernel kernel);
 
 
+/** @} */
+
+/** @name OpenCL Statistics
+    @{
+ */
+
+/**
+   Collect statistics on a kernel execution.
+   After termination of the kernels, the OpenCL codelet should call this
+   function with the event returned by \c clEnqueueNDRangeKernel(), to
+   let StarPU collect statistics about the kernel execution (used cycles,
+   consumed energy).
+*/
 int starpu_opencl_collect_stats(cl_event event);
 int starpu_opencl_collect_stats(cl_event event);
 
 
-int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
+/** @} */
+
+/** @name OpenCL Utilities
+    @{
+ */
+
+/**
+   Return the error message in English corresponding to \p status, an OpenCL
+   error code.
+*/
+const char *starpu_opencl_error_string(cl_int status);
+
+/**
+   Given a valid error status, print the corresponding error message on
+   \c stdout, along with the function name \p func, the filename
+   \p file, the line number \p line and the message \p msg.
+*/
+void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
+
+/**
+   Call the function starpu_opencl_display_error() with the error
+   \p status, the current function name, current file and line number,
+   and an empty message.
+*/
+#define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
+
+/**
+   Call the function starpu_opencl_display_error() and abort.
+*/
+static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
+{
+	starpu_opencl_display_error(func, file, line, msg, status);
+	assert(0);
+}
+
+/**
+   Call the function starpu_opencl_report_error() with the error \p
+   status, the current function name, current file and line number,
+   and an empty message.
+*/
+#define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
 
 
+/**
+   Call the function starpu_opencl_report_error() with \p msg
+   and \p status, the current function name, current file and line number.
+*/
+#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
+
+/**
+   Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
+   combination of \c cl_mem_flags values.
+*/
 cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags);
 cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags);
 
 
+/**
+   Copy \p size bytes from the given \p ptr on RAM \p src_node to the
+   given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
+   bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
+   synchronous, i.e. the queue is synchronised before returning. If not
+   <c>NULL</c>, \p event can be used after the call to wait for this
+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
+   if the copy was successful, or a valid OpenCL error code otherwise.
+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+   asynchronous launch was successful, or to 0 if \p event was
+   <c>NULL</c>.
+*/
 cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
 
+/**
+   Copy \p size bytes asynchronously from the given \p buffer on OpenCL
+   \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
+   offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
+   is synchronous, i.e. the queue is synchronised before returning. If not
+   <c>NULL</c>, \p event can be used after the call to wait for this
+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
+   if the copy was successful, or a valid OpenCL error code otherwise.
+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+   asynchronous launch was successful, or to 0 if \p event was
+   <c>NULL</c>.
+*/
 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
 
+/**
+   Copy \p size bytes asynchronously from byte offset \p src_offset of \p
+   src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
+   OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
+   synchronous, i.e. the queue is synchronised before returning. If not
+   <c>NULL</c>, \p event can be used after the call to wait for this
+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
+   if the copy was successful, or a valid OpenCL error code otherwise.
+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+   asynchronous launch was successful, or to 0 if \p event was
+   <c>NULL</c>.
+*/
 cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
 cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
 
 
+/**
+   Copy \p size bytes from byte offset \p src_offset of \p src on \p
+   src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
+   event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
+   synchronised before returning. If not <c>NULL</c>, \p event can be
+   used after the call to wait for this particular copy to complete. The
+   function returns <c>-EAGAIN</c> if the asynchronous launch was
+   successful. It returns 0 if the synchronous copy was successful, or
+   fails otherwise.
+*/
 cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event);
 cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event);
 
 
+/** @} */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
 #endif /* STARPU_USE_OPENCL */
 #endif /* STARPU_USE_OPENCL */
-#endif /* __STARPU_OPENCL_H__ */
 
 
+/** @} */
+
+#endif /* __STARPU_OPENCL_H__ */

File diff suppressed because it is too large
+ 945 - 40
include/starpu_openmp.h


+ 16 - 5
include/starpu_perfmodel.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2014,2016                           Inria
  * Copyright (C) 2011-2014,2016                           Inria
  * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2009-2018                                Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2017, 2019                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
  *
  *
@@ -21,13 +21,14 @@
 #ifndef __STARPU_PERFMODEL_H__
 #ifndef __STARPU_PERFMODEL_H__
 #define __STARPU_PERFMODEL_H__
 #define __STARPU_PERFMODEL_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 #include <stdio.h>
 #include <stdio.h>
 
 
-#include <starpu_util.h>
-#include <starpu_worker.h>
-#include <starpu_task.h>
-
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
@@ -205,8 +206,18 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
 
 extern struct starpu_perfmodel starpu_perfmodel_nop;
 extern struct starpu_perfmodel starpu_perfmodel_nop;
 
 
+/**
+   Display statistics about the current data handles registered
+   within StarPU. StarPU must have been configured with the configure
+   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
+   MemoryFeedback).
+*/
+void starpu_data_display_memory_stats();
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_PERFMODEL_H__ */
 #endif /* __STARPU_PERFMODEL_H__ */

+ 8 - 2
include/starpu_profiling.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
+ * Copyright (C) 2010,2011,2013,2015,2017,2019                 CNRS
  * Copyright (C) 2016                                     Inria
  * Copyright (C) 2016                                     Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,10 +19,14 @@
 #ifndef __STARPU_PROFILING_H__
 #ifndef __STARPU_PROFILING_H__
 #define __STARPU_PROFILING_H__
 #define __STARPU_PROFILING_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 #include <errno.h>
 #include <errno.h>
 #include <time.h>
 #include <time.h>
-#include <starpu_util.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
@@ -183,4 +187,6 @@ void starpu_profiling_worker_helper_display_summary(void);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_PROFILING_H__ */
 #endif /* __STARPU_PROFILING_H__ */

+ 8 - 1
include/starpu_rand.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2012,2013,2015,2017                      CNRS
+ * Copyright (C) 2012,2013,2015,2017,2019                      CNRS
  * Copyright (C) 2012,2015,2016                           Université de Bordeaux
  * Copyright (C) 2012,2015,2016                           Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  *
  *
@@ -19,6 +19,11 @@
 #ifndef __STARPU_RAND_H__
 #ifndef __STARPU_RAND_H__
 #define __STARPU_RAND_H__
 #define __STARPU_RAND_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <stdlib.h>
 #include <stdlib.h>
 #include <starpu_config.h>
 #include <starpu_config.h>
 
 
@@ -66,4 +71,6 @@ typedef int starpu_drand48_data;
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_RAND_H__ */
 #endif /* __STARPU_RAND_H__ */

+ 7 - 3
include/starpu_scc.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2015,2017                                CNRS
+ * Copyright (C) 2015,2017,2019                                CNRS
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  *
  *
@@ -16,12 +16,15 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-
 #ifndef __STARPU_SCC_H__
 #ifndef __STARPU_SCC_H__
 #define __STARPU_SCC_H__
 #define __STARPU_SCC_H__
 
 
-#include <starpu_config.h>
+/** @defgroup
+ *
+ * @{
+ */
 
 
+#include <starpu_config.h>
 
 
 #ifdef STARPU_USE_SCC
 #ifdef STARPU_USE_SCC
 
 
@@ -33,5 +36,6 @@ starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
 
 
 #endif /* STARPU_USE_SCC */
 #endif /* STARPU_USE_SCC */
 
 
+/** @} */
 
 
 #endif /* __STARPU_SCC_H__ */
 #endif /* __STARPU_SCC_H__ */

+ 8 - 1
include/starpu_sched_component.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2017                                     Arthur Chevalier
  * Copyright (C) 2017                                     Arthur Chevalier
  * Copyright (C) 2013,2014,2017                           Inria
  * Copyright (C) 2013,2014,2017                           Inria
- * Copyright (C) 2014,2015,2017                           CNRS
+ * Copyright (C) 2014,2015,2017,2019                           CNRS
  * Copyright (C) 2014-2019                                Université de Bordeaux
  * Copyright (C) 2014-2019                                Université de Bordeaux
  * Copyright (C) 2013                                     Simon Archipoff
  * Copyright (C) 2013                                     Simon Archipoff
  *
  *
@@ -21,6 +21,11 @@
 #ifndef __STARPU_SCHED_COMPONENT_H__
 #ifndef __STARPU_SCHED_COMPONENT_H__
 #define __STARPU_SCHED_COMPONENT_H__
 #define __STARPU_SCHED_COMPONENT_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
@@ -246,4 +251,6 @@ while(0)
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_SCHED_COMPONENT_H__ */
 #endif /* __STARPU_SCHED_COMPONENT_H__ */

+ 8 - 1
include/starpu_sched_ctx.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010,2012-2017                           Inria
  * Copyright (C) 2010,2012-2017                           Inria
  * Copyright (C) 2017                                     Arthur Chevalier
  * Copyright (C) 2017                                     Arthur Chevalier
- * Copyright (C) 2012-2014,2017                           CNRS
+ * Copyright (C) 2012-2014,2017,2019                           CNRS
  * Copyright (C) 2012,2014,2016                           Université de Bordeaux
  * Copyright (C) 2012,2014,2016                           Université de Bordeaux
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
  *
  *
@@ -21,6 +21,11 @@
 #ifndef __STARPU_SCHED_CTX_H__
 #ifndef __STARPU_SCHED_CTX_H__
 #define __STARPU_SCHED_CTX_H__
 #define __STARPU_SCHED_CTX_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -184,4 +189,6 @@ void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_SCHED_CTX_H__ */
 #endif /* __STARPU_SCHED_CTX_H__ */

+ 8 - 3
include/starpu_sched_ctx_hypervisor.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010,2012,2013,2015                      Inria
  * Copyright (C) 2010,2012,2013,2015                      Inria
- * Copyright (C) 2013,2017                                CNRS
+ * Copyright (C) 2013,2017,2019                                CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,16 @@
 #ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__
 #ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__
 #define __STARPU_SCHED_CTX_HYPERVISOR_H__
 #define __STARPU_SCHED_CTX_HYPERVISOR_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
 #endif
 #endif
 
 
-
-
 struct starpu_sched_ctx_performance_counters
 struct starpu_sched_ctx_performance_counters
 {
 {
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
 	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
@@ -50,4 +53,6 @@ void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_SCHED_CTX_HYPERVISOR_H__ */
 #endif /* __STARPU_SCHED_CTX_HYPERVISOR_H__ */

+ 8 - 1
include/starpu_scheduler.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011-2013,2015-2017                      Inria
  * Copyright (C) 2011-2013,2015-2017                      Inria
  * Copyright (C) 2010-2018                                Université de Bordeaux
  * Copyright (C) 2010-2018                                Université de Bordeaux
- * Copyright (C) 2011-2013,2015,2017                      CNRS
+ * Copyright (C) 2011-2013,2015,2017,2019                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
@@ -22,6 +22,11 @@
 #ifndef __STARPU_SCHEDULER_H__
 #ifndef __STARPU_SCHEDULER_H__
 #define __STARPU_SCHEDULER_H__
 #define __STARPU_SCHEDULER_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -115,4 +120,6 @@ void starpu_sched_task_break(struct starpu_task *task);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_SCHEDULER_H__ */
 #endif /* __STARPU_SCHEDULER_H__ */

+ 8 - 2
include/starpu_sink.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2015,2017                                CNRS
+ * Copyright (C) 2015,2017,2019                                CNRS
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  *
  *
@@ -16,10 +16,16 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-
 #ifndef __STARPU_SINK_H__
 #ifndef __STARPU_SINK_H__
 #define __STARPU_SINK_H__
 #define __STARPU_SINK_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 void starpu_sink_common_worker(int argc, char **argv);
 void starpu_sink_common_worker(int argc, char **argv);
 
 
+/** @} */
+
 #endif /* __STARPU_SINK_H__ */
 #endif /* __STARPU_SINK_H__ */

+ 16 - 1
include/starpu_stdlib.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2013,2015-2017                      CNRS
+ * Copyright (C) 2010-2013,2015-2017,2019                      CNRS
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2017                                     Inria
  * Copyright (C) 2010-2016,2019                           Université de Bordeaux
  * Copyright (C) 2010-2016,2019                           Université de Bordeaux
  *
  *
@@ -19,6 +19,11 @@
 #ifndef __STARPU_STDLIB_H__
 #ifndef __STARPU_STDLIB_H__
 #define __STARPU_STDLIB_H__
 #define __STARPU_STDLIB_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -35,6 +40,14 @@ extern "C"
 
 
 #define STARPU_MALLOC_SIMULATION_FOLDED	((1ULL)<<6)
 #define STARPU_MALLOC_SIMULATION_FOLDED	((1ULL)<<6)
 
 
+/**
+   \deprecated
+   Equivalent to starpu_malloc(). This macro is provided to avoid
+   breaking old code.
+*/
+#define starpu_data_malloc_pinned_if_possible	starpu_malloc
+#define starpu_data_free_pinned_if_possible	starpu_free
+
 void starpu_malloc_set_align(size_t align);
 void starpu_malloc_set_align(size_t align);
 
 
 int starpu_malloc(void **A, size_t dim);
 int starpu_malloc(void **A, size_t dim);
@@ -65,4 +78,6 @@ void starpu_sleep(float nb_sec);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_STDLIB_H__ */
 #endif /* __STARPU_STDLIB_H__ */

+ 7 - 3
include/starpu_task.h

@@ -21,10 +21,12 @@
 #ifndef __STARPU_TASK_H__
 #ifndef __STARPU_TASK_H__
 #define __STARPU_TASK_H__
 #define __STARPU_TASK_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
-#include <starpu_data.h>
-#include <starpu_util.h>
-#include <starpu_task_bundle.h>
 #include <errno.h>
 #include <errno.h>
 #include <assert.h>
 #include <assert.h>
 
 
@@ -380,4 +382,6 @@ unsigned starpu_task_get_implementation(struct starpu_task *task);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_TASK_H__ */
 #endif /* __STARPU_TASK_H__ */

+ 8 - 1
include/starpu_task_bundle.h

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
  * Copyright (C) 2010,2011,2014                           Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
- * Copyright (C) 2011-2013,2015,2017                      CNRS
+ * Copyright (C) 2011-2013,2015,2017,2019                      CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,6 +20,11 @@
 #ifndef __STARPU_TASK_BUNDLE_H__
 #ifndef __STARPU_TASK_BUNDLE_H__
 #define __STARPU_TASK_BUNDLE_H__
 #define __STARPU_TASK_BUNDLE_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
@@ -41,4 +46,6 @@ void starpu_task_bundle_close(starpu_task_bundle_t bundle);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_TASK_BUNDLE_H__ */
 #endif /* __STARPU_TASK_BUNDLE_H__ */

+ 8 - 1
include/starpu_task_list.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012,2014,2016,2017                 Université de Bordeaux
  * Copyright (C) 2010-2012,2014,2016,2017                 Université de Bordeaux
- * Copyright (C) 2011-2014,2017,2018                      CNRS
+ * Copyright (C) 2011-2014,2017,2018,2019                      CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,6 +18,11 @@
 #ifndef __STARPU_TASK_LIST_H__
 #ifndef __STARPU_TASK_LIST_H__
 #define __STARPU_TASK_LIST_H__
 #define __STARPU_TASK_LIST_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu_task.h>
 #include <starpu_task.h>
 #include <starpu_util.h>
 #include <starpu_util.h>
 
 
@@ -84,4 +89,6 @@ void starpu_task_list_move(struct starpu_task_list *ldst, struct starpu_task_lis
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_TASK_LIST_H__ */
 #endif /* __STARPU_TASK_LIST_H__ */

+ 7 - 0
include/starpu_task_util.h

@@ -19,6 +19,11 @@
 #ifndef __STARPU_TASK_UTIL_H__
 #ifndef __STARPU_TASK_UTIL_H__
 #define __STARPU_TASK_UTIL_H__
 #define __STARPU_TASK_UTIL_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <stdio.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <string.h>
 #include <string.h>
@@ -103,4 +108,6 @@ void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_TASK_UTIL_H__ */
 #endif /* __STARPU_TASK_UTIL_H__ */

+ 8 - 3
include/starpu_thread.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2013,2015-2017                           Inria
  * Copyright (C) 2013,2015-2017                           Inria
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2019                           CNRS
  * Copyright (C) 2010,2012-2019                           Université de Bordeaux
  * Copyright (C) 2010,2012-2019                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,11 @@
 #ifndef __STARPU_THREAD_H__
 #ifndef __STARPU_THREAD_H__
 #define __STARPU_THREAD_H__
 #define __STARPU_THREAD_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu_config.h>
 #include <starpu_config.h>
 #include <starpu_util.h>
 #include <starpu_util.h>
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
@@ -441,6 +446,6 @@ int starpu_sem_wait(starpu_sem_t *);
 }
 }
 #endif
 #endif
 
 
-#endif /* __STARPU_THREAD_H__ */
-
+/** @} */
 
 
+#endif /* __STARPU_THREAD_H__ */

+ 8 - 1
include/starpu_thread_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012,2013                                Inria
  * Copyright (C) 2012,2013                                Inria
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
  * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,11 @@
 #ifndef __STARPU_THREAD_UTIL_H__
 #ifndef __STARPU_THREAD_UTIL_H__
 #define __STARPU_THREAD_UTIL_H__
 #define __STARPU_THREAD_UTIL_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu_util.h>
 #include <starpu_util.h>
 #include <starpu_thread.h>
 #include <starpu_thread.h>
 #include <errno.h>
 #include <errno.h>
@@ -383,4 +388,6 @@ int _starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_m
 } while (0)
 } while (0)
 #endif /* _MSC_VER */
 #endif /* _MSC_VER */
 
 
+/** @} */
+
 #endif /* __STARPU_THREAD_UTIL_H__ */
 #endif /* __STARPU_THREAD_UTIL_H__ */

+ 8 - 2
include/starpu_top.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011                                     Inria
  * Copyright (C) 2011                                     Inria
- * Copyright (C) 2011-2013,2017                           CNRS
+ * Copyright (C) 2011-2013,2017,2019                           CNRS
  * Copyright (C) 2011-2013                                Université de Bordeaux
  * Copyright (C) 2011-2013                                Université de Bordeaux
  * Copyright (C) 2011                                     William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony
  * Copyright (C) 2011                                     William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony
  *
  *
@@ -20,6 +20,11 @@
 #ifndef __STARPU_TOP_H__
 #ifndef __STARPU_TOP_H__
 #define __STARPU_TOP_H__
 #define __STARPU_TOP_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <starpu.h>
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <time.h>
 #include <time.h>
@@ -109,5 +114,6 @@ void starpu_top_debug_lock(const char *message);
 }
 }
 #endif
 #endif
 
 
-#endif /* __STARPU_TOP_H__ */
+/** @} */
 
 
+#endif /* __STARPU_TOP_H__ */

+ 8 - 1
include/starpu_tree.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2014                                     Inria
  * Copyright (C) 2014                                     Inria
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2019                           CNRS
  * Copyright (C) 2009-2014,2016                           Université de Bordeaux
  * Copyright (C) 2009-2014,2016                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,6 +19,11 @@
 #ifndef __STARPU_TREE_H__
 #ifndef __STARPU_TREE_H__
 #define __STARPU_TREE_H__
 #define __STARPU_TREE_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 extern "C"
 extern "C"
 {
 {
@@ -49,4 +54,6 @@ void starpu_tree_free(struct starpu_tree *tree);
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_TREE_H__ */
 #endif /* __STARPU_TREE_H__ */

+ 7 - 0
include/starpu_util.h

@@ -19,6 +19,11 @@
 #ifndef __STARPU_UTIL_H__
 #ifndef __STARPU_UTIL_H__
 #define __STARPU_UTIL_H__
 #define __STARPU_UTIL_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <stdio.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdint.h>
@@ -486,4 +491,6 @@ struct timespec
 }
 }
 #endif
 #endif
 
 
+/** @} */
+
 #endif /* __STARPU_UTIL_H__ */
 #endif /* __STARPU_UTIL_H__ */

+ 32 - 3
include/starpu_worker.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2013-2017                                Inria
  * Copyright (C) 2013-2017                                Inria
- * Copyright (C) 2010-2015,2017                           CNRS
+ * Copyright (C) 2010-2015,2017,2019                           CNRS
  * Copyright (C) 2009-2014,2016,2017,2019                 Université de Bordeaux
  * Copyright (C) 2009-2014,2016,2017,2019                 Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2016                                     Uppsala University
  * Copyright (C) 2016                                     Uppsala University
@@ -21,6 +21,11 @@
 #ifndef __STARPU_WORKER_H__
 #ifndef __STARPU_WORKER_H__
 #define __STARPU_WORKER_H__
 #define __STARPU_WORKER_H__
 
 
+/** @defgroup
+ *
+ * @{
+ */
+
 #include <stdlib.h>
 #include <stdlib.h>
 #include <starpu_config.h>
 #include <starpu_config.h>
 #include <starpu_thread.h>
 #include <starpu_thread.h>
@@ -35,6 +40,20 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
+enum starpu_node_kind /* kind of a memory node; this is the return type of starpu_node_get_kind() declared further down in this header */
+{
+	STARPU_UNUSED     = 0x00,
+	STARPU_CPU_RAM    = 0x01,
+	STARPU_CUDA_RAM   = 0x02,
+	STARPU_OPENCL_RAM = 0x03,
+	STARPU_DISK_RAM   = 0x04,
+	STARPU_MIC_RAM    = 0x05,
+	STARPU_SCC_RAM    = 0x06,
+	STARPU_SCC_SHM    = 0x07,
+	STARPU_MPI_MS_RAM = 0x08 /* last enumerator; values are assigned explicitly and run contiguously from 0x00 to 0x08 */
+
+};
+
 enum starpu_worker_archtype
 enum starpu_worker_archtype
 {
 {
 	STARPU_CPU_WORKER,
 	STARPU_CPU_WORKER,
@@ -52,7 +71,7 @@ struct starpu_sched_ctx_iterator
 	void *value;
 	void *value;
 	void *possible_value;
 	void *possible_value;
 	char visited[STARPU_NMAXWORKERS];
 	char visited[STARPU_NMAXWORKERS];
-	int possibly_parallel; 
+	int possibly_parallel;
 };
 };
 
 
 enum starpu_worker_collection_type
 enum starpu_worker_collection_type
@@ -178,9 +197,19 @@ hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
 hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
 hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
 #endif
 #endif
 
 
+unsigned starpu_worker_get_memory_node(unsigned workerid);
+unsigned starpu_memory_nodes_get_count(void);
+int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
+int starpu_memory_nodes_get_numa_count(void);
+int starpu_memory_nodes_numa_id_to_devid(int osid);
+int starpu_memory_nodes_numa_devid_to_id(unsigned id);
+
+enum starpu_node_kind starpu_node_get_kind(unsigned node);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
-#endif /* __STARPU_WORKER_H__ */
+/** @} */
 
 
+#endif /* __STARPU_WORKER_H__ */