
move documentation from separate doxygen files to public .h files

Nathalie Furmento, 6 years ago
Commit e8e8cf9266

+ 1 - 3
doc/doxygen/Makefile.am

@@ -111,15 +111,13 @@ chapters =	\
 	chapters/api/openmp_runtime_support.doxy \
 	chapters/api/mic_extensions.doxy \
 	chapters/api/scc_extensions.doxy \
-	chapters/api/parallel_tasks.doxy \
 	chapters/api/scheduling_contexts.doxy \
 	chapters/api/scheduling_policy.doxy \
 	chapters/api/versioning.doxy \
 	chapters/api/workers.doxy \
 	chapters/api/threads.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
-	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
-	chapters/api/interoperability.doxy
+	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy
 
 images = 	\
 	chapters/images/data_trace.eps \

+ 0 - 336
doc/doxygen/chapters/api/interoperability.doxy

@@ -1,336 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2018                                     Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Interop_Support Interoperability Support
-
-\brief This section describes the interface supplied by StarPU to interoperate with other runtime systems.
-
-\enum e_starpurm_drs_ret
-\ingroup API_Interop_Support
-StarPU Resource Manager return type.
-\var starpurm_drs_ret::starpurm_DRS_SUCCESS
-  Dynamic resource sharing operation succeeded.
-\var starpurm_drs_ret::starpurm_DRS_DISABLD
-  Dynamic resource sharing is disabled.
-\var starpurm_drs_ret::starpurm_DRS_PERM
-  Dynamic resource sharing operation is not authorized or implemented.
-\var starpurm_drs_ret::starpurm_DRS_EINVAL
-  Dynamic resource sharing operation has been called with one or more invalid parameters.
-
-@name Initialisation
-\ingroup API_Interop_Support
-
-\fn void starpurm_initialize(void)
-\ingroup API_Interop_Support
-Initialize StarPU and the StarPU-RM resource management module. The starpu_init() function should not have been called before the call to starpurm_initialize(). The starpurm_initialize() function will take care of this
-
-\fn void starpurm_shutdown(void)
-\ingroup API_Interop_Support
-Shutdown StarPU-RM and StarPU. The starpu_shutdown() function should not be called before. The starpurm_shutdown() function will take care of this.
-
-
-@name Spawn
-\ingroup API_Interop_Support
-
-\fn void starpurm_spawn_kernel_on_cpus(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset)
-\ingroup API_Interop_Support
-Allocate a temporary context spanning the units selected in the cpuset bitmap,
-set it as the default context for the current thread, and call user function \p f.
-Upon the return of user function \p f, the temporary context is freed and the previous
-default context for the current thread is restored.
-
-\fn void starpurm_spawn_kernel_on_cpus_callback(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset, void(*cb_f)(void *), void *cb_args)
-\ingroup API_Interop_Support
-Spawn a POSIX thread and returns immediately. The thread spawned will allocate
-a temporary context spanning the units selected in the cpuset bitmap, set it as
-the default context for the current thread, and call user function \p f. Upon
-the return of user function \p f, the temporary context will be freed and the
-previous default context for the current thread restored. A user specified callback \p cb_f
-will be called just before the termination of the thread.
-
-@name DynamicResourceSharing
-\ingroup API_Interop_Support
-
-\fn starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Turn-on dynamic resource sharing support.
-
-\fn starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Turn-off dynamic resource sharing support.
-
-\fn int starpurm_drs_enabled_p(void)
-\ingroup API_Interop_Support
-Return the state of the dynamic resource sharing support (\p =!0 enabled, \p =0 disabled).
-
-\fn starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int max)
-\ingroup API_Interop_Support
-Set the maximum number of CPU computing units available for StarPU computations
-to \p max. This number cannot exceed the maximum number of StarPU's CPU worker
-allocated at start-up time.
-
-\fn starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to execute tasks on worker
-corresponding to logical unit \p cpuid. If StarPU does not have a worker
-thread initialized for logical unit \p cpuid, do nothing.
-
-\fn starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to execute tasks on \p ncpus
-more workers, up to the number of StarPU worker threads initialized.
-
-\fn starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to execute tasks on the
-additional logical units selected in \p mask. Logical units of \p mask for which no
-StarPU worker is initialized are silently ignored.
-
-\fn starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Set StarPU's default scheduling context to execute tasks on all
-available logical units for which a StarPU worker has been initialized.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context so as to not execute tasks on worker
-corresponding to logical unit \p cpuid. If StarPU does not have a worker
-thread initialized for logical unit \p cpuid, do nothing.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context to execute tasks on \p ncpus
-less workers.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context so as to not execute tasks on the
-logical units selected in \p mask. Logical units of \p mask for which no
-StarPU worker is initialized are silently ignored.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context so as to remove all logical units.
-
-\fn starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_all_cpus_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_cpu_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_cpus_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_cpu_mask_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_all_cpus_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpu_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpus_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpu_mask_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_all_cpus_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpu_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpus_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_cpu_mask_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_all_cpus_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_cpu_to_starpu().
-
-@name Devices
-\ingroup API_Interop_Support
-
-\fn int starpurm_get_device_type_id(const char *type_str)
-\ingroup API_Interop_Support
-Return the device type ID constant associated to the device type name.
-Valid names for \p type_str are:
-- \c "cpu": regular CPU unit;
-- \c "opencl": OpenCL device unit;
-- \c "cuda": nVidia CUDA device unit;
-- \c "mic": Intel KNC type device unit.
-
-\fn const char *starpurm_get_device_type_name(int type_id)
-\ingroup API_Interop_Support
-Return the device type name associated to the device type ID constant.
-
-\fn int starpurm_get_nb_devices_by_type(int type_id)
-\ingroup API_Interop_Support
-Return the number of initialized StarPU worker for the device type \p type_id.
-
-\fn int starpurm_get_device_id(int type_id, int device_rank)
-\ingroup API_Interop_Support
-Return the unique ID assigned to the \p device_rank nth device of type
-\p type_id.
-
-\fn starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to use \p unit_rank nth
-device of type \p type_id.
-
-\fn starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to use \p ndevices more
-devices of type \p type_id, up to the number of StarPU workers
-initialized for such device type.
-
-\fn starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to use additional devices as
-designated by their corresponding StarPU worker thread(s) CPU-set \p mask.
-
-\fn starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Extend StarPU's default scheduling context to use all devices of type \p
-type_id for which it has a worker thread initialized.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context to not use \p unit_rank nth
-device of type \p type_id.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context to use \p ndevices less
-devices of type \p type_id.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context to not use devices
-designated by their corresponding StarPU worker thread(s) CPU-set \p mask.
-
-\fn starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Shrink StarPU's default scheduling context to use no devices of type \p
-type_id.
-
-\fn starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_device_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_devices_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_device_mask_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_all_devices_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_device_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_devices_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_device_mask_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_all_devices_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_device_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_devices_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_device_mask_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Synonym for starpurm_withdraw_all_devices_from_starpu().
-
-\fn starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_all_devices_to_starpu().
-
-\fn starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Synonym for starpurm_assign_device_to_starpu().
-
-@name CpusetsQueries
-\ingroup API_Interop_Support
-
-\fn hwloc_cpuset_t starpurm_get_device_worker_cpuset(int type_id, int unit_rank)
-\ingroup API_Interop_Support
-Return the CPU-set of the StarPU worker associated to the \p unit_rank
-nth unit of type \p type_id.
-
-\fn hwloc_cpuset_t starpurm_get_global_cpuset(void)
-\ingroup API_Interop_Support
-Return the cumulated CPU-set of all StarPU worker threads.
-
-\fn hwloc_cpuset_t starpurm_get_selected_cpuset(void)
-\ingroup API_Interop_Support
-Return the CPU-set of the StarPU worker threads currently selected in
-the default StarPU's scheduling context.
-
-\fn hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void)
-\ingroup API_Interop_Support
-Return the cumulated CPU-set of all CPU StarPU worker threads.
-
-\fn hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void)
-\ingroup API_Interop_Support
-Return the cumulated CPU-set of all "non-CPU" StarPU worker threads.
-
-\fn hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int typeid)
-\ingroup API_Interop_Support
-Return the cumulated CPU-set of all StarPU worker threads for devices of
-type \p typeid.
-
-*/

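For illustration, a minimal sketch of the interoperability API documented above: initialise StarPU-RM, run a kernel in a temporary context restricted to a few CPU units, and shut down. The header name <starpurm.h> and the NULL resource-sharing descriptor are assumptions, not part of this commit.

\code{.c}
#include <hwloc.h>
#include <starpurm.h>   /* assumed public header for the StarPU-RM module */
#include <stdio.h>

static void kernel(void *args)
{
	/* Runs inside a temporary StarPU context restricted to the units
	   selected in the cpuset passed to starpurm_spawn_kernel_on_cpus(). */
	printf("kernel running on %s\n", (const char *)args);
}

int main(void)
{
	starpurm_initialize();              /* also initialises StarPU itself */
	starpurm_set_drs_enable(NULL);      /* NULL descriptor: assumption */

	hwloc_cpuset_t set = hwloc_bitmap_alloc();
	hwloc_bitmap_set_range(set, 0, 3);  /* logical units 0..3 */

	starpurm_spawn_kernel_on_cpus(NULL, kernel, (void *)"units 0-3", set);

	hwloc_bitmap_free(set);
	starpurm_shutdown();                /* also shuts StarPU down */
	return 0;
}
\endcode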
+ 2 - 551
doc/doxygen/chapters/api/mpi.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011,2012,2016,2017                      Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2011,2014-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -16,562 +16,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-/*! \defgroup API_MPI_Support MPI Support
-
-@name Initialisation
-\ingroup API_MPI_Support
+/*! \ingroup API_MPI_Support MPI Support
 
 \def STARPU_USE_MPI
 \ingroup API_MPI_Support
 Defined when StarPU has been installed with MPI support. It should be
 used in your code to detect the availability of MPI.
 
-\fn int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf)
-\ingroup API_MPI_Support
-Initialize the StarPU library with the given \p conf, and initialize the
-StarPU-MPI library with the given MPI communicator \p comm. \p initialize_mpi
-indicates if MPI should be initialized or not by StarPU.
-StarPU-MPI takes the opportunity to modify \p conf to either reserve a core for
-its MPI thread (by default), or execute MPI calls on the CPU driver 0 between tasks.
-
-\fn int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
-\ingroup API_MPI_Support
-This is the same as starpu_mpi_init_conf(), except that this does not initialize
-the StarPU library. The caller thus has to call starpu_init() before this.
-
-\fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
-\ingroup API_MPI_Support
-Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD.
-
-\fn int starpu_mpi_initialize(void)
-\deprecated
-\ingroup API_MPI_Support
-This function has been made deprecated. One should use instead the
-function starpu_mpi_init(). This function does not call \c MPI_Init(), it
-should be called beforehand.
-
-\fn int starpu_mpi_initialize_extended(int *rank, int *world_size)
-\deprecated
-\ingroup API_MPI_Support
-This function has been made deprecated. One should use instead the
-function starpu_mpi_init(). MPI will be initialized by starpumpi by
-calling <c>MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED,
-...)</c>.
-
-\fn int starpu_mpi_shutdown(void)
-\ingroup API_MPI_Support
-Clean the starpumpi library. This must be called after calling any
-\c starpu_mpi functions and before the call to starpu_shutdown(), if any.
-\c MPI_Finalize() will be called if StarPU-MPI has been initialized by starpu_mpi_init().
-
-\fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
-\ingroup API_MPI_Support
-Retrieve the current amount of communications from the current node in
-the array \p comm_amounts which must have a size greater or equal to
-the world size. Communications statistics must be enabled (see
-\ref STARPU_COMM_STATS).
-
-\fn int starpu_mpi_comm_size(MPI_Comm comm, int *size)
-\ingroup API_MPI_Support
-Return in \p size the size of the communicator \p comm
-
-\fn int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
-\ingroup API_MPI_Support
-Return in \p rank the rank of the calling process in the communicator \p comm
-
-\fn int starpu_mpi_world_rank(void)
-\ingroup API_MPI_Support
-Return the rank of the calling process in the communicator \c MPI_COMM_WORLD
-
-\fn int starpu_mpi_world_size(void)
-\ingroup API_MPI_Support
-Return the size of the communicator \c MPI_COMM_WORLD
-
-@name Communication
-\anchor MPIPtpCommunication
-\ingroup API_MPI_Support
-
-\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
-\ingroup API_MPI_Support
-Perform a standard-mode, blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm.
-
-\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_send, but takes a priority \p prio.
-
-\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status)
-\ingroup API_MPI_Support
-Perform a standard-mode, blocking receive in \p data_handle from the
-node \p source using the message tag \p data_tag within the
-communicator \p comm.
-
-\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
-\ingroup API_MPI_Support
-Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm. After the call, the pointer to the request \p req can be used to
-test or to wait for the completion of the communication.
-
-\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_isend, but takes a priority \p prio.
-
-\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
-\ingroup API_MPI_Support
-Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p data_tag within the communicator \p comm.
-After the call, the pointer to the request \p req can be used to test
-or to wait for the completion of the communication.
-
-\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-\ingroup API_MPI_Support
-Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm. On completion, the \p callback function is called with the
-argument \p arg.
-Similarly to the pthread detached functionality, when a detached
-communication completes, its resources are automatically released back
-to the system, there is no need to test or to wait for the completion
-of the request.
-
-\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_isend_detached, but takes a priority \p prio.
-
-\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-\ingroup API_MPI_Support
-Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p data_tag within the communicator \p comm. On
-completion, the \p callback function is called with the argument \p
-arg.
-Similarly to the pthread detached functionality, when a detached
-communication completes, its resources are automatically released back
-to the system, there is no need to test or to wait for the completion
-of the request.
-
-\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
-\ingroup API_MPI_Support
-Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p data_tag within the communicator \p comm. On
-completion, the \p callback function is called with the argument \p
-arg.
-The parameter \p sequential_consistency allows to enable or disable
-the sequential consistency for \p data handle (sequential consistency
-will be enabled or disabled based on the value of the parameter \p
-sequential_consistency and the value of the sequential consistency
-defined for \p data_handle).
-Similarly to the pthread detached functionality, when a detached
-communication completes, its resources are automatically released back
-to the system, there is no need to test or to wait for the completion
-of the request.
-
-\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
-\ingroup API_MPI_Support
-Perform a synchronous-mode, non-blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm.
-
-\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_issend, but takes a priority \p prio.
-
-\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
-\ingroup API_MPI_Support
-Perform a synchronous-mode, non-blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm. On completion, the \p callback function is called with the argument \p
-arg.
-Similarly to the pthread detached functionality, when a detached
-communication completes, its resources are automatically released back
-to the system, there is no need to test or to wait for the completion
-of the request.
-
-\fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
-\ingroup API_MPI_Support
-Return when the operation identified by request \p req is complete.
-
-\fn int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status)
-\ingroup API_MPI_Support
-If the operation identified by \p req is complete, set \p flag to 1.
-The \p status object is set to contain information on the completed
-operation.
-
-\fn int starpu_mpi_barrier(MPI_Comm comm)
-\ingroup API_MPI_Support
-Block the caller until all group members of the communicator \p comm
-have called it.
-
-\fn int starpu_mpi_wait_for_all(MPI_Comm comm)
-\ingroup API_MPI_Support
-Wait until all StarPU tasks and communications for the given communicator are completed.
-
-\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p data_tag within the communicator \p
-comm. On completion, \p tag is unlocked.
-
-\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_isend_detached_unlock_tag(), but takes a priority \p prio.
-
-\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p data_tag within the communicator \p comm. On
-completion, \p tag is unlocked.
-
-\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Post \p array_size standard-mode, non blocking send. Each post sends
-the n-th data of the array \p data_handle to the n-th node of the
-array \p dest using the n-th message tag of the array \p data_tag
-within the n-th communicator of the array \p comm. On completion of
-the all the requests, \p tag is unlocked.
-
-\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_isend_array_detached_unlock_tag(), but takes a priority \p prio.
-
-\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
-\ingroup API_MPI_Support
-Post \p array_size nonblocking receive. Each post receives in the n-th
-data of the array \p data_handle from the n-th node of the array \p
-source using the n-th message tag of the array \p data_tag within the
-n-th communicator of the array \p comm. On completion of the all the
-requests, \p tag is unlocked.
-
-\fn int starpu_mpi_get_communication_tag(void)
-\ingroup API_MPI_Support
-todo
-
-\fn void starpu_mpi_set_communication_tag(int tag)
-\ingroup API_MPI_Support
-todo
-
-\fn int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func)
-\ingroup API_MPI_Support
-Register functions to create and free a MPI datatype for the given handle.
-It is important that the function is called before any communication can take place for a data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example.
-
-\fn int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
-\ingroup API_MPI_Support
-Unregister the MPI datatype functions stored for the interface of the given handle.
-
-\def STARPU_MPI_TAG_UB
-\ingroup API_MPI_Support
-When given to the function starpu_mpi_comm_get_attr(), retrieve the
-value for the upper bound for tag value.
-
-\fn int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
-\ingroup API_MPI_Support
-Retrieve an attribute value by key, similarly to the MPI function \c MPI_comm_get_attr(), except that the value is a pointer to int64_t instead of int.
-If an attribute is attached on \p comm to \p keyval, then the call
-returns \p flag equal to \c 1, and the attribute value in \p
-attribute_val. Otherwise, \p flag is set to \0.
-
-@name Communication Cache
-\ingroup API_MPI_Support
-
-\fn int starpu_mpi_cache_is_enabled()
-\ingroup API_MPI_Support
-Return 1 if the communication cache is enabled, 0 otherwise
-
-\fn int starpu_mpi_cache_set(int enabled)
-\ingroup API_MPI_Support
-If \p enabled is 1, enable the communication cache. Otherwise, clean the cache if it was enabled and disable it.
-
-\fn void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
-\ingroup API_MPI_Support
-Clear the send and receive communication cache for the data
-\p data_handle and invalidate the value. The function has to be called at the
-same point of task graph submission by all the MPI nodes on which the handle was
-registered. The function does nothing if the cache mechanism is
-disabled (see \ref STARPU_MPI_CACHE).
-
-\fn void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
-\ingroup API_MPI_Support
-Clear the send and receive communication cache for all data and invalidate their values. The
-function has to be called at the same point of task graph submission by all the MPI nodes. The
-function does nothing if the cache mechanism is disabled (see
-\ref STARPU_MPI_CACHE).
-
-\fn int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
-\ingroup API_MPI_Support
-Test whether \p data_handle is cached for reception, i.e. the value was
-previously received from the owner node, and not flushed since then.
-
-\fn int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
-\ingroup API_MPI_Support
-Test whether \p data_handle is cached for emission to node \p dest , i.e. the
-value was previously sent to \p dest, and not flushed since then.
-
-@name MPI Insert Task
-\anchor MPIInsertTask
-\ingroup API_MPI_Support
-
-\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given tag, rank and MPI communicator.
-It also automatically clears the MPI communication cache when unregistering the data.
-
-\def starpu_mpi_data_register(data_handle, data_tag, rank)
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD.
-It also automatically clears the MPI communication cache when unregistering the data.
-
-\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given tag. No rank will be defined.
-It also automatically clears the MPI communication cache when unregistering the data.
-
-\def starpu_data_set_tag
-\ingroup API_MPI_Support
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_tag()
-
-\fn void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm)
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given rank and given communicator. No tag will be defined.
-It also automatically clears the MPI communication cache when unregistering the data.
-
-\def starpu_mpi_data_set_rank
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
-It also automatically clears the MPI communication cache when unregistering the data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_rank()
-
-\def starpu_data_set_rank
-\ingroup API_MPI_Support
-Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
-It also automatically clears the MPI communication cache when unregistering the data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_set_rank()
-
-\fn int starpu_mpi_data_get_rank(starpu_data_handle_t handle)
-\ingroup API_MPI_Support
-Return the rank of the given data.
-
-\def starpu_data_get_rank
-\ingroup API_MPI_Support
-Return the rank of the given data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_rank()
-
-\fn starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle)
-\ingroup API_MPI_Support
-Return the tag of the given data.
-
-\def starpu_data_get_tag
-\ingroup API_MPI_Support
-Return the tag of the given data.
-Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_tag()
-
-\def STARPU_MPI_PER_NODE
-\ingroup API_MPI_Support
-Can be used as rank when calling starpu_mpi_data_register() and alike, to
-specify that the data is per-node: each node will have its own value. Tasks
-writing to such data will be replicated on all nodes (and all parameters then
-have to be per-node). Tasks not writing to such data will just take the
-node-local value without any MPI communication.
-
-\fn void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank)
-\ingroup API_MPI_Support
-Submits migration of the data onto the \p new_rank MPI node. This means both submitting the transfer of
-the data to node \p new_rank if it hasn't been submitted already, and setting
-the home node of the data to the new node. Further data transfers submitted by
-starpu_mpi_task_insert() will be done from that new node. This function thus
-needs to be called on all nodes which have registered the data at the same point of tasks submissions. This also
-flushes the cache for this data to avoid incoherencies.
-
-\def STARPU_EXECUTE_ON_NODE
-\ingroup API_MPI_Support
-Used when calling starpu_mpi_task_insert(), must be
-followed by a integer value which specified the node on which to
-execute the codelet.
-
-\def STARPU_EXECUTE_ON_DATA
-\ingroup API_MPI_Support
-Used when calling starpu_mpi_task_insert(), must be
-followed by a data handle to specify that the node owning the given
-data will execute the codelet.
-
-\def STARPU_NODE_SELECTION_POLICY
-\ingroup API_MPI_Support
-Used when calling starpu_mpi_task_insert(), must be
-followed by a identifier to a node selection policy. This is needed when several
-nodes own data in ::STARPU_W mode.
-
-\fn int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
-\ingroup API_MPI_Support
-Call starpu_mpi_task_insert(). Symbol kept for backward compatibility.
-
-\fn int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
-\ingroup API_MPI_Support
-Create and submit a task corresponding to codelet with the following
-arguments. The argument list must be zero-terminated.
-
-The arguments following the codelet are the same types as for the
-function starpu_task_insert(). Access modes for data can also be set
-with ::STARPU_SSEND to specify the data has to be sent using a
-synchronous and non-blocking mode (see starpu_mpi_issend()).
-The extra argument
-::STARPU_EXECUTE_ON_NODE followed by an integer allows to specify the
-MPI node to execute the codelet. It is also possible to specify that
-the node owning a specific data will execute the codelet, by using
-::STARPU_EXECUTE_ON_DATA followed by a data handle.
-
-The internal algorithm is as follows:
-<ol>
-<li>
-        Find out which MPI node is going to execute the codelet.
-        <ul>
-            <li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
-            <li>If there is several nodes owning data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_current_policy())
-            <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
-            <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owing the given data will execute the codelet.
-        </ul>
-</li>
-<li>
-        Send and receive data as requested. Nodes owning data which need to be read by the task are sending them to the MPI node which will execute it. The latter receives them.
-</li>
-<li>
-        Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
-</li>
-<li>
-        If several MPI nodes own data to be written to, send written data back to their owners.
-</li>
-</ol>
-
-The algorithm also includes a communication cache mechanism that
-allows not to send data twice to the same MPI node, unless the data
-has been modified. The cache can be disabled (see \ref STARPU_MPI_CACHE).
-
-\fn struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
-\ingroup API_MPI_Support
-Create a task corresponding to \p codelet with the following given arguments.
-The argument list must be zero-terminated. The function performs the
-first two steps of the function starpu_mpi_task_insert(), i.e. submitting the
-MPI communications needed before the execution of the task, and the creation of
-the task on one node. Only the MPI
-node selected in the first step of the algorithm will return a valid
-task structure which can then be submitted, others will return <c>NULL</c>. The function
-starpu_mpi_task_post_build() MUST be called after that on all nodes, and after the submission of
-the task on the node which creates it, with the SAME list of arguments.
-
-\fn int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
-\ingroup API_MPI_Support
-MUST be called after a call to starpu_mpi_task_build(),
-with the SAME list of arguments. Perform the fourth -- last -- step of
-the algorithm described in starpu_mpi_task_insert().
-
-\fn void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
-\ingroup API_MPI_Support
-Transfer data \p data_handle to MPI node \p node, sending it from its
-owner if needed. At least the target node and the owner have to call
-the function.
-
-\fn void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
-\ingroup API_MPI_Support
-Transfer data \p data_handle to MPI node \p node, sending it from its
-owner if needed. At least the target node and the owner have to call
-the function. On reception, the \p callback function is called with
-the argument \p arg.
-
-\fn void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle)
-\ingroup API_MPI_Support
-Transfer data \p data_handle to all MPI nodes, sending it from its
-owner if needed. All nodes have to call the function.
-
-@name Node Selection Policy
-\anchor MPINodeSelectionPolicy
-\ingroup API_MPI_Support
-
-\def STARPU_MPI_NODE_SELECTION_CURRENT_POLICY
-\ingroup API_MPI_Support
-todo
-
-\def STARPU_MPI_NODE_SELECTION_MOST_R_DATA
-\ingroup API_MPI_Support
-todo
-
-\fn int starpu_mpi_node_selection_get_current_policy()
-\ingroup API_MPI_Support
-Return the current policy used to select the node which will execute the codelet
-
-\fn int starpu_mpi_node_selection_set_current_policy(int policy)
-\ingroup API_MPI_Support
-Set the current policy used to select the node which will
-execute the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the
-node having the most data in ::STARPU_R mode so as to minimize the amount of
-data to be transfered.
-
-\fn int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func)
-\ingroup API_MPI_Support
-Register a new policy which can then be used when there is several nodes owning data in ::STARPU_W mode.
-Here an example of function defining a node selection policy.
-The codelet will be executed on the node owing the first data with a size bigger than 1M, or on the node
-0 if no data fits the given size.
-\code{.c}
-int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
-{
-	// me is the current MPI rank
-	// nb_nodes is the number of MPI nodes
-	// descr is the description of the data specified when calling starpu_mpi_task_insert
-	// nb_data is the number of data in descr
-	int i;
-	for(i= 0 ; i<nb_data ; i++)
-	{
-		starpu_data_handle_t data = descr[i].handle;
-		enum starpu_data_access_mode mode = descr[i].mode;
-		if (mode & STARPU_R)
-		{
-			int rank = starpu_data_get_rank(data);
-			size_t size = starpu_data_get_size(data);
-			if (size > 1024*1024) return rank;
-		}
-	}
-	return 0;
-}
-\endcode
-
-\fn int starpu_mpi_node_selection_unregister_policy(int policy)
-\ingroup API_MPI_Support
-Unregister a previously registered policy.
-
-@name Collective Operations
-\anchor MPICollectiveOperations
-\ingroup API_MPI_Support
-
-\fn void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
-\ingroup API_MPI_Support
-Perform a reduction on the given data \p handle. All nodes send the data to its
-owner node which will perform a reduction.
-
-\fn void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio)
-\ingroup API_MPI_Support
-Similar to starpu_mpi_redux_data, but takes a priority \p prio.
-
-\fn int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-\ingroup API_MPI_Support
-Scatter data among processes of the communicator based on the
-ownership of the data. For each data of the array \p data_handles, the
-process \p root sends the data to the process owning this data. Processes
-receiving data must have valid data handles to receive them. On
-completion of the collective communication, the \p scallback function is
-called with the argument \p sarg on the process \p root, the \p
-rcallback function is called with the argument \p rarg on any other
-process.
-
-\fn int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-\ingroup API_MPI_Support
-Gather data from the different processes of the communicator onto the
-process \p root. Each process owning data handle in the array
-\p data_handles will send them to the process \p root. The process \p
-root must have valid data handles to receive the data. On completion
-of the collective communication, the \p rcallback function is called
-with the argument \p rarg on the process root, the \p scallback
-function is called with the argument \p sarg on any other process.
-
 @name MPI Master Slave
 \anchor MPIMasterSlaveSupport
 \ingroup API_MPI_Support

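To make the starpu_mpi_task_insert() algorithm described above concrete, here is a minimal sketch (not part of the commit): every rank registers the same handle with starpu_mpi_data_register(), every rank submits the same call, and StarPU-MPI performs the transfers and runs the codelet on the data owner. The vector size, tag and scaling codelet are illustrative.

\code{.c}
#include <starpu.h>
#include <starpu_mpi.h>

static void scal_cpu(void *buffers[], void *cl_arg)
{
	float *v = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
	unsigned i;
	for (i = 0; i < n; i++)
		v[i] *= 2.0f;
	(void)cl_arg;
}

static struct starpu_codelet scal_cl =
{
	.cpu_funcs = { scal_cpu },
	.nbuffers = 1,
	.modes = { STARPU_RW },
};

int main(int argc, char **argv)
{
	float x[16] = { 1.0f };
	starpu_data_handle_t handle;
	int rank;

	if (starpu_init(NULL) != 0) return 1;
	if (starpu_mpi_init(&argc, &argv, 1) != 0) return 1;
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

	/* Rank 0 owns the vector; the other ranks register a placeholder. */
	if (rank == 0)
		starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)x, 16, sizeof(x[0]));
	else
		starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 16, sizeof(x[0]));
	starpu_mpi_data_register(handle, 42, 0); /* tag 42, owner rank 0 */

	/* Same call on every rank; the owner executes the codelet, the other
	   ranks only take part in the required transfers (steps 1-4 above). */
	starpu_mpi_task_insert(MPI_COMM_WORLD, &scal_cl, STARPU_RW, handle, 0);

	starpu_mpi_wait_for_all(MPI_COMM_WORLD);
	starpu_data_unregister(handle);
	starpu_mpi_shutdown();
	starpu_shutdown();
	return 0;
}
\endcode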
+ 0 - 66
doc/doxygen/chapters/api/parallel_tasks.doxy

@@ -1,66 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Parallel_Tasks Parallel Tasks
-
-\fn int starpu_combined_worker_get_size(void)
-\ingroup API_Parallel_Tasks
-Return the size of the current combined worker, i.e. the total number
-of cpus running the same task in the case of ::STARPU_SPMD parallel
-tasks, or the total number of threads that the task is allowed to
-start in the case of ::STARPU_FORKJOIN parallel tasks.
-
-\fn int starpu_combined_worker_get_rank(void)
-\ingroup API_Parallel_Tasks
-Return the rank of the current thread within the combined worker. Can
-only be used in ::STARPU_FORKJOIN parallel tasks, to know which part
-of the task to work on.
-
-\fn unsigned starpu_combined_worker_get_count(void)
-\ingroup API_Parallel_Tasks
-Return the number of different combined workers.
-
-\fn int starpu_combined_worker_get_id(void)
-\ingroup API_Parallel_Tasks
-Return the identifier of the current combined worker.
-
-\fn int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
-\ingroup API_Parallel_Tasks
-Register a new combined worker and get its identifier
-
-\fn int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid)
-\ingroup API_Parallel_Tasks
-Get the description of a combined worker
-
-\fn int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
-\ingroup API_Parallel_Tasks
-Variant of starpu_worker_can_execute_task() compatible with combined
-workers
-
-\fn void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid)
-\ingroup API_Parallel_Tasks
-Initialise the barrier for the parallel task, and dispatch the task
-between the different workers of the given combined worker.
-
-\fn void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size)
-\ingroup API_Parallel_Tasks
-Initialise the barrier for the parallel task, to be pushed to \p worker_size
-workers (without having to explicit a given combined worker).
-
-*/
-

+ 2 - 2
include/starpu_cusparse.h

@@ -40,14 +40,14 @@ extern "C"
 void starpu_cusparse_init(void);
 
 /**
-   @brief Synchronously deinitialize the CUSPARSE library on
+   Synchronously deinitialize the CUSPARSE library on
    every CUDA device.
 */
 void starpu_cusparse_shutdown(void);
 
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
 /**
-   @brief Return the CUSPARSE handle to be used to queue CUSPARSE
+   Return the CUSPARSE handle to be used to queue CUSPARSE
    kernels. It is properly initialized and configured for multistream by
    starpu_cusparse_init().
 */

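As a small usage note (not part of the commit), these two calls bracket the usual StarPU lifetime; inside a CUDA codelet, the per-worker handle returned by starpu_cusparse_get_local_handle() (assumed to be declared in the same header) would then be used to queue CUSPARSE kernels on the worker's stream.

\code{.c}
#include <starpu.h>
#include <starpu_cusparse.h>

int main(void)
{
	if (starpu_init(NULL) != 0)
		return 1;
	starpu_cusparse_init();      /* initialise CUSPARSE on every CUDA device */

	/* ... submit tasks whose CUDA implementations use CUSPARSE ... */

	starpu_cusparse_shutdown();  /* synchronous deinitialisation */
	starpu_shutdown();
	return 0;
}
\endcode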
+ 16 - 14
include/starpu_data.h

@@ -179,9 +179,10 @@ void starpu_data_invalidate_submit(starpu_data_handle_t handle);
 */
 void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
 
-/** @name Access registered data from the application
- * @{
- */
+/**
+   @name Access registered data from the application
+   @{
+*/
 
 /**
    This macro can be used to acquire data, but not require it to be
@@ -435,17 +436,18 @@ void starpu_data_wont_use(starpu_data_handle_t handle);
 */
 void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
 
-/** @name Implicit Data Dependencies
-    In this section, we describe how StarPU makes it possible to
-    insert implicit task dependencies in order to enforce sequential data
-    consistency. When this data consistency is enabled on a specific data
-    handle, any data access will appear as sequentially consistent from
-    the application. For instance, if the application submits two tasks
-    that access the same piece of data in read-only mode, and then a third
-    task that access it in write mode, dependencies will be added between
-    the two first tasks and the third one. Implicit data dependencies are
-    also inserted in the case of data accesses from the application.
-    @{
+/**
+   @name Implicit Data Dependencies
+   In this section, we describe how StarPU makes it possible to
+   insert implicit task dependencies in order to enforce sequential data
+   consistency. When this data consistency is enabled on a specific data
+   handle, any data access will appear as sequentially consistent from
+   the application. For instance, if the application submits two tasks
+   that access the same piece of data in read-only mode, and then a third
+   task that access it in write mode, dependencies will be added between
+   the two first tasks and the third one. Implicit data dependencies are
+   also inserted in the case of data accesses from the application.
+   @{
 */
 
 /**

+ 0 - 4
include/starpu_scheduler.h

@@ -299,10 +299,6 @@ int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 */
 int starpu_push_task_end(struct starpu_task *task);
 
-int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]);
-int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid);
-int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
-
 /**
    Whether \ref STARPU_PREFETCH was set
 */

+ 0 - 3
include/starpu_task.h

@@ -1383,9 +1383,6 @@ const char *starpu_task_get_model_name(struct starpu_task *task);
 */
 const char *starpu_task_get_name(struct starpu_task *task);
 
-void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
-void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
-
 /**
    Allocate a task structure which is the exact duplicate of \p task.
 */

+ 20 - 0
include/starpu_task_util.h

@@ -73,7 +73,20 @@ extern "C"
 */
 #define STARPU_PRIORITY		 (5<<STARPU_MODE_SHIFT)
 
+/**
+   \ingroup API_MPI_Support
+   Used when calling starpu_mpi_task_insert(), must be followed by an
+   integer value which specifies the node on which to execute the
+   codelet.
+ */
 #define STARPU_EXECUTE_ON_NODE	 (6<<STARPU_MODE_SHIFT)
+
+/**
+   \ingroup API_MPI_Support
+   Used when calling starpu_mpi_task_insert(), must be followed by a
+   data handle to specify that the node owning the given data will
+   execute the codelet.
+*/
 #define STARPU_EXECUTE_ON_DATA	 (7<<STARPU_MODE_SHIFT)
 #define STARPU_DATA_ARRAY        (8<<STARPU_MODE_SHIFT)
 #define STARPU_DATA_MODE_ARRAY   (9<<STARPU_MODE_SHIFT)
@@ -126,6 +139,13 @@ extern "C"
    to execute the tasks (as specified by starpu_task::workerorder)
 */
 #define STARPU_WORKER_ORDER      (22<<STARPU_MODE_SHIFT)
+
+/**
+   \ingroup API_MPI_Support
+   Used when calling starpu_mpi_task_insert(), must be followed by an
+   identifier of a node selection policy. This is needed when several
+   nodes own data in ::STARPU_W mode.
+*/
 #define STARPU_NODE_SELECTION_POLICY (23<<STARPU_MODE_SHIFT)
 
 /**

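A short sketch (not part of the commit) of how these flags are passed to starpu_mpi_task_insert(); the codelet cl and the registered handle are assumed to exist elsewhere.

\code{.c}
#include <starpu.h>
#include <starpu_mpi.h>

extern struct starpu_codelet cl;    /* assumed: one STARPU_RW buffer */

void submit_examples(starpu_data_handle_t handle)
{
	/* Force execution of this instance on MPI rank 2. */
	starpu_mpi_task_insert(MPI_COMM_WORLD, &cl,
			       STARPU_RW, handle,
			       STARPU_EXECUTE_ON_NODE, 2,
			       0);

	/* Let the owner of 'handle' execute this one. */
	starpu_mpi_task_insert(MPI_COMM_WORLD, &cl,
			       STARPU_RW, handle,
			       STARPU_EXECUTE_ON_DATA, handle,
			       0);
}
\endcode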
+ 62 - 6
include/starpu_worker.h

@@ -162,8 +162,6 @@ extern struct starpu_worker_collection worker_tree;
    STARPU_NMAXWORKERS.
 */
 unsigned starpu_worker_get_count(void);
-unsigned starpu_combined_worker_get_count(void);
-unsigned starpu_worker_is_combined_worker(int id);
 
 /**
    Return the number of CPUs controlled by StarPU. The return value
@@ -226,10 +224,6 @@ unsigned starpu_worker_get_id_check(void);
 #define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__)
 int starpu_worker_get_bindid(int workerid);
 
-int starpu_combined_worker_get_id(void);
-int starpu_combined_worker_get_size(void);
-int starpu_combined_worker_get_rank(void);
-
 void starpu_sched_find_all_worker_combinations(void);
 
 /**
@@ -467,6 +461,68 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid));
 
 /** @} */
 
+/**
+   @defgroup API_Parallel_Tasks Parallel Tasks
+   @{
+*/
+
+/**
+   Return the number of different combined workers.
+*/
+unsigned starpu_combined_worker_get_count(void);
+unsigned starpu_worker_is_combined_worker(int id);
+
+/**
+   Return the identifier of the current combined worker.
+*/
+int starpu_combined_worker_get_id(void);
+
+/**
+   Return the size of the current combined worker, i.e. the total
+   number of CPUs running the same task in the case of ::STARPU_SPMD
+   parallel tasks, or the total number of threads that the task is
+   allowed to start in the case of ::STARPU_FORKJOIN parallel tasks.
+*/
+int starpu_combined_worker_get_size(void);
+
+/**
+   Return the rank of the current thread within the combined worker.
+   Can only be used in ::STARPU_FORKJOIN parallel tasks, to know which
+   part of the task to work on.
+*/
+int starpu_combined_worker_get_rank(void);
+
+/**
+   Register a new combined worker and get its identifier
+*/
+int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]);
+
+/**
+   Get the description of a combined worker
+*/
+int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid);
+
+/**
+   Variant of starpu_worker_can_execute_task() compatible with
+   combined workers
+*/
+int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
+
+/**
+   Initialise the barrier for the parallel task, and dispatch the task
+   between the different workers of the given combined worker.
+ */
+void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
+
+/**
+   Initialise the barrier for the parallel task, to be pushed to \p
+   worker_size workers (without having to specify a given combined
+   worker).
+*/
+void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
+
+/** @} */
+
 #ifdef __cplusplus
 }
 #endif

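Following the pattern of StarPU's SPMD examples, here is a sketch of a parallel codelet that uses the size and rank queries above to split a vector between the CPUs of the combined worker. It assumes a parallel-task-aware scheduler (e.g. STARPU_SCHED=pheft) and is not part of the commit.

\code{.c}
#include <limits.h>
#include <starpu.h>

/* Called once per CPU of the combined worker (::STARPU_SPMD). */
static void scal_spmd_cpu(void *buffers[], void *cl_arg)
{
	float *v = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
	unsigned nx = STARPU_VECTOR_GET_NX(buffers[0]);

	int size = starpu_combined_worker_get_size();
	int rank = starpu_combined_worker_get_rank();

	unsigned chunk = (nx + size - 1) / size;
	unsigned begin = rank * chunk;
	unsigned end   = begin + chunk > nx ? nx : begin + chunk;
	unsigned i;

	for (i = begin; i < end; i++)
		v[i] *= 2.0f;
	(void)cl_arg;
}

static struct starpu_codelet scal_spmd_cl =
{
	.type = STARPU_SPMD,
	.max_parallelism = INT_MAX,
	.cpu_funcs = { scal_spmd_cpu },
	.nbuffers = 1,
	.modes = { STARPU_RW },
};

int main(void)
{
	float x[1000];
	starpu_data_handle_t h;
	unsigned i;

	for (i = 0; i < 1000; i++)
		x[i] = 1.0f;

	if (starpu_init(NULL) != 0) return 1;
	starpu_vector_data_register(&h, STARPU_MAIN_RAM, (uintptr_t)x, 1000, sizeof(x[0]));
	starpu_task_insert(&scal_spmd_cl, STARPU_RW, h, 0);
	starpu_data_unregister(h);    /* waits for the task to complete */
	starpu_shutdown();
	return 0;
}
\endcode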
+ 624 - 45
mpi/include/starpu_mpi.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013,2016                                Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2012,2014-2018                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,118 +31,697 @@ extern "C"
 {
 #endif
 
+/**
+   @defgroup API_MPI_Support MPI Support
+   @{
+*/
+
+/**
+   @name Initialisation
+   @{
+*/
+
+/**
+   Initialize the StarPU library with the given \p conf, and
+   initialize the StarPU-MPI library with the given MPI communicator
+   \p comm. \p initialize_mpi indicates if MPI should be initialized
+   or not by StarPU. StarPU-MPI takes the opportunity to modify \p
+   conf to either reserve a core for its MPI thread (by default), or
+   execute MPI calls on the CPU driver 0 between tasks.
+*/
+int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf);
+
+/**
+   Same as starpu_mpi_init_conf(), except that this does not
+   initialize the StarPU library. The caller thus has to call
+   starpu_init() before this.
+*/
+int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
+
+/**
+   Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD.
+*/
+int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
+
+/**
+   @deprecated
+   This function has been made deprecated. One should use instead the
+   function starpu_mpi_init(). This function does not call \c
+   MPI_Init(), it should be called beforehand.
+*/
+int starpu_mpi_initialize(void) STARPU_DEPRECATED;
+
+/**
+   @deprecated
+   This function has been made deprecated. One should instead use the
+   function starpu_mpi_init(). MPI will be initialized by StarPU-MPI
+   by calling <c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED,
+   ...)</c>.
+*/
+int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
+
+/**
+   Clean up the StarPU-MPI library. This must be called after calling
+   any \c starpu_mpi functions and before the call to
+   starpu_shutdown(), if any. \c MPI_Finalize() will be called if
+   StarPU-MPI has been initialized by starpu_mpi_init().
+*/
+int starpu_mpi_shutdown(void);
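+
+/*
+   A minimal initialisation/shutdown sketch using the functions above;
+   StarPU itself is initialized with starpu_init() beforehand, as
+   starpu_mpi_init() requires.
+
+   \code{.c}
+   int main(int argc, char **argv)
+   {
+	int rank, size;
+	if (starpu_init(NULL) != 0) return 1;
+	if (starpu_mpi_init(&argc, &argv, 1) != 0) return 1;
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+	/* ... register data, post communications, submit tasks ... */
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	return 0;
+   }
+   \endcode
+*/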
+
+/**
+   Retrieve the current amount of communications from the current
+   node in the array \p comm_amounts, which must have a size greater
+   than or equal to the world size. Communications statistics must be
+   enabled (see \ref STARPU_COMM_STATS).
+*/
+void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
+
+/**
+   Return in \p size the size of the communicator \p comm
+*/
+int starpu_mpi_comm_size(MPI_Comm comm, int *size);
+
+/**
+   Return in \p rank the rank of the calling process in the
+   communicator \p comm
+*/
+int starpu_mpi_comm_rank(MPI_Comm comm, int *rank);
+
+/**
+   Return the rank of the calling process in the communicator \c
+   MPI_COMM_WORLD
+*/
+int starpu_mpi_world_rank(void);
+
+/**
+   Return the size of the communicator \c MPI_COMM_WORLD
+*/
+int starpu_mpi_world_size(void);
+
+/**
+   When given to the function starpu_mpi_comm_get_attr(), retrieve the
+   value for the upper bound for tag value.
+*/
+#define STARPU_MPI_TAG_UB MPI_TAG_UB
+
+/**
+   Retrieve an attribute value by key, similarly to the MPI function
+   \c MPI_Comm_get_attr(), except that the value is a pointer to
+   int64_t instead of int. If an attribute is attached to \p comm for
+   \p keyval, the call returns \p flag equal to \c 1, and the
+   attribute value in \p attribute_val. Otherwise, \p flag is set to
+   \c 0.
+*/
+int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
+
+int starpu_mpi_get_communication_tag(void);
+void starpu_mpi_set_communication_tag(int tag);
+
+/** @} */
+
+/**
+   @name Communication
+   \anchor MPIPtpCommunication
+   @{
+*/
+
+/**
+   Opaque type for communication request
+*/
 typedef void *starpu_mpi_req;
 
+/**
+   Define the type which can be used to set communication tag when exchanging data.
+*/
 typedef int64_t starpu_mpi_tag_t;
 
+/**
+   Post a standard-mode, non-blocking send of \p data_handle to the
+   node \p dest using the message tag \p data_tag within the
+   communicator \p comm. After the call, the pointer to the request \p
+   req can be used to test or to wait for the completion of the
+   communication.
+*/
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+
+/**
+   Similar to starpu_mpi_isend(), but takes a priority \p prio.
+*/
 int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+
+/**
+   Post a nonblocking receive in \p data_handle from the node \p
+   source using the message tag \p data_tag within the communicator \p
+   comm. After the call, the pointer to the request \p req can be used
+   to test or to wait for the completion of the communication.
+*/
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+
+/**
+   Perform a standard-mode, blocking send of \p data_handle to the
+   node \p dest using the message tag \p data_tag within the
+   communicator \p comm.
+*/
 int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+
+/**
+   Similar to starpu_mpi_send(), but takes a priority \p prio.
+*/
 int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+
+/**
+   Perform a standard-mode, blocking receive in \p data_handle from
+   the node \p source using the message tag \p data_tag within the
+   communicator \p comm.
+*/
 int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status);
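+
+/*
+   A short sketch of a blocking exchange between ranks 0 and 1 using
+   the functions above; the vector \c x, its size and the tag value 42
+   are assumptions made for the example.
+
+   \code{.c}
+   float x[16];
+   starpu_data_handle_t handle;
+   int rank;
+
+   starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+   starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)x, 16, sizeof(x[0]));
+   if (rank == 0)
+	starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD);
+   else if (rank == 1)
+   {
+	MPI_Status status;
+	starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, &status);
+   }
+   starpu_data_unregister(handle);
+   \endcode
+*/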
+
+/**
+   Post a standard-mode, non-blocking send of \p data_handle to the
+   node \p dest using the message tag \p data_tag within the
+   communicator \p comm. On completion, the \p callback function is
+   called with the argument \p arg.
+   Similarly to the pthread detached functionality, when a detached
+   communication completes, its resources are automatically released
+   back to the system; there is no need to test or to wait for the
+   completion of the request.
+*/
 int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+
+/**
+   Similar to starpu_mpi_isend_detached(), but takes a priority \p prio.
+*/
 int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+
+/**
+   Post a nonblocking receive in \p data_handle from the node \p
+   source using the message tag \p data_tag within the communicator \p
+   comm. On completion, the \p callback function is called with the
+   argument \p arg.
+   Similarly to the pthread detached functionality, when a detached
+   communication completes, its resources are automatically released
+   back to the system; there is no need to test or to wait for the
+   completion of the request.
+*/
 int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
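+
+/*
+   A sketch of a detached reception with a completion callback; \c
+   handle, the source rank and the tag are assumptions made for the
+   example.
+
+   \code{.c}
+   void reception_callback(void *arg)
+   {
+	/* called by StarPU-MPI once the data has been received */
+	fprintf(stderr, "block %p received\n", arg);
+   }
+
+   starpu_mpi_irecv_detached(handle, 0, 42, MPI_COMM_WORLD, reception_callback, handle);
+   \endcode
+*/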
+
+/**
+   Post a nonblocking receive in \p data_handle from the node \p
+   source using the message tag \p data_tag within the communicator \p
+   comm. On completion, the \p callback function is called with the
+   argument \p arg.
+   The parameter \p sequential_consistency allows enabling or
+   disabling the sequential consistency for \p data_handle (sequential
+   consistency will be enabled or disabled based on the value of the
+   parameter \p sequential_consistency and the value of the sequential
+   consistency defined for \p data_handle).
+   Similarly to the pthread detached functionality, when a detached
+   communication completes, its resources are automatically released
+   back to the system; there is no need to test or to wait for the
+   completion of the request.
+*/
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+
+/**
+   Perform a synchronous-mode, non-blocking send of \p data_handle to
+   the node \p dest using the message tag \p data_tag within the
+   communicator \p comm.
+*/
 int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+
+/**
+   Similar to starpu_mpi_issend(), but takes a priority \p prio.
+*/
 int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+
+/**
+   Perform a synchronous-mode, non-blocking send of \p data_handle to
+   the node \p dest using the message tag \p data_tag within the
+   communicator \p comm. On completion, the \p callback function is
+   called with the argument \p arg.
+   Similarly to the pthread detached functionality, when a detached
+   communication completes, its resources are automatically released
+   back to the system; there is no need to test or to wait for the
+   completion of the request.
+*/
 int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
-int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
-int starpu_mpi_barrier(MPI_Comm comm);
 
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+/**
+   Similar to starpu_mpi_issend_detached(), but takes a priority \p prio.
+*/
+int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
 
-int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf);
-int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
-int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
-int starpu_mpi_initialize(void) STARPU_DEPRECATED;
-int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED;
-int starpu_mpi_shutdown(void);
+/**
+   Return when the operation identified by request \p req is complete.
+*/
+int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
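+
+/*
+   A sketch of a non-blocking send completed with starpu_mpi_wait();
+   \c handle, the destination rank and the tag are assumptions made
+   for the example.
+
+   \code{.c}
+   starpu_mpi_req req;
+   MPI_Status status;
+
+   starpu_mpi_isend(handle, &req, 1, 42, MPI_COMM_WORLD);
+   /* ... overlap the communication with other work ... */
+   starpu_mpi_wait(&req, &status);
+   \endcode
+*/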
 
-struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-/* the function starpu_mpi_insert_task has the same semantics as starpu_mpi_task_insert, it is kept to avoid breaking old codes */
-int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+/**
+   If the operation identified by \p req is complete, set \p flag to
+   1. The \p status object is set to contain information on the
+   completed operation.
+*/
+int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
 
-void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
-void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
-void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle);
-void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
-void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio);
+/**
+   Block the caller until all group members of the communicator \p
+   comm have called it.
+*/
+int starpu_mpi_barrier(MPI_Comm comm);
 
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
-int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
+/**
+   Wait until all StarPU tasks and communications for the given
+   communicator are completed.
+*/
+int starpu_mpi_wait_for_all(MPI_Comm comm);
 
+/**
+   Post a standard-mode, non-blocking send of \p data_handle to the
+   node \p dest using the message tag \p data_tag within the
+   communicator \p comm. On completion, \p tag is unlocked.
+*/
 int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+
+/**
+   Similar to starpu_mpi_isend_detached_unlock_tag(), but takes a
+   priority \p prio.
+*/
 int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+
+/**
+   Post a nonblocking receive in \p data_handle from the node \p
+   source using the message tag \p data_tag within the communicator \p
+   comm. On completion, \p tag is unlocked.
+*/
 int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
 
+/**
+   Post \p array_size standard-mode, non-blocking sends. The n-th post
+   sends the n-th data of the array \p data_handle to the n-th node of
+   the array \p dest using the n-th message tag of the array \p
+   data_tag within the n-th communicator of the array \p comm. On
+   completion of all the requests, \p tag is unlocked.
+*/
 int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+
+/**
+   Similar to starpu_mpi_isend_array_detached_unlock_tag(), but takes
+   a priority \p prio.
+*/
 int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
+
+/**
+   Post \p array_size non-blocking receives. The n-th post receives in
+   the n-th data of the array \p data_handle from the n-th node of the
+   array \p source using the n-th message tag of the array \p data_tag
+   within the n-th communicator of the array \p comm. On completion of
+   all the requests, \p tag is unlocked.
+*/
 int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
 
-void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
+typedef void (*starpu_mpi_datatype_allocate_func_t)(starpu_data_handle_t, MPI_Datatype *);
+typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *);
+
+/**
+   Register functions to create and free an MPI datatype for the given
+   handle.
+   It is important that this function be called before any
+   communication takes place for a data with the given handle. See
+   \ref ExchangingUserDefinedDataInterface for an example.
+*/
+int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
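+
+/*
+   A sketch of datatype registration for a hypothetical data interface
+   made of three contiguous doubles; the two functions below are
+   assumptions made for the example.
+
+   \code{.c}
+   void my_datatype_allocate(starpu_data_handle_t handle, MPI_Datatype *datatype)
+   {
+	MPI_Type_contiguous(3, MPI_DOUBLE, datatype);
+	MPI_Type_commit(datatype);
+   }
+
+   void my_datatype_free(MPI_Datatype *datatype)
+   {
+	MPI_Type_free(datatype);
+   }
+
+   starpu_mpi_datatype_register(handle, my_datatype_allocate, my_datatype_free);
+   \endcode
+*/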
+
+/**
+   Unregister the MPI datatype functions stored for the interface of
+   the given handle.
+*/
+int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
+
+/** @} */
+
+/**
+   @name Communication Cache
+   @{
+*/
 
+/**
+   Return 1 if the communication cache is enabled, 0 otherwise
+*/
+int starpu_mpi_cache_is_enabled();
+
+/**
+   If \p enabled is 1, enable the communication cache. Otherwise,
+   clean the cache if it was enabled and disable it.
+*/
+int starpu_mpi_cache_set(int enabled);
+
+/**
+   Clear the send and receive communication cache for the data \p
+   data_handle and invalidate the value. The function has to be called
+   at the same point of task graph submission by all the MPI nodes on
+   which the handle was registered. The function does nothing if the
+   cache mechanism is disabled (see \ref STARPU_MPI_CACHE).
+*/
 void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle);
+
+/**
+   Clear the send and receive communication cache for all data and
+   invalidate their values. The function has to be called at the same
+   point of task graph submission by all the MPI nodes. The function
+   does nothing if the cache mechanism is disabled (see \ref
+   STARPU_MPI_CACHE).
+*/
 void starpu_mpi_cache_flush_all_data(MPI_Comm comm);
 
+/**
+   Test whether \p data_handle is cached for reception, i.e. the value
+   was previously received from the owner node, and not flushed since
+   then.
+*/
 int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
-int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
 
-int starpu_mpi_comm_size(MPI_Comm comm, int *size);
-int starpu_mpi_comm_rank(MPI_Comm comm, int *rank);
-int starpu_mpi_world_rank(void);
-int starpu_mpi_world_size(void);
+/**
+   Test whether \p data_handle is cached for emission to node \p dest,
+   i.e. the value was previously sent to \p dest, and not flushed
+   since then.
+*/
+int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
 
-int starpu_mpi_get_communication_tag(void);
-void starpu_mpi_set_communication_tag(int tag);
+/** @} */
+
+/**
+   @name MPI Insert Task
+   @{
+*/
+
+/**
+   Can be used as rank when calling starpu_mpi_data_register() and
+   alike, to specify that the data is per-node: each node will have
+   its own value. Tasks writing to such data will be replicated on all
+   nodes (and all parameters then have to be per-node). Tasks not
+   writing to such data will just take the node-local value without
+   any MPI communication.
+*/
+#define STARPU_MPI_PER_NODE -2
 
+/**
+   Register to MPI a StarPU data handle with the given tag, rank and
+   MPI communicator. It also automatically clears the MPI
+   communication cache when unregistering the data.
+*/
 void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm);
+
+/**
+   Register to MPI a StarPU data handle with the given tag, rank and
+   the MPI communicator \c MPI_COMM_WORLD.
+   It also automatically clears the MPI communication cache when
+   unregistering the data.
+*/
 #define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD)
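+
+/*
+   A registration sketch; the vector \c x, its size \c N, the owner
+   rank and the tag are assumptions made for the example.
+
+   \code{.c}
+   starpu_data_handle_t handle;
+
+   starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)x, N, sizeof(x[0]));
+   /* the data is owned by MPI rank 2 and exchanged with tag 7 */
+   starpu_mpi_data_register(handle, 7, 2);
+   \endcode
+*/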
 
-#define STARPU_MPI_PER_NODE -2
+/**
+   Register to MPI a StarPU data handle with the given tag. No rank
+   will be defined.
+   It also automatically clears the MPI communication cache when
+   unregistering the data.
+*/
+void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag);
+
+/**
+   Symbol kept for backward compatibility; alias for starpu_mpi_data_set_tag().
+*/
+#define starpu_data_set_tag starpu_mpi_data_set_tag
 
+/**
+   Register to MPI a StarPU data handle with the given rank and given
+   communicator. No tag will be defined.
+   It also automatically clears the MPI communication cache when
+   unregistering the data.
+*/
 void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm);
+
+/**
+   Register to MPI a StarPU data handle with the given rank and the
+   MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
+   It also automatically clears the MPI communication cache when
+   unregistering the data.
+*/
 #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD)
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag);
+
+/**
+   Symbol kept for backward compatibility; alias for starpu_mpi_data_set_rank().
+*/
 #define starpu_data_set_rank starpu_mpi_data_set_rank
-#define starpu_data_set_tag starpu_mpi_data_set_tag
 
+/**
+   Return the rank of the given data.
+*/
 int starpu_mpi_data_get_rank(starpu_data_handle_t handle);
-starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle);
+
+/**
+   Symbol kept for backward compatibility; alias for starpu_mpi_data_get_rank().
+*/
 #define starpu_data_get_rank starpu_mpi_data_get_rank
+
+/**
+   Return the tag of the given data.
+*/
+starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle);
+
+/**
+   Symbol kept for backward compatibility; alias for starpu_mpi_data_get_tag().
+*/
 #define starpu_data_get_tag starpu_mpi_data_get_tag
 
+/**
+   Create and submit a task corresponding to codelet with the
+   following arguments. The argument list must be zero-terminated.
+   The arguments following the codelet are the same types as for the
+   function starpu_task_insert().
+   Access modes for data can also be
+   set with ::STARPU_SSEND to specify the data has to be sent using a
+   synchronous and non-blocking mode (see starpu_mpi_issend()).
+   The extra argument ::STARPU_EXECUTE_ON_NODE followed by an integer
+   allows specifying the MPI node on which to execute the codelet. It
+   is also possible to specify that the node owning a specific data
+   will execute the codelet, by using ::STARPU_EXECUTE_ON_DATA
+   followed by a data handle.
+
+   The internal algorithm is as follows:
+   <ol>
+   <li>
+   Find out which MPI node is going to execute the codelet.
+	<ul>
+	<li>
+	If there is only one node owning data in ::STARPU_W mode, it
+	will be selected;
+	<li>
+	If several nodes own data in ::STARPU_W mode, a node will be
+	selected according to a given node selection policy (see
+	::STARPU_NODE_SELECTION_POLICY or
+	starpu_mpi_node_selection_set_current_policy());
+	<li>
+	The argument ::STARPU_EXECUTE_ON_NODE followed by an integer
+	can be used to specify the node;
+	<li>
+	The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle
+	can be used to specify that the node owning the given data will
+	execute the codelet.
+	</ul>
+   </li>
+   <li>
+   Send and receive data as requested. Nodes owning data which need to
+   be read by the task are sending them to the MPI node which will
+   execute it. The latter receives them.
+   </li>
+   <li>
+   Execute the codelet. This is done by the MPI node selected in the
+   1st step of the algorithm.
+   </li>
+   <li>
+   If several MPI nodes own data to be written to, send written data
+   back to their owners.
+   </li>
+   </ol>
+
+   The algorithm also includes a communication cache mechanism that
+   avoids sending data twice to the same MPI node, unless the data
+   has been modified. The cache can be disabled (see \ref
+   STARPU_MPI_CACHE).
+*/
+int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
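+
+/*
+   A submission sketch; \c scal_cl is a hypothetical codelet accessing
+   one vector in ::STARPU_RW mode, and \c handle a data handle
+   registered with starpu_mpi_data_register().
+
+   \code{.c}
+   starpu_mpi_task_insert(MPI_COMM_WORLD, &scal_cl,
+                          STARPU_RW, handle,
+                          0);
+   \endcode
+*/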
+
+/**
+   Symbol kept for backward compatibility; calls starpu_mpi_task_insert().
+*/
+int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+
+/**
+   Create a task corresponding to \p codelet with the given
+   arguments. The argument list must be zero-terminated. The function
+   performs the first two steps of the function
+   starpu_mpi_task_insert(), i.e. submitting the MPI communications
+   needed before the execution of the task, and the creation of the
+   task on one node. Only the MPI node selected in the first step of
+   the algorithm will return a valid task structure which can then be
+   submitted, others will return <c>NULL</c>. The function
+   starpu_mpi_task_post_build() MUST be called after that on all
+   nodes, and after the submission of the task on the node which
+   creates it, with the SAME list of arguments.
+*/
+struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
+
+/**
+   MUST be called after a call to starpu_mpi_task_build(),
+   with the SAME list of arguments. Perform the fourth -- last -- step of
+   the algorithm described in starpu_mpi_task_insert().
+*/
+int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
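+
+/*
+   A sketch of the build/post-build pairing described above, with the
+   same hypothetical codelet and handle as in the
+   starpu_mpi_task_insert() sketch.
+
+   \code{.c}
+   struct starpu_task *task = starpu_mpi_task_build(MPI_COMM_WORLD, &scal_cl,
+                                                    STARPU_RW, handle, 0);
+   if (task) starpu_task_submit(task);
+   starpu_mpi_task_post_build(MPI_COMM_WORLD, &scal_cl, STARPU_RW, handle, 0);
+   \endcode
+*/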
+
+/**
+   Transfer data \p data_handle to MPI node \p node, sending it from
+   its owner if needed. At least the target node and the owner have to
+   call the function.
+*/
+void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
+
+/**
+   Transfer data \p data_handle to MPI node \p node, sending it from
+   its owner if needed. At least the target node and the owner have to
+   call the function. On reception, the \p callback function is called
+   with the argument \p arg.
+*/
+void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg);
+
+/**
+   Transfer data \p data_handle to all MPI nodes, sending it from its
+   owner if needed. All nodes have to call the function.
+*/
+void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle);
+
+/**
+   Submit migration of the data onto the \p new_rank MPI node. This
+   means both submitting the transfer of the data to node \p new_rank
+   if it hasn't been submitted already, and setting the home node of
+   the data to the new node. Further data transfers submitted by
+   starpu_mpi_task_insert() will be done from that new node. This
+   function thus needs to be called on all nodes which have registered
+   the data, at the same point of task submission. This also flushes
+   the cache for this data to avoid inconsistencies.
+*/
 void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank);
 
+/** @} */
+
+/**
+   @name Node Selection Policy
+   \anchor MPINodeSelectionPolicy
+   @{
+*/
+
 #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1
 #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA    0
 
 typedef int (*starpu_mpi_select_node_policy_func_t)(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data);
+
+/**
+   Register a new policy which can then be used when several nodes
+   own data in ::STARPU_W mode.
+   Here is an example of a function defining a node selection policy.
+   The codelet will be executed on the node owning the first data
+   piece larger than 1MB, or on node 0 if no data piece is that
+   large.
+   \code{.c}
+   int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+   {
+	// me is the current MPI rank
+	// nb_nodes is the number of MPI nodes
+	// descr is the description of the data specified when calling starpu_mpi_task_insert
+	// nb_data is the number of data in descr
+	int i;
+	for(i= 0 ; i<nb_data ; i++)
+	{
+		starpu_data_handle_t data = descr[i].handle;
+		enum starpu_data_access_mode mode = descr[i].mode;
+		if (mode & STARPU_R)
+		{
+			int rank = starpu_data_get_rank(data);
+			size_t size = starpu_data_get_size(data);
+			if (size > 1024*1024) return rank;
+		}
+	}
+	return 0;
+   }
+   \endcode
+*/
 int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func);
+
+/**
+   Unregister a previously registered policy.
+*/
 int starpu_mpi_node_selection_unregister_policy(int policy);
 
+/**
+   Return the current policy used to select the node which will
+   execute the codelet
+*/
 int starpu_mpi_node_selection_get_current_policy();
+
+/**
+   Set the current policy used to select the node which will execute
+   the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA
+   selects the node having the most data in ::STARPU_R mode so as to
+   minimize the amount of data to be transferred.
+*/
 int starpu_mpi_node_selection_set_current_policy(int policy);
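+
+/*
+   A sketch of registering and selecting the policy function shown in
+   the example above.
+
+   \code{.c}
+   int policy = starpu_mpi_node_selection_register_policy(my_node_selection_policy);
+   starpu_mpi_node_selection_set_current_policy(policy);
+   \endcode
+*/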
 
-int starpu_mpi_cache_is_enabled();
-int starpu_mpi_cache_set(int enabled);
+/** @} */
 
-int starpu_mpi_wait_for_all(MPI_Comm comm);
+/**
+   @name Collective Operations
+   \anchor MPICollectiveOperations
+   @{
+*/
 
-typedef void (*starpu_mpi_datatype_allocate_func_t)(starpu_data_handle_t, MPI_Datatype *);
-typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *);
-int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func);
-int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
+/**
+   Perform a reduction on the given data \p data_handle. All nodes
+   send the data to its owner node which will perform a reduction.
+*/
+void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle);
+
+/**
+   Similar to starpu_mpi_redux_data(), but takes a priority \p prio.
+*/
+void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio);
+
+/**
+   Scatter data among processes of the communicator based on the
+   ownership of the data. For each data of the array \p data_handles,
+   the process \p root sends the data to the process owning this data.
+   Processes receiving data must have valid data handles to receive
+   them. On completion of the collective communication, the \p
+   scallback function is called with the argument \p sarg on the
+   process \p root, the \p rcallback function is called with the
+   argument \p rarg on any other process.
+*/
+int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
+
+/**
+   Gather data from the different processes of the communicator onto
+   the process \p root. Each process owning data handles in the array
+   \p data_handles will send them to the process \p root. The process
+   \p root must have valid data handles to receive the data. On
+   completion of the collective communication, the \p rcallback
+   function is called with the argument \p rarg on the process root,
+   the \p scallback function is called with the argument \p sarg on
+   any other process.
+*/
+int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
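+
+/*
+   A collective sketch: scatter the pieces from the process 0, insert
+   tasks, then gather them back; the \c handles array, \c nparts and
+   the (empty) completion callback are assumptions made for the
+   example.
+
+   \code{.c}
+   void coll_callback(void *arg)
+   {
+	(void)arg; /* nothing to do on completion */
+   }
+
+   starpu_mpi_scatter_detached(handles, nparts, 0, MPI_COMM_WORLD,
+                               coll_callback, NULL, coll_callback, NULL);
+   /* ... insert tasks working on the pieces ... */
+   starpu_mpi_gather_detached(handles, nparts, 0, MPI_COMM_WORLD,
+                              coll_callback, NULL, coll_callback, NULL);
+   \endcode
+*/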
+
+/** @} */
 
 int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *));
 int starpu_mpi_pre_submit_hook_unregister();
 
-#define STARPU_MPI_TAG_UB MPI_TAG_UB
-int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
+/** @} */
 
 #ifdef __cplusplus
 }

+ 9 - 3
starpufft/include/starpufft.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010,2012,2014,2015,2017                 CNRS
+ * Copyright (C) 2010,2012,2014,2015,2017,2019            CNRS
  * Copyright (C) 2009,2011,2014                           Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  *
@@ -16,13 +16,17 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+// The documentation for this file is in doc/doxygen/chapters/api/fft_support.doxy
+
+#ifndef __STARPU_FFT_H__
+#define __STARPU_FFT_H__
+
 #include <stdio.h>
 #include <complex.h>
 #include <starpu.h>
 #ifdef STARPU_USE_CUDA
 #include <cufft.h>
-#define STARPU_CUFFT_REPORT_ERROR(status) \
-	STARPUFFT(report_error)(__starpu_func__, __FILE__, __LINE__, status)
+#define STARPU_CUFFT_REPORT_ERROR(status) STARPUFFT(report_error)(__starpu_func__, __FILE__, __LINE__, status)
 #endif /* !STARPU_USE_CUDA */
 
 #define STARPUFFT_FORWARD -1
@@ -64,3 +68,5 @@ __STARPUFFT_INTERFACE(__STARPUFFTL, long double)
 
 /* Internal use */
 extern int starpufft_last_plan_number;
+
+#endif // __STARPU_FFT_H__

+ 352 - 7
starpurm/include/starpurm.h

@@ -24,13 +24,36 @@ extern "C"
 {
 #endif
 
-/* type mapping */
+/**
+   @defgroup API_Interop_Support Interoperability Support
+   @brief This section describes the interface supplied by StarPU to
+   interoperate with other runtime systems.
+   @{
+*/
+
+/**
+   StarPU Resource Manager return type.
+*/
 enum e_starpurm_drs_ret
 {
+	/**
+	   Dynamic resource sharing operation succeeded.
+	*/
 	starpurm_DRS_SUCCESS = 0,
 
+	/**
+	   Dynamic resource sharing is disabled.
+	*/
 	starpurm_DRS_DISABLD = -1,
+	/**
+	   Dynamic resource sharing operation is not authorized or
+	   implemented.
+	*/
 	starpurm_DRS_PERM    = -2,
+	/**
+	   Dynamic resource sharing operation has been called with one
+	   or more invalid parameters.
+	*/
 	starpurm_DRS_EINVAL  = -3
 #if 0
 	/* Unused for now */
@@ -45,21 +68,89 @@ typedef void (*starpurm_drs_cb_t)(void *);
 typedef void *starpurm_block_cond_t;
 typedef int (*starpurm_polling_t)(void *);
 
-/* Resource enforcement */
+/**
+   @name Initialisation
+   @{
+*/
+
+/**
+   Variant of starpurm_initialize() taking the set of processing
+   units initially owned by StarPU in \p initially_owned_cpuset.
+*/
 void starpurm_initialize_with_cpuset(hwloc_cpuset_t initially_owned_cpuset);
+
+/**
+   Initialize StarPU and the StarPU-RM resource management module. The
+   starpu_init() function should not have been called before the call
+   to starpurm_initialize(). The starpurm_initialize() function will
+   take care of this
+*/
 void starpurm_initialize(void);
 
+/**
+   Shutdown StarPU-RM and StarPU. The starpu_shutdown() function
+   should not be called before. The starpurm_shutdown() function will
+   take care of this.
+*/
 void starpurm_shutdown(void);
 
+/** @} */
+
+/**
+   @name Spawn
+   @{
+*/
+
+/**
+   Allocate a temporary context spanning the units selected in the
+   cpuset bitmap, set it as the default context for the current
+   thread, and call user function \p f. Upon the return of user
+   function \p f, the temporary context is freed and the previous
+   default context for the current thread is restored.
+*/
 void starpurm_spawn_kernel_on_cpus(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset);
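+
+/*
+   A spawning sketch: run a hypothetical \c my_kernel() function in a
+   temporary context restricted to logical units 0 to 3, built with
+   the standard hwloc bitmap API.
+
+   \code{.c}
+   void my_kernel(void *args)
+   {
+	/* StarPU tasks submitted here run in the temporary context */
+   }
+
+   hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
+   hwloc_bitmap_set_range(cpuset, 0, 3);
+   starpurm_spawn_kernel_on_cpus(NULL, my_kernel, NULL, cpuset);
+   hwloc_bitmap_free(cpuset);
+   \endcode
+*/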
+
+/**
+   Spawn a POSIX thread and return immediately. The thread spawned
+   will allocate a temporary context spanning the units selected in
+   the cpuset bitmap, set it as the default context for the current
+   thread, and call user function \p f. Upon the return of user
+   function \p f, the temporary context will be freed and the previous
+   default context for the current thread restored. A user specified
+   callback \p cb_f will be called just before the termination of the
+   thread.
+*/
 void starpurm_spawn_kernel_on_cpus_callback(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset, void(*cb_f)(void *), void *cb_args);
+
 void starpurm_spawn_kernel_callback(void *data, void(*f)(void *), void *args, void(*cb_f)(void *), void *cb_args);
 
-/* Dynamic resource sharing */
+/** @} */
+
+/**
+   @name DynamicResourceSharing
+   @{
+*/
+
+/**
+   Turn on dynamic resource sharing support.
+*/
 starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd);
+
+/**
+   Turn off dynamic resource sharing support.
+*/
 starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd);
+
+/**
+   Return the state of the dynamic resource sharing support: non-zero
+   if enabled, \c 0 if disabled.
+*/
 int starpurm_drs_enabled_p(void);
 
+/**
+   Set the maximum number of CPU computing units available for StarPU
+   computations to \p max. This number cannot exceed the maximum
+   number of StarPU's CPU workers allocated at start-up time.
+*/
 starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int max);
 
 #if 0
@@ -68,34 +159,132 @@ starpurm_drs_ret_t starpurm_callback_set(starpurm_drs_desc_t *spd, starpurm_drs_
 starpurm_drs_ret_t starpurm_callback_get(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t *callback);
 #endif
 
+/**
+   Extend StarPU's default scheduling context to execute tasks on
+   worker corresponding to logical unit \p cpuid. If StarPU does not
+   have a worker thread initialized for logical unit \p cpuid, do
+   nothing.
+*/
 starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid);
+
+/**
+   Extend StarPU's default scheduling context to execute tasks on \p
+   ncpus more workers, up to the number of StarPU worker threads
+   initialized.
+*/
 starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus);
+
+/**
+   Extend StarPU's default scheduling context to execute tasks on the
+   additional logical units selected in \p mask. Logical units of \p
+   mask for which no StarPU worker is initialized are silently ignored.
+*/
 starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Set StarPU's default scheduling context to execute tasks on all
+   available logical units for which a StarPU worker has been
+   initialized.
+*/
 starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd);
 
+/**
+   Shrink StarPU's default scheduling context so as to not execute
+   tasks on the worker corresponding to logical unit \p cpuid. If StarPU
+   does not have a worker thread initialized for logical unit \p
+   cpuid, do nothing.
+*/
 starpurm_drs_ret_t starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid);
+
+/**
+   Shrink StarPU's default scheduling context to execute tasks on \p
+   ncpus less workers.
+*/
 starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus);
+
+/**
+   Shrink StarPU's default scheduling context so as to not execute
+   tasks on the logical units selected in \p mask. Logical units of \p
+   mask for which no StarPU worker is initialized are silently ignored.
+*/
 starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Shrink StarPU's default scheduling context so as to remove all
+   logical units.
+*/
 starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd);
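+
+/*
+   A resource sharing sketch: temporarily give two CPU units back to
+   another runtime, then return them to StarPU; \c spd is assumed to
+   be a valid resource sharing descriptor.
+
+   \code{.c}
+   starpurm_set_drs_enable(spd);
+   starpurm_withdraw_cpus_from_starpu(spd, 2);
+   /* ... the other runtime uses the two units ... */
+   starpurm_assign_cpus_to_starpu(spd, 2);
+   \endcode
+*/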
 
 /* --- */
 
+/**
+   Synonym for starpurm_assign_all_cpus_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd);
+
+/**
+   Synonym for starpurm_assign_cpu_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid);
+
+/**
+   Synonym for starpurm_assign_cpus_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus);
+
+/**
+   Synonym for starpurm_assign_cpu_mask_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
 
+/**
+   Synonym for starpurm_withdraw_all_cpus_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd);
+
+/**
+   Synonym for starpurm_withdraw_cpu_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid);
+
+/**
+   Synonym for starpurm_withdraw_cpus_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus);
+
+/**
+   Synonym for starpurm_withdraw_cpu_mask_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
 
+/**
+   Synonym for starpurm_withdraw_all_cpus_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd);
+
+/**
+   Synonym for starpurm_withdraw_cpu_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid);
+
+/**
+   Synonym for starpurm_withdraw_cpus_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus);
+
+/**
+   Synonym for starpurm_withdraw_cpu_mask_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
 
+/**
+   Synonym for starpurm_assign_all_cpus_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd);
+
+/**
+   Synonym for starpurm_assign_cpu_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid);
 
 #if 0
@@ -108,52 +297,208 @@ void starpurm_register_polling_service(const char *service_name, starpurm_pollin
 void starpurm_unregister_polling_service(const char *service_name, starpurm_polling_t function, void *data);
 #endif
 
-/* Devices */
+/** @} */
+
+/**
+   @name Devices
+   @{
+*/
+
+/**
+   Return the device type ID constant associated to the device type name.
+   Valid names for \p type_str are:
+   - \c "cpu": regular CPU unit;
+   - \c "opencl": OpenCL device unit;
+   - \c "cuda": nVidia CUDA device unit;
+   - \c "mic": Intel KNC type device unit.
+*/
 int starpurm_get_device_type_id(const char *type_str);
+
+/**
+   Return the device type name associated to the device type ID
+   constant.
+*/
 const char *starpurm_get_device_type_name(int type_id);
+
+/**
+   Return the number of initialized StarPU workers for the device type
+   \p type_id.
+*/
 int starpurm_get_nb_devices_by_type(int type_id);
+
+/**
+   Return the unique ID assigned to the \p device_rank nth device of
+   type \p type_id.
+*/
 int starpurm_get_device_id(int type_id, int device_rank);
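+
+/*
+   A device enumeration sketch using the queries above for the "cuda"
+   device type.
+
+   \code{.c}
+   int cuda_type = starpurm_get_device_type_id("cuda");
+   int ndevices = starpurm_get_nb_devices_by_type(cuda_type);
+   int i;
+   for (i = 0; i < ndevices; i++)
+   {
+	int unit_id = starpurm_get_device_id(cuda_type, i);
+	/* unit_id can then be used with the assignment functions below */
+   }
+   \endcode
+*/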
 
+/**
+   Extend StarPU's default scheduling context to use \p unit_rank nth
+   device of type \p type_id.
+*/
 starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
+
+/**
+   Extend StarPU's default scheduling context to use \p ndevices more
+   devices of type \p type_id, up to the number of StarPU workers
+   initialized for such device type.
+ */
 starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices);
+
+/**
+   Extend StarPU's default scheduling context to use additional
+   devices as designated by their corresponding StarPU worker
+   thread(s) CPU-set \p mask.
+ */
 starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Extend StarPU's default scheduling context to use all devices of
+   type \p type_id for which it has a worker thread initialized.
+*/
 starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id);
 
+/**
+   Shrink StarPU's default scheduling context to not use \p unit_rank
+   nth device of type \p type_id.
+ */
 starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
+
+/**
+   Shrink StarPU's default scheduling context to use \p ndevices less
+   devices of type \p type_id.
+*/
 starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices);
+
+/**
+   Shrink StarPU's default scheduling context to not use devices
+   designated by their corresponding StarPU worker thread(s) CPU-set
+   \p mask.
+*/
 starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Shrink StarPU's default scheduling context to use no devices of
+   type \p type_id.
+*/
 starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id);
 
 /* --- */
 
+/**
+   Synonym for starpurm_assign_device_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
+
+/**
+   Synonym for starpurm_assign_devices_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices);
+
+/**
+   Synonym for starpurm_assign_device_mask_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Synonym for starpurm_assign_all_devices_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id);
 
+/**
+   Synonym for starpurm_withdraw_device_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
+
+/**
+   Synonym for starpurm_withdraw_devices_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices);
+
+/**
+   Synonym for starpurm_withdraw_device_mask_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Synonym for starpurm_withdraw_all_devices_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id);
 
+/**
+   Synonym for starpurm_withdraw_device_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
+
+/**
+   Synonym for starpurm_withdraw_devices_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices);
+
+/**
+   Synonym for starpurm_withdraw_device_mask_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask);
+
+/**
+   Synonym for starpurm_withdraw_all_devices_from_starpu().
+*/
 starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id);
 
+/**
+   Synonym for starpurm_assign_all_devices_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id);
+
+/**
+   Synonym for starpurm_assign_device_to_starpu().
+*/
 starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank);
 
-/* cpusets */
-hwloc_cpuset_t starpurm_get_unit_cpuset(int unitid);
-hwloc_cpuset_t starpurm_get_cpu_worker_cpuset(int unit_rank);
+/** @} */
+
+/**
+   @name CpusetsQueries
+   @{
+*/
+
+/**
+   Return the CPU-set of the StarPU worker associated to the \p
+   unit_rank nth unit of type \p type_id.
+*/
 hwloc_cpuset_t starpurm_get_device_worker_cpuset(int type_id, int unit_rank);
+
+/**
+   Return the cumulated CPU-set of all StarPU worker threads.
+*/
 hwloc_cpuset_t starpurm_get_global_cpuset(void);
+
+/**
+   Return the CPU-set of the StarPU worker threads currently selected
+   in StarPU's default scheduling context.
+ */
 hwloc_cpuset_t starpurm_get_selected_cpuset(void);
+
+/**
+   Return the cumulated CPU-set of all CPU StarPU worker threads.
+*/
 hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void);
+
+/**
+   Return the cumulated CPU-set of all "non-CPU" StarPU worker
+   threads.
+ */
 hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void);
+
+/**
+   Return the cumulated CPU-set of all StarPU worker threads for
+   devices of type \p typeid.
+*/
 hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int typeid);
 
+/** @} */
+/** @} */
+
 #ifdef __cplusplus
 }
 #endif