Bläddra i källkod

move documentation from separate doxygen files to public .h files

Nathalie Furmento 6 år sedan
förälder
incheckning
3966292204

+ 1 - 0
Makefile.am

@@ -69,6 +69,7 @@ pkgconfig_DATA = libstarpu.pc starpu-1.0.pc starpu-1.1.pc starpu-1.2.pc starpu-1
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 				\
 	include/starpu.h			\
+	include/starpu_helper.h			\
 	include/starpu_bitmap.h			\
 	include/starpu_data_filters.h		\
 	include/starpu_data_interfaces.h	\

+ 0 - 3
doc/doxygen/Makefile.am

@@ -106,8 +106,6 @@ chapters =	\
 	chapters/code/nf_initexit.f90 \
 	chapters/api/codelet_and_tasks.doxy \
 	chapters/api/fft_support.doxy \
-	chapters/api/initialization.doxy \
-	chapters/api/misc_helpers.doxy \
 	chapters/api/mpi.doxy \
 	chapters/api/opencl_extensions.doxy \
 	chapters/api/openmp_runtime_support.doxy \
@@ -119,7 +117,6 @@ chapters =	\
 	chapters/api/versioning.doxy \
 	chapters/api/workers.doxy \
 	chapters/api/threads.doxy \
-	chapters/api/toolbox.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
 	chapters/api/interoperability.doxy

+ 0 - 363
doc/doxygen/chapters/api/initialization.doxy

@@ -1,363 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2011,2012,2014,2017                      Inria
- * Copyright (C) 2010-2018                                CNRS
- * Copyright (C) 2009-2011,2014,2018                      Université de Bordeaux
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Initialization_and_Termination Initialization and Termination
-
-\struct starpu_conf
-\ingroup API_Initialization_and_Termination
-This structure is passed to the starpu_init() function in order to
-configure StarPU. It has to be initialized with starpu_conf_init().
-When the default value is used, StarPU automatically selects the
-number of processing units and takes the default scheduling policy.
-The environment variables overwrite the equivalent parameters.
-\var int starpu_conf::magic
-\private
-    Will be initialized by starpu_conf_init(). Should not be set by
-    hand.
-
-\var const char*starpu_conf::sched_policy_name
-    Name of the scheduling policy. This can also be specified with the
-    environment variable \ref STARPU_SCHED. (default = <c>NULL</c>).
-
-\var struct starpu_sched_policy *starpu_conf::sched_policy
-    Definition of the scheduling policy. This field is ignored if
-    starpu_conf::sched_policy_name is set. (default = <c>NULL</c>)
-
-\var void (*starpu_conf::sched_policy_init)(unsigned)
-    todo
-
-\var int starpu_conf::ncpus
-    Number of CPU cores that StarPU can use. This can also be
-    specified with the environment variable \ref STARPU_NCPU .
-    (default = -1)
-\var int starpu_conf::ncuda
-    Number of CUDA devices that StarPU can use. This can also be
-    specified with the environment variable \ref STARPU_NCUDA.
-    (default = -1)
-\var int starpu_conf::nopencl
-    Number of OpenCL devices that StarPU can use. This can also be
-    specified with the environment variable \ref STARPU_NOPENCL.
-    (default = -1)
-\var int starpu_conf::nmic
-    Number of MIC devices that StarPU can use. This can also be
-    specified with the environment variable \ref STARPU_NMIC.
-    (default = -1)
-\var int starpu_conf::nscc
-    Number of SCC devices that StarPU can use. This can also be
-    specified with the environment variable \ref STARPU_NSCC.
-    (default = -1)
-\var int starpu_conf::nmpi_ms
-    Number of MPI Master Slave devices that StarPU can use. This can
-    also be specified with the environment variable \ref
-    STARPU_NMPI_MS. (default = -1)
-
-\var unsigned starpu_conf::use_explicit_workers_bindid
-    If this flag is set, the starpu_conf::workers_bindid array
-    indicates where the different workers are bound, otherwise StarPU
-    automatically selects where to bind the different workers. This
-    can also be specified with the environment variable \ref
-    STARPU_WORKERS_CPUID. (default = 0)
-\var unsigned starpu_conf::workers_bindid[STARPU_NMAXWORKERS]
-    If the starpu_conf::use_explicit_workers_bindid flag is set, this
-    array indicates where to bind the different workers. The i-th
-    entry of the starpu_conf::workers_bindid indicates the logical
-    identifier of the processor which should execute the i-th worker.
-    Note that the logical ordering of the CPUs is either determined by
-    the OS, or provided by the hwloc library in case it is available.
-\var unsigned starpu_conf::use_explicit_workers_cuda_gpuid
-    If this flag is set, the CUDA workers will be attached to the CUDA
-    devices specified in the starpu_conf::workers_cuda_gpuid array.
-    Otherwise, StarPU assigns the CUDA devices in a round-robin
-    fashion. This can also be specified with the environment variable
-    \ref STARPU_WORKERS_CUDAID. (default = 0)
-\var unsigned starpu_conf::workers_cuda_gpuid[STARPU_NMAXWORKERS]
-    If the starpu_conf::use_explicit_workers_cuda_gpuid flag is set,
-    this array contains the logical identifiers of the CUDA devices
-    (as used by \c cudaGetDevice()).
-\var unsigned starpu_conf::use_explicit_workers_opencl_gpuid
-    If this flag is set, the OpenCL workers will be attached to the
-    OpenCL devices specified in the starpu_conf::workers_opencl_gpuid
-    array. Otherwise, StarPU assigns the OpenCL devices in a
-    round-robin fashion. This can also be specified with the
-    environment variable \ref STARPU_WORKERS_OPENCLID. (default = 0)
-\var unsigned starpu_conf::workers_opencl_gpuid[STARPU_NMAXWORKERS]
-    If the starpu_conf::use_explicit_workers_opencl_gpuid flag is set,
-    this array contains the logical identifiers of the OpenCL devices
-    to be used.
-\var unsigned starpu_conf::use_explicit_workers_mic_deviceid
-    If this flag is set, the MIC workers will be attached to the MIC
-    devices specified in the array starpu_conf::workers_mic_deviceid.
-    Otherwise, StarPU assigns the MIC devices in a round-robin
-    fashion. This can also be specified with the environment variable
-    \ref STARPU_WORKERS_MICID. (default = 0)
-\var unsigned starpu_conf::workers_mic_deviceid[STARPU_NMAXWORKERS]
-    If the flag starpu_conf::use_explicit_workers_mic_deviceid is set,
-    the array contains the logical identifiers of the MIC devices to
-    be used.
-\var unsigned starpu_conf::use_explicit_workers_scc_deviceid
-    If this flag is set, the SCC workers will be attached to the SCC
-    devices specified in the array starpu_conf::workers_scc_deviceid.
-    (default = 0)
-\var unsigned starpu_conf::workers_scc_deviceid[STARPU_NMAXWORKERS]
-    If the flag starpu_conf::use_explicit_workers_scc_deviceid is set,
-    the array contains the logical identifiers of the SCC devices to
-    be used. Otherwise, StarPU assigns the SCC devices in a
-    round-robin fashion. This can also be specified with the
-    environment variable \ref STARPU_WORKERS_SCCID.
-\var unsigned starpu_conf::use_explicit_workers_mpi_ms_deviceid
-    If this flag is set, the MPI Master Slave workers will be attached
-    to the MPI Master Slave devices specified in the array
-    starpu_conf::workers_mpi_ms_deviceid. Otherwise, StarPU assigns
-    the MPI Master Slave devices in a round-robin fashion. (default =
-    0)
-\var unsigned starpu_conf::workers_mpi_ms_deviceid[STARPU_NMAXWORKERS]
-    If the flag starpu_conf::use_explicit_workers_mpi_ms_deviceid is
-    set, the array contains the logical identifiers of the MPI Master
-    Slave devices to be used.
-
-\var int starpu_conf::bus_calibrate
-    If this flag is set, StarPU will recalibrate the bus.  If this
-    value is equal to -1, the default value is used. This can
-    also be specified with the environment variable \ref
-    STARPU_BUS_CALIBRATE. (default = 0)
-\var int starpu_conf::calibrate
-    If this flag is set, StarPU will calibrate the performance models
-    when executing tasks. If this value is equal to -1, the
-    default value is used. If the value is equal to 1, it will
-    force continuing calibration. If the value is equal to 2,
-    the existing performance models will be overwritten. This can also
-    be specified with the environment variable \ref STARPU_CALIBRATE.
-    (default = 0)
-\var int starpu_conf::single_combined_worker
-    By default, StarPU executes parallel tasks concurrently. Some
-    parallel libraries (e.g. most OpenMP implementations) however do
-    not support concurrent calls to parallel code. In such case,
-    setting this flag makes StarPU only start one parallel task at a
-    time (but other CPU and GPU tasks are not affected and can be run
-    concurrently). The parallel task scheduler will however still try
-    varying combined worker sizes to look for the most efficient ones.
-    This can also be specified with the environment variable \ref
-    STARPU_SINGLE_COMBINED_WORKER. (default = 0)
-
-\var char *starpu_conf::mic_sink_program_path
-    Path to the kernel to execute on the MIC device, compiled for MIC
-    architecture. When set to <c>NULL</c>, StarPU automatically looks
-    next to the host program location. (default = <c>NULL</c>)
-
-\var int starpu_conf::disable_asynchronous_copy
-    This flag should be set to 1 to disable asynchronous copies
-    between CPUs and all accelerators. This can also be specified with
-    the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_COPY.
-    The AMD implementation of OpenCL is known to fail when copying
-    data asynchronously. When using this implementation, it is
-    therefore necessary to disable asynchronous data transfers. This
-    can also be specified at compilation time by giving to the
-    configure script the option
-    \ref disable-asynchronous-copy "--disable-asynchronous-copy".
-    (default = 0)
-\var int starpu_conf::disable_asynchronous_cuda_copy
-    This flag should be set to 1 to disable asynchronous copies
-    between CPUs and CUDA accelerators. This can also be specified
-    with the environment variable \ref
-    STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
-    This can also be specified at compilation time by giving to the
-    configure script the option
-    \ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy".
-    (default = 0)
-\var int starpu_conf::disable_asynchronous_opencl_copy
-    This flag should be set to 1 to disable asynchronous copies
-    between CPUs and OpenCL accelerators. This can also be specified
-    with the environment variable \ref
-    STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. The AMD implementation of
-    OpenCL is known to fail when copying data asynchronously. When
-    using this implementation, it is therefore necessary to disable
-    asynchronous data transfers. This can also be specified at
-    compilation time by giving to the configure script the
-    option
-    \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy".
-    (default = 0)
-\var int starpu_conf::disable_asynchronous_mic_copy
-    This flag should be set to 1 to disable asynchronous copies
-    between CPUs and MIC accelerators. This can also be specified with
-    the environment variable \ref
-    STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY. This can also be specified
-    at compilation time by giving to the configure script the option
-    \ref disable-asynchronous-mic-copy "--disable-asynchronous-mic-copy".
-    (default = 0).
-\var int starpu_conf::disable_asynchronous_mpi_ms_copy
-    This flag should be set to 1 to disable asynchronous copies
-    between CPUs and MPI Master Slave devices. This can also be
-    specified with the environment variable \ref
-    STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY. This can also be
-    specified at compilation time by giving to the configure script
-    the option
-    \ref disable-asynchronous-mpi-master-slave-copy "--disable-asynchronous-mpi-master-slave-copy".
-    (default = 0).
-
-\var unsigned *starpu_conf::cuda_opengl_interoperability
-    Enable CUDA/OpenGL interoperation on these CUDA devices. This can
-    be set to an array of CUDA device identifiers for which
-    \c cudaGLSetGLDevice() should be called instead of
-    \c cudaSetDevice(). Its size is specified by the
-    starpu_conf::n_cuda_opengl_interoperability field below
-    (default = <c>NULL</c>)
-\var unsigned starpu_conf::n_cuda_opengl_interoperability
-    todo
-
-\var struct starpu_driver *starpu_conf::not_launched_drivers
-    Array of drivers that should not be launched by StarPU. The
-    application will run in one of its own threads. (default =
-    <c>NULL</c>)
-\var unsigned starpu_conf::n_not_launched_drivers
-    The number of StarPU drivers that should not be launched by
-    StarPU. (default = 0)
-
-\var starpu_conf::trace_buffer_size
-    Specify the buffer size used for FxT tracing. Starting from FxT
-    version 0.2.12, the buffer will automatically be flushed when it
-    fills in, but it may still be interesting to specify a bigger
-    value to avoid any flushing (which would disturb the trace).
-
-\var starpu_conf::global_sched_ctx_min_priority
-    todo
-\var starpu_conf::global_sched_ctx_max_priority
-    todo
-
-\var starpu_conf::callback_worker_going_to_sleep
-    If StarPU was compiled with blocking drivers support and worker
-    callbacks support enabled, allow to specify an external resource
-    manager callback to be notified about workers going to sleep.
-
-\var starpu_conf::callback_worker_waking_up
-    If StarPU was compiled with blocking drivers support and worker
-    callbacks support enabled, allow to specify an external resource
-    manager callback to be notified about workers waking-up.
-
-\fn int starpu_init(struct starpu_conf *conf)
-\ingroup API_Initialization_and_Termination
-This is StarPU initialization method, which must be called prior to
-any other StarPU call. It is possible to specify StarPU’s
-configuration (e.g. scheduling policy, number of cores, ...) by
-passing a non-<c>NULL</c> \p conf. Default configuration is used if \p
-conf is <c>NULL</c>. Upon successful completion, this function
-returns 0. Otherwise, <c>-ENODEV</c> indicates that no worker was
-available (and thus StarPU was not initialized).
-
-\fn int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
-\ingroup API_Initialization_and_Termination
-This is the same as starpu_init(), but also takes the \p argc and \p
-argv as defined by the application. This is needed for SCC execution
-to initialize the communication library.
-Do not call starpu_init() and starpu_initialize() in the
-same program.
-
-\fn int starpu_conf_init(struct starpu_conf *conf)
-\ingroup API_Initialization_and_Termination
-Initialize the \p conf structure with the default values. In case some
-configuration parameters are already
-specified through environment variables, starpu_conf_init() initializes
-the fields of \p conf according to the environment variables.
-For instance if \ref STARPU_CALIBRATE is set, its value is put in the
-field starpu_conf::calibrate of \p conf. Upon successful
-completion, this function returns 0. Otherwise, <c>-EINVAL</c> indicates that
-the argument was <c>NULL</c>.
-
-\fn int starpu_is_initialized(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if StarPU is already initialized.
-
-\fn void starpu_wait_initialized(void)
-\ingroup API_Initialization_and_Termination
-Wait for starpu_init() call to finish.
-
-\fn void starpu_shutdown(void)
-\ingroup API_Initialization_and_Termination
-This is StarPU termination method. It must be called at the end of the
-application: statistics and other post-mortem debugging information
-are not guaranteed to be available until this method has been called.
-
-\def STARPU_THREAD_ACTIVE
-\ingroup API_Initialization_and_Termination
-This flag should be passed to starpu_get_next_bindid() and
-starpu_bind_thread_on() when binding a thread which will significantly eat CPU
-time, and should thus have its own dedicated CPU.
-
-\fn unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred)
-\ingroup API_Initialization_and_Termination
-This returns a PU binding ID which can be used to bind threads with
-starpu_bind_thread_on(). \p flags can be set to STARPU_THREAD_ACTIVE or 0.
-When \p npreferred is set to non-zero, \p preferred is an array of size \p
-npreferred in which a preference of PU binding IDs can be set. By default StarPU
-will return the first PU available for binding.
-
-\fn int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name)
-\ingroup API_Initialization_and_Termination
-This binds the calling thread on the given \p cpuid (which should have been
-obtained with starpu_get_next_bindid()).
-
-This returns -1 if a thread was already bound to this PU (but binding will still
-have been done, and a warning will have been printed), so the caller can tell
-the user how to avoid the issue.
-
-\p name should be set to a unique string so that different calls with the same
-name for the same cpuid do not produce a warning.
-
-
-\fn void starpu_pause(void)
-\ingroup API_Initialization_and_Termination
-Suspend the processing of new tasks by
-workers. It can be used in a program where StarPU is used during only
-a part of the execution. Without this call, the workers continue to
-poll for new tasks in a tight loop, wasting CPU time. The symmetric
-call to starpu_resume() should be used to unfreeze the workers.
-
-\fn void starpu_resume(void)
-\ingroup API_Initialization_and_Termination
-This is the symmetrical call to starpu_pause(), used to resume
-the workers polling for new tasks.
-
-\fn int starpu_asynchronous_copy_disabled(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if asynchronous data transfers between CPU and accelerators
-are disabled.
-
-\fn int starpu_asynchronous_cuda_copy_disabled(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if asynchronous data transfers between CPU and CUDA
-accelerators are disabled.
-
-\fn int starpu_asynchronous_opencl_copy_disabled(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if asynchronous data transfers between CPU and OpenCL
-accelerators are disabled.
-
-\fn int starpu_asynchronous_mic_copy_disabled(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if asynchronous data transfers between CPU and MIC
-devices are disabled.
-
-\fn int starpu_asynchronous_mpi_ms_copy_disabled(void)
-\ingroup API_Initialization_and_Termination
-Return 1 if asynchronous data transfers between CPU and MPI Slave
-devices are disabled.
-
-\fn void starpu_topology_print(FILE *f)
-\ingroup API_Initialization_and_Termination
-Print a description of the topology on \p f.
-
-*/

+ 0 - 64
doc/doxygen/chapters/api/misc_helpers.doxy

@@ -1,64 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Miscellaneous_Helpers Miscellaneous Helpers
-
-\fn int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg)
-\ingroup API_Miscellaneous_Helpers
-Copy the content of \p src_handle into \p dst_handle. The parameter \p
-asynchronous indicates whether the function should block or not. In
-the case of an asynchronous call, it is possible to synchronize with
-the termination of this operation either by the means of implicit
-dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
-\p callback_func is not <c>NULL</c>, this callback function is executed after
-the handle has been copied, and it is given the pointer \p callback_arg as argument.
-
-\fn void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where)
-\ingroup API_Miscellaneous_Helpers
-Execute the given function \p func on a subset of workers. When
-calling this method, the offloaded function \p func is executed by
-every StarPU worker that is eligible to execute the function.
-The argument \p arg
-is passed to the offloaded function. The argument \p where specifies
-on which types of processing units the function should be executed.
-Similarly to the field starpu_codelet::where, it is possible to
-specify that the function should be executed on every CUDA device and
-every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function blocks
-until \p func has been executed on every appropriate processing
-unit, and thus may not be called from a callback function for
-instance.
-
-\fn void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name)
-\ingroup API_Miscellaneous_Helpers
-Same as starpu_execute_on_each_worker(), except that the task name is
-specified in the argument \p name.
-
-\fn void starpu_execute_on_specific_workers(void (*func)(void*), void *arg, unsigned num_workers, unsigned *workers, const char *name);
-\ingroup API_Miscellaneous_Helpers
-Call \p func(\p arg) on every worker in the \p workers array. \p
-num_workers indicates the number of workers in this array.  This
-function is synchronous, but the different workers may execute the
-function in parallel.
-
-\fn double starpu_timing_now(void)
-\ingroup API_Miscellaneous_Helpers
-Return the current date in micro-seconds.
-
-
-*/
-

+ 0 - 118
doc/doxygen/chapters/api/toolbox.doxy

@@ -1,118 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009-2011,2014                           Université de Bordeaux
- * Copyright (C) 2011,2012                                Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-/*! \defgroup API_Toolbox Toolbox
-
-\brief The following macros allow to make GCC extensions portable, and
-to have a code which can be compiled with any C compiler.
-
-\def STARPU_GNUC_PREREQ
-\ingroup API_Toolbox
-Return true (non-zero) if GCC version \p maj.\p min or later is being used (macro taken from glibc.)
-
-\def STARPU_UNLIKELY
-\ingroup API_Toolbox
-When building with a GNU C Compiler, allow programmers to mark an expression as unlikely.
-
-\def STARPU_LIKELY
-\ingroup API_Toolbox
-When building with a GNU C Compiler, allow programmers to mark an expression as likely.
-
-\def STARPU_ATTRIBUTE_UNUSED
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((unused))
-
-\def STARPU_ATTRIBUTE_INTERNAL
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((visibility ("internal")))
-
-\def STARPU_ATTRIBUTE_MALLOC
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((malloc))
-
-\def STARPU_ATTRIBUTE_WARN_UNUSED_RESULT
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((warn_unused_result))
-
-\def STARPU_ATTRIBUTE_PURE
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((pure))
-
-\def STARPU_ATTRIBUTE_ALIGNED
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((aligned(size)))
-
-\def STARPU_WARN_UNUSED_RESULT
-\ingroup API_Toolbox
-When building with a GNU C Compiler, defined to __attribute__((__warn_unused_result__))
-
-\def STARPU_POISON_PTR
-\ingroup API_Toolbox
-Define a value which can be used to mark pointers as invalid values.
-
-\def STARPU_MIN
-\ingroup API_Toolbox
-Return the min of the two parameters.
-
-\def STARPU_MAX
-\ingroup API_Toolbox
-Return the max of the two parameters.
-
-\def STARPU_ASSERT
-\ingroup API_Toolbox
-Unless StarPU has been configured with the option \ref enable-fast
-"--enable-fast", this macro will abort if the expression is false.
-
-\def STARPU_ASSERT_MSG
-\ingroup API_Toolbox
-Unless StarPU has been configured with the option \ref enable-fast
-"--enable-fast", this macro will abort if the expression is false. The
-given message will be displayed.
-
-\def STARPU_ABORT
-\ingroup API_Toolbox
-Abort the program.
-
-\def STARPU_ABORT_MSG
-\ingroup API_Toolbox
-Abort the program, and display the given message.
-
-\def STARPU_CHECK_RETURN_VALUE
-\ingroup API_Toolbox
-Abort the program (after displaying \p message) if \p err has a value which is not 0.
-
-\def STARPU_CHECK_RETURN_VALUE_IS
-\ingroup API_Toolbox
-Abort the program (after displaying \p message) if \p err is different from \p value.
-
-\def STARPU_RMB
-\ingroup API_Toolbox
-This macro can be used to do a synchronization.
-
-\def STARPU_WMB
-\ingroup API_Toolbox
-This macro can be used to do a synchronization.
-
-\fn int starpu_get_env_number(const char *str)
-\ingroup API_Toolbox
-Return the integer value of the environment variable named \p str.
-Return 0 otherwise (the variable does not exist or has a non-integer
-value).
-
-*/
-

+ 1 - 370
doc/doxygen/chapters/api/workers.doxy

@@ -16,7 +16,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-/*! \defgroup API_Workers_Properties Workers’ Properties
+/*! \ingroup API_Workers_Properties
 
 \def STARPU_NMAXWORKERS
 \ingroup API_Workers_Properties
@@ -38,373 +38,4 @@ Define the maximum number of memory nodes managed by StarPU. The default value c
 configure by using the option \ref enable-maxnodes "--enable-maxnodes". Reducing it allows to
 considerably reduce memory used by StarPU data structures.
 
-\enum starpu_node_kind
-\ingroup API_Workers_Properties
-    TODO
-\var starpu_node_kind::STARPU_UNUSED
-    TODO
-\var starpu_node_kind::STARPU_CPU_RAM
-    TODO
-\var starpu_node_kind::STARPU_CUDA_RAM
-    TODO
-\var starpu_node_kind::STARPU_OPENCL_RAM
-    TODO
-\var starpu_node_kind::STARPU_DISK_RAM
-    TODO
-\var starpu_node_kind::STARPU_MIC_RAM
-    TODO
-\var starpu_node_kind::STARPU_SCC_RAM
-    This node kind is not used anymore, but implementations in
-    interfaces will be useful for MPI.
-\var starpu_node_kind::STARPU_SCC_SHM
-    TODO
-\var starpu_node_kind::STARPU_MPI_MS_RAM
-    TODO
-
-\enum starpu_worker_archtype
-\ingroup API_Workers_Properties
-Worker Architecture Type
-\var starpu_worker_archtype::STARPU_ANY_WORKER
-    any worker, used in the hypervisor
-\var starpu_worker_archtype::STARPU_CPU_WORKER
-    CPU core
-\var starpu_worker_archtype::STARPU_CUDA_WORKER
-    NVIDIA CUDA device
-\var starpu_worker_archtype::STARPU_OPENCL_WORKER
-    OpenCL device
-\var starpu_worker_archtype::STARPU_MIC_WORKER
-    Intel MIC device
-\var starpu_worker_archtype::STARPU_SCC_WORKER
-    Intel SCC device
-\var starpu_worker_archtype::STARPU_MPI_MS_WORKER
-    MPI Slave device
-
-\struct starpu_worker_collection
-\ingroup API_Workers_Properties
-A scheduling context manages a collection of workers that can
-be memorized using different data structures. Thus, a generic
-structure is available in order to simplify the choice of its type.
-Only the list data structure is available, but implementations of
-further data structures (like trees) are foreseen.
-\var void *starpu_worker_collection::workerids
-        The workerids managed by the collection
-\var void *starpu_worker_collection::collection_private
-        todo
-\var void *starpu_worker_collection::unblocked_workers
-        todo
-\var unsigned starpu_worker_collection::nunblocked_workers
-        todo
-\var void *starpu_worker_collection::masters
-        todo
-\var unsigned starpu_worker_collection::nmasters
-        todo
-\var char starpu_worker_collection::present[STARPU_NMAXWORKERS]
-        todo
-\var char starpu_worker_collection::is_unblocked[STARPU_NMAXWORKERS]
-        todo
-\var char starpu_worker_collection::is_master[STARPU_NMAXWORKERS]
-        todo
-\var unsigned starpu_worker_collection::nworkers
-        The number of workers in the collection
-\var enum starpu_worker_collection_type starpu_worker_collection::type
-        The type of structure
-\var unsigned (*starpu_worker_collection::has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
-        Check if there is another element in collection
-\var int (*starpu_worker_collection::get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
-        Return the next element in the collection
-\var int (*starpu_worker_collection::add)(struct starpu_worker_collection *workers, int worker)
-        Add a new element in the collection
-\var int (*starpu_worker_collection::remove)(struct starpu_worker_collection *workers, int worker)
-        Remove an element from the collection
-\var void (*starpu_worker_collection::init)(struct starpu_worker_collection *workers)
-        Initialize the collection
-\var void (*starpu_worker_collection::deinit)(struct starpu_worker_collection *workers)
-        Deinitialize the collection
-\var void (*starpu_worker_collection::init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it)
-        Initialize the cursor if there is one
-\var void (*starpu_worker_collection::init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task);
-        todo
-
-\enum starpu_worker_collection_type
-\ingroup API_Workers_Properties
-Types of structures the worker collection can implement
-\var starpu_worker_collection_type::STARPU_WORKER_LIST
-    The collection is an array
-\var starpu_worker_collection_type::STARPU_WORKER_TREE
-    The collection is a tree
-
-\struct starpu_sched_ctx_iterator
-\ingroup API_Workers_Properties
-Structure needed to iterate on the collection
-\var int starpu_sched_ctx_iterator::cursor
-    The index of the current worker in the collection, needed when
-    iterating on the collection.
-
-\fn unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid)
-\ingroup API_Workers_Properties
-todo
-
-\fn unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx)
-\ingroup API_Workers_Properties
-todo
-
-\fn int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type)
-\ingroup API_Workers_Properties
-todo
-
-\fn unsigned starpu_worker_is_blocked_in_parallel(int workerid)
-\ingroup API_Workers_Properties
-todo
-
-\fn unsigned starpu_worker_is_slave_somewhere(int workerid)
-\ingroup API_Workers_Properties
-todo
-
-\fn unsigned starpu_worker_is_combined_worker(int id)
-\ingroup API_Workers_Properties
-todo
-
-\fn unsigned starpu_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of workers (i.e. processing units executing StarPU
-tasks). The return value should be at most \ref STARPU_NMAXWORKERS.
-
-\fn int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
-\ingroup API_Workers_Properties
-Return the number of workers of \p type. A positive (or
-zero) value is returned in case of success; otherwise, <c>-EINVAL</c>
-indicates that \p type is not valid.
-
-\fn unsigned starpu_cpu_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of CPUs controlled by StarPU. The return value should be at most \ref STARPU_MAXCPUS.
-
-\fn unsigned starpu_cuda_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of CUDA devices controlled by StarPU. The return value should be at most \ref STARPU_MAXCUDADEVS.
-
-\fn unsigned starpu_mic_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of MIC workers controlled by StarPU.
-
-\fn unsigned starpu_mic_device_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of MIC devices controlled by StarPU. The return value should be at most \ref STARPU_MAXMICDEVS.
-
-\fn unsigned starpu_mpi_ms_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of MPI Master Slave workers controlled by StarPU.
-
-\fn unsigned starpu_scc_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of SCC devices controlled by StarPU. The return value should be at most \ref STARPU_MAXSCCDEVS.
-
-\fn unsigned starpu_opencl_worker_get_count(void)
-\ingroup API_Workers_Properties
-Return the number of OpenCL devices controlled by StarPU. The return value should be at most \ref STARPU_MAXOPENCLDEVS.
-
-\fn int starpu_worker_get_id(void)
-\ingroup API_Workers_Properties
-Return the identifier of the current worker, i.e. the one associated with
-the calling thread. The return value is either \c -1 if the current
-context is not a StarPU worker (i.e. when called from the application
-outside a task or a callback), or an integer between \c 0 and
-starpu_worker_get_count() - \c 1.
-
-\fn unsigned starpu_worker_get_id_check(void)
-\ingroup API_Workers_Properties
-Similar to starpu_worker_get_id(), but abort when called from outside
-a worker (i.e. when starpu_worker_get_id() would return \c -1).
-
-\fn unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize)
-\ingroup API_Workers_Properties
-Get the list of identifiers of workers of \p type. Fill the array \p
-workerids with the identifiers of the workers. The argument \p
-maxsize indicates the size of the array \p workerids. The return
-value gives the number of identifiers that were put in the array.
-<c>-ERANGE</c> is returned if \p maxsize is lower than the number of workers
-with the appropriate type: in that case, the array is filled with the
-\p maxsize first elements. To avoid such overflows, the value of maxsize
-can be chosen by the means of the function
-starpu_worker_get_count_by_type(), or by passing a value greater or
-equal to \ref STARPU_NMAXWORKERS.
-
-\fn int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
-\ingroup API_Workers_Properties
-Return the identifier of the \p num -th worker that has the
-specified \p type. If there is no such worker, -1 is returned.
-
-\fn int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
-\ingroup API_Workers_Properties
-Return the identifier of the worker that has the specified \p type
-and device id \p devid (which may not be the n-th, if some
-devices are skipped for instance). If there is no such worker, \c -1 is
-returned.
-
-\fn int starpu_worker_get_devid(int id)
-\ingroup API_Workers_Properties
-Return the device id of the worker \p id. The
-worker should be identified with the value returned by the
-starpu_worker_get_id() function. In the case of a CUDA worker, this
-device identifier is the logical device identifier exposed by CUDA
-(used by the function \c cudaGetDevice() for instance). The device
-identifier of a CPU worker is the logical identifier of the core on
-which the worker was bound; this identifier is either provided by the
-OS or by the library <c>hwloc</c> in case it is available.
-
-\fn int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num)
-\ingroup API_Workers_Properties
-todo
-
-\fn int starpu_worker_get_bindid(int workerid)
-\ingroup API_Workers_Properties
-todo
-
-\fn enum starpu_worker_archtype starpu_worker_get_type(int id)
-\ingroup API_Workers_Properties
-Return the type of processing unit associated to the worker \p id. The
-worker identifier is a value returned by the function
-starpu_worker_get_id(). The return value indicates the architecture
-of the worker: ::STARPU_CPU_WORKER for a CPU core,
-::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER for
-an OpenCL device. The return value for an invalid identifier is
-unspecified.
-
-\fn void starpu_worker_get_name(int id, char *dst, size_t maxlen)
-\ingroup API_Workers_Properties
-Allow to get the name of the worker \p id. StarPU associates a unique
-human readable string to each processing unit. This function copies at
-most the \p maxlen first bytes of the unique string associated to the
-worker \p id into the \p dst buffer. The caller is responsible for
-ensuring that \p dst is a valid pointer to a buffer of \p maxlen bytes
-at least. Calling this function on an invalid identifier results in an
-unspecified behaviour.
-
-\fn void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type)
-\ingroup API_Workers_Properties
-Display on \p output the list (if any) of all the workers of the given
-\p type.
-
-\fn unsigned starpu_worker_get_memory_node(unsigned workerid)
-\ingroup API_Workers_Properties
-Return the identifier of the memory node associated to the worker
-identified by \p workerid.
-
-\fn enum starpu_node_kind starpu_node_get_kind(unsigned node)
-\ingroup API_Workers_Properties
-Return the type of \p node as defined by
-::starpu_node_kind. For example, when defining a new data interface,
-this function should be used in the allocation function to determine
-on which device the memory needs to be allocated.
-
-\fn int starpu_memory_nodes_numa_id_to_devid(int osid)
-\ingroup API_Workers_Properties
-Return the identifier of the memory node associated to the NUMA
-node identified by \p osid by the Operating System.
-
-\fn int starpu_memory_nodes_numa_devid_to_id(unsigned id);
-\ingroup API_Workers_Properties
-Return the Operating System identifier of the memory node
-whose StarPU identifier is \p id.
-
-\fn char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
-\ingroup API_Workers_Properties
-Return worker \p type as a string.
-
-\fn int starpu_worker_sched_op_pending(void)
-\ingroup API_Workers_Properties
-Return \c !0 if current worker has a scheduling operation in progress,
-and \c 0 otherwise.
-
-\fn void starpu_worker_relax_on(void)
-\ingroup API_Workers_Properties
-Allow other threads and workers to temporarily observe the current
-worker state, even though it is performing a scheduling operation.
-Must be called by a worker before performing a potentially blocking
-call such as acquiring a mutex other than its own sched_mutex. This
-function increases \c state_relax_refcnt from the current worker. No
-more than <c>UINT_MAX-1</c> nested starpu_worker_relax_on() calls should be performed on
-the same worker. This function is automatically called by 
-starpu_worker_lock() to relax the caller worker state while attempting
-to lock the target worker.
-
-\fn void starpu_worker_relax_off(void)
-\ingroup API_Workers_Properties
-Must be called after a potentially blocking call is complete, to
-restore the relax state in place before the corresponding starpu_worker_relax_on().
-Decreases \c state_relax_refcnt. Calls to starpu_worker_relax_on()
-and starpu_worker_relax_off() must be properly paired. This
-function is automatically called by starpu_worker_unlock() after the
-target worker has been unlocked.
-
-\fn int starpu_worker_get_relax_state(void)
-\ingroup API_Workers_Properties
-Returns \c !0 if the current worker \c state_relax_refcnt!=0 and \c 0
-otherwise.
-
-\fn void starpu_worker_lock(int workerid)
-\ingroup API_Workers_Properties
-Acquire the sched mutex of \p workerid. If the caller is a worker,
-distinct from \p workerid, the caller worker automatically enters a relax
-state while acquiring the target worker lock.
-
-\fn int starpu_worker_trylock(int workerid)
-\ingroup API_Workers_Properties
-Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if
-successful, \c !0 if \p workerid sched mutex is held or the
-corresponding worker is not in a relax state.
-If the caller is a worker, distinct from \p workerid, the caller
-worker automatically enters relax state if successfully acquiring the target
-worker lock.
-
-\fn void starpu_worker_unlock(int workerid)
-\ingroup API_Workers_Properties
-Release the previously acquired sched mutex of \p workerid. Restore
-the relax state of the caller worker if needed.
-
-\fn void starpu_worker_lock_self(void)
-\ingroup API_Workers_Properties
-Acquire the current worker sched mutex.
-
-\fn void starpu_worker_unlock_self(void)
-\ingroup API_Workers_Properties
-Release the current worker sched mutex.
-
-\fn int starpu_wake_worker_relax(int workerid)
-\ingroup API_Workers_Properties
-Wake up \p workerid while temporarily entering the current worker relax state
-if needed during the waiting process. Return 1 if \p workerid has been woken
-up or its state_keep_awake flag has been set to \c 1, and \c 0 otherwise (if \p
-workerid was not in the STATE_SLEEPING or in the STATE_SCHEDULING).
-
-\fn int starpu_wake_worker_relax_light(int workerid)
-\ingroup API_Workers_Properties
-This is a light version of starpu_wake_worker_relax() which, when possible,
-speculatively sets keep_awake on the target worker without waiting for the worker
-to enter the relax state.
-
-\fn void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
-\ingroup API_Workers_Properties
-If StarPU was compiled with blocking drivers support and worker callbacks support
-enabled, allow to specify an external resource manager callback to be notified
-about workers waking-up.
-
-\fn void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid))
-\ingroup API_Workers_Properties
-If StarPU was compiled with blocking drivers support and worker callbacks support
-enabled, allow to specify an external resource manager callback to be notified
-about workers going to sleep.
-
-\fn hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid)
-\ingroup API_Workers_Properties
-If StarPU was compiled with \c hwloc support, return a duplicate of the
-\c hwloc cpuset associated with the worker \p workerid. The returned cpuset is obtained
-from a \c hwloc_bitmap_dup() function call. It must be freed by the caller
-using \c hwloc_bitmap_free().
-
-\fn hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid)
-\ingroup API_Workers_Properties
-If StarPU was compiled with \c hwloc support, return the \c hwloc object corresponding to 
-the worker \p workerid.
-
 */

+ 1 - 0
doc/doxygen/doxygen-config.cfg.in

@@ -37,6 +37,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_fxt.h \
 			 @top_srcdir@/include/starpu.h \
 			 @top_srcdir@/include/starpu_hash.h \
+			 @top_srcdir@/include/starpu_helper.h \
 			 @top_srcdir@/include/starpu_mic.h \
 			 @top_srcdir@/include/starpu_mod.f90 \
 			 @top_srcdir@/include/starpu_opencl.h \

+ 1 - 0
doc/doxygen/doxygen.cfg

@@ -1625,6 +1625,7 @@ PREDEFINED             = STARPU_USE_OPENCL=1 \
 			 STARPU_SIMGRID=1 \
 			 STARPU_OPENMP=1 \
 			 STARPU_MKL=1 \
+			 STARPU_WORKER_CALLBACKS=1 \
                          __GCC__
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then

+ 393 - 4
include/starpu.h

@@ -51,6 +51,7 @@ typedef INT_PTR intptr_t;
 #include <starpu_thread_util.h>
 #include <starpu_util.h>
 #include <starpu_data.h>
+#include <starpu_helper.h>
 #include <starpu_disk.h>
 #include <starpu_data_interfaces.h>
 #include <starpu_data_filters.h>
@@ -89,59 +90,339 @@ extern "C"
 {
 #endif
 
+/**
+   @defgroup API_Initialization_and_Termination Initialization and Termination
+   @{
+*/
+
+/**
+   Structure passed to the starpu_init() function to configure StarPU.
+   It has to be initialized with starpu_conf_init(). When the default
+   value is used, StarPU automatically selects the number of
+   processing units and takes the default scheduling policy. The
+   environment variables overwrite the equivalent parameters.
+*/
 struct starpu_conf
 {
+	/**
+	   @private
+	   Will be initialized by starpu_conf_init(). Should not be
+	   set by hand.
+	*/
 	int magic;
 
+	/**
+	   Name of the scheduling policy. This can also be specified
+	   with the environment variable \ref STARPU_SCHED. (default =
+	   <c>NULL</c>).
+	*/
 	const char *sched_policy_name;
+
+	/**
+	   Definition of the scheduling policy. This field is ignored
+	   if starpu_conf::sched_policy_name is set.
+	   (default = <c>NULL</c>)
+	*/
 	struct starpu_sched_policy *sched_policy;
 	void (*sched_policy_init)(unsigned);
 
+	/**
+	   Number of CPU cores that StarPU can use. This can also be
+	   specified with the environment variable \ref STARPU_NCPU.
+	   (default = -1)
+	*/
 	int ncpus;
 	int reserve_ncpus;
+
+	/**
+	   Number of CUDA devices that StarPU can use. This can also
+	   be specified with the environment variable \ref
+	   STARPU_NCUDA.
+	   (default = -1)
+	*/
 	int ncuda;
+
+	/**
+	   Number of OpenCL devices that StarPU can use. This can also
+	   be specified with the environment variable \ref
+	   STARPU_NOPENCL.
+	   (default = -1)
+	*/
 	int nopencl;
+
+	/**
+	   Number of MIC devices that StarPU can use. This can also be
+	   specified with the environment variable \ref STARPU_NMIC.
+	   (default = -1)
+	*/
 	int nmic;
+
+	/**
+	   Number of SCC devices that StarPU can use. This can also be
+	   specified with the environment variable \ref STARPU_NSCC.
+	   (default = -1)
+	*/
 	int nscc;
+
+	/**
+	   Number of MPI Master Slave devices that StarPU can use.
+	   This can also be specified with the environment variable
+	   \ref STARPU_NMPI_MS.
+	   (default = -1)
+	*/
         int nmpi_ms;
 
+	/**
+	   If this flag is set, the starpu_conf::workers_bindid array
+	   indicates where the different workers are bound, otherwise
+	   StarPU automatically selects where to bind the different
+	   workers. This can also be specified with the environment
+	   variable \ref STARPU_WORKERS_CPUID.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_bindid;
+	/**
+	   If the starpu_conf::use_explicit_workers_bindid flag is
+	   set, this array indicates where to bind the different
+	   workers. The i-th entry of the starpu_conf::workers_bindid
+	   indicates the logical identifier of the processor which
+	   should execute the i-th worker. Note that the logical
+	   ordering of the CPUs is either determined by the OS, or
+	   provided by the hwloc library in case it is available.
+	*/
 	unsigned workers_bindid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, the CUDA workers will be attached to
+	   the CUDA devices specified in the
+	   starpu_conf::workers_cuda_gpuid array. Otherwise, StarPU
+	   assigns the CUDA devices in a round-robin fashion. This can
+	   also be specified with the environment variable \ref
+	   STARPU_WORKERS_CUDAID.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_cuda_gpuid;
+	/**
+	   If the starpu_conf::use_explicit_workers_cuda_gpuid flag is
+	   set, this array contains the logical identifiers of the
+	   CUDA devices (as used by \c cudaGetDevice()).
+	*/
 	unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, the OpenCL workers will be attached to
+	   the OpenCL devices specified in the
+	   starpu_conf::workers_opencl_gpuid array. Otherwise, StarPU
+	   assigns the OpenCL devices in a round-robin fashion. This
+	   can also be specified with the environment variable \ref
+	   STARPU_WORKERS_OPENCLID.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_opencl_gpuid;
+	/**
+	   If the starpu_conf::use_explicit_workers_opencl_gpuid flag
+	   is set, this array contains the logical identifiers of the
+	   OpenCL devices to be used.
+	*/
 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, the MIC workers will be attached to
+	   the MIC devices specified in the array
+	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
+	   assigns the MIC devices in a round-robin fashion. This can
+	   also be specified with the environment variable \ref
+	   STARPU_WORKERS_MICID.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_mic_deviceid;
+	/**
+	   If the flag starpu_conf::use_explicit_workers_mic_deviceid
+	   is set, the array contains the logical identifiers of the
+	   MIC devices to be used.
+	*/
 	unsigned workers_mic_deviceid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, the SCC workers will be attached to
+	   the SCC devices specified in the array
+	   starpu_conf::workers_scc_deviceid.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_scc_deviceid;
+	/**
+	   If the flag starpu_conf::use_explicit_workers_scc_deviceid
+	   is set, the array contains the logical identifiers of the
+	   SCC devices to be used. Otherwise, StarPU assigns the SCC
+	   devices in a round-robin fashion. This can also be
+	   specified with the environment variable \ref
+	   STARPU_WORKERS_SCCID.
+	*/
 	unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, the MPI Master Slave workers will be
+	   attached to the MPI Master Slave devices specified in the
+	   array starpu_conf::workers_mpi_ms_deviceid. Otherwise,
+	   StarPU assigns the MPI Master Slave devices in a
+	   round-robin fashion.
+	   (default = 0)
+	*/
 	unsigned use_explicit_workers_mpi_ms_deviceid;
+	/**
+	   If the flag
+	   starpu_conf::use_explicit_workers_mpi_ms_deviceid is set,
+	   the array contains the logical identifiers of the MPI
+	   Master Slave devices to be used.
+	*/
 	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
 
+	/**
+	   If this flag is set, StarPU will recalibrate the bus.  If
+	   this value is equal to -1, the default value is used. This
+	   can also be specified with the environment variable \ref
+	   STARPU_BUS_CALIBRATE.
+	   (default = 0)
+	*/
 	int bus_calibrate;
+	/**
+	   If this flag is set, StarPU will calibrate the performance
+	   models when executing tasks. If this value is equal to -1,
+	   the default value is used. If the value is equal to 1, it
+	   will force continuing calibration. If the value is equal to
+	   2, the existing performance models will be overwritten.
+	   This can also be specified with the environment variable
+	   \ref STARPU_CALIBRATE.
+	   (default = 0)
+	*/
 	int calibrate;
 
+	/**
+	   By default, StarPU executes parallel tasks concurrently.
+	   Some parallel libraries (e.g. most OpenMP implementations)
+	   however do not support concurrent calls to parallel code.
+	   In such case, setting this flag makes StarPU only start one
+	   parallel task at a time (but other CPU and GPU tasks are
+	   not affected and can be run concurrently). The parallel
+	   task scheduler will however still try varying combined
+	   worker sizes to look for the most efficient ones.
+	   This can also be specified with the environment variable
+	   \ref STARPU_SINGLE_COMBINED_WORKER.
+	   (default = 0)
+	*/
 	int single_combined_worker;
 
+	/**
+	   Path to the kernel to execute on the MIC device, compiled
+	   for MIC architecture. When set to <c>NULL</c>, StarPU
+	   automatically looks next to the host program location.
+	   (default = <c>NULL</c>)
+	*/
 	char *mic_sink_program_path;
 
+	/**
+	   This flag should be set to 1 to disable asynchronous copies
+	   between CPUs and all accelerators.
+	   The AMD implementation of OpenCL is known to fail when
+	   copying data asynchronously. When using this
+	   implementation, it is therefore necessary to disable
+	   asynchronous data transfers.
+	   This can also be specified with the environment variable
+	   \ref STARPU_DISABLE_ASYNCHRONOUS_COPY.
+	   This can also be specified at compilation time by giving to
+	   the configure script the option \ref
+	   disable-asynchronous-copy "--disable-asynchronous-copy".
+	   (default = 0)
+	*/
 	int disable_asynchronous_copy;
+	/**
+	   This flag should be set to 1 to disable asynchronous copies
+	   between CPUs and CUDA accelerators.
+	   This can also be specified with the environment variable
+	   \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
+	   This can also be specified at compilation time by giving to
+	   the configure script the option \ref
+	   disable-asynchronous-cuda-copy
+	   "--disable-asynchronous-cuda-copy".
+	   (default = 0)
+	*/
 	int disable_asynchronous_cuda_copy;
+	/**
+	   This flag should be set to 1 to disable asynchronous copies
+	   between CPUs and OpenCL accelerators.
+	   The AMD implementation of OpenCL is known to fail when
+	   copying data asynchronously. When using this
+	   implementation, it is therefore necessary to disable
+	   asynchronous data transfers.
+	   This can also be specified with the environment variable
+	   \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY.
+	   This can also be specified at compilation time by giving to
+	   the configure script the option \ref
+	   disable-asynchronous-opencl-copy
+	   "--disable-asynchronous-opencl-copy".
+	   (default = 0)
+	*/
 	int disable_asynchronous_opencl_copy;
+	/**
+	   This flag should be set to 1 to disable asynchronous copies
+	   between CPUs and MIC accelerators.
+	   This can also be specified with the environment variable
+	   \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
+	   This can also be specified at compilation time by giving to
+	   the configure script the option \ref
+	   disable-asynchronous-mic-copy
+	   "--disable-asynchronous-mic-copy".
+	   (default = 0).
+	*/
 	int disable_asynchronous_mic_copy;
+	/**
+	   This flag should be set to 1 to disable asynchronous copies
+	   between CPUs and MPI Master Slave devices.
+	   This can also be specified with the environment variable
+	   \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY.
+	   This can also be specified at compilation time by giving to
+	   the configure script the option \ref
+	   disable-asynchronous-mpi-master-slave-copy
+	   "--disable-asynchronous-mpi-master-slave-copy".
+	   (default = 0).
+	*/
 	int disable_asynchronous_mpi_ms_copy;
 
+	/**
+	   Enable CUDA/OpenGL interoperation on these CUDA devices.
+	   This can be set to an array of CUDA device identifiers for
+	   which \c cudaGLSetGLDevice() should be called instead of \c
+	   cudaSetDevice(). Its size is specified by the
+	   starpu_conf::n_cuda_opengl_interoperability field below
+	   (default = <c>NULL</c>)
+	*/
 	unsigned *cuda_opengl_interoperability;
+	/**
+	   Size of the array starpu_conf::cuda_opengl_interoperability
+	*/
 	unsigned n_cuda_opengl_interoperability;
 
+	/**
+	   Array of drivers that should not be launched by StarPU. The
+	   application will run them in one of its own threads.
+	   (default = <c>NULL</c>)
+	*/
 	struct starpu_driver *not_launched_drivers;
+	/**
+	   The number of StarPU drivers that should not be launched by
+	   StarPU, i.e. the number of elements of the array
+	   starpu_conf::not_launched_drivers.
+	   (default = 0)
+	*/
 	unsigned n_not_launched_drivers;
 
+	/**
+	   Specify the buffer size used for FxT tracing. Starting from
+	   FxT version 0.2.12, the buffer will automatically be
+	   flushed when it fills in, but it may still be interesting
+	   to specify a bigger value to avoid any flushing (which
+	   would disturb the trace).
+	*/
 	unsigned trace_buffer_size;
 	int global_sched_ctx_min_priority;
 	int global_sched_ctx_max_priority;
@@ -152,34 +433,142 @@ struct starpu_conf
 #endif
 };
 
+/**
+   Initialize the \p conf structure with the default values. In case
+   some configuration parameters are already specified through
+   environment variables, starpu_conf_init() initializes the fields of
+   \p conf according to the environment variables.
+   For instance if \ref STARPU_CALIBRATE is set, its value is put in
+   the field starpu_conf::calibrate of \p conf.
+   Upon successful completion, this function returns 0. Otherwise,
+   <c>-EINVAL</c> indicates that the argument was <c>NULL</c>.
+*/
 int starpu_conf_init(struct starpu_conf *conf);
 
+/**
+   StarPU initialization method, must be called prior to any other
+   StarPU call. It is possible to specify StarPU’s configuration (e.g.
+   scheduling policy, number of cores, ...) by passing a
+   non-<c>NULL</c> \p conf. Default configuration is used if \p conf
+   is <c>NULL</c>. Upon successful completion, this function returns
+   0. Otherwise, <c>-ENODEV</c> indicates that no worker was available
+   (and thus StarPU was not initialized).
+*/
 int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
+
+/**
+   Similar to starpu_init(), but also take the \p argc and \p argv as
+   defined by the application. This is needed for SCC execution to
+   initialize the communication library.
+   Do not call starpu_init() and starpu_initialize() in the same
+   program.
+*/
 int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv);
+
+/**
+   Return 1 if StarPU is already initialized.
+*/
 int starpu_is_initialized(void);
+
+/**
+   Wait for starpu_init() call to finish.
+*/
 void starpu_wait_initialized(void);
 
-#define STARPU_THREAD_ACTIVE (1 << 0)
-unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred);
-int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name);
+/**
+   StarPU termination method, must be called at the end of the
+   application: statistics and other post-mortem debugging information
+   are not guaranteed to be available until this method has been
+   called.
+*/
+void starpu_shutdown(void);
 
+/**
+   Suspend the processing of new tasks by workers. It can be used in a
+   program where StarPU is used during only a part of the execution.
+   Without this call, the workers continue to poll for new tasks in a
+   tight loop, wasting CPU time. The symmetric call to starpu_resume()
+   should be used to unfreeze the workers.
+*/
 void starpu_pause(void);
+/**
+   Symmetrical call to starpu_pause(), used to resume the workers
+   polling for new tasks.
+*/
 void starpu_resume(void);
 
-void starpu_shutdown(void);
+/**
+   Value to be passed to starpu_get_next_bindid() and
+   starpu_bind_thread_on() when binding a thread which will
+   significantly eat CPU time, and should thus have its own dedicated
+   CPU.
+*/
+#define STARPU_THREAD_ACTIVE (1 << 0)
+
+/**
+   Return a PU binding ID which can be used to bind threads with
+   starpu_bind_thread_on(). \p flags can be set to
+   STARPU_THREAD_ACTIVE or 0. When \p npreferred is set to non-zero,
+   \p preferred is an array of size \p npreferred in which a
+   preference of PU binding IDs can be set. By default StarPU will
+   return the first PU available for binding.
+*/
+unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred);
 
+/**
+   Bind the calling thread on the given \p cpuid (which should have
+   been obtained with starpu_get_next_bindid()).
+
+   Return -1 if a thread was already bound to this PU (but binding
+   will still have been done, and a warning will have been printed),
+   so the caller can tell the user how to avoid the issue.
+
+   \p name should be set to a unique string so that different calls
+   with the same name for the same cpuid do not produce a warning.
+*/
+int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name);
+
+/**
+   Print a description of the topology on \p f.
+*/
 void starpu_topology_print(FILE *f);
 
+/**
+   Return 1 if asynchronous data transfers between CPU and
+   accelerators are disabled.
+*/
 int starpu_asynchronous_copy_disabled(void);
+
+/**
+   Return 1 if asynchronous data transfers between CPU and CUDA
+   accelerators are disabled.
+*/
 int starpu_asynchronous_cuda_copy_disabled(void);
+
+/**
+   Return 1 if asynchronous data transfers between CPU and OpenCL
+   accelerators are disabled.
+*/
 int starpu_asynchronous_opencl_copy_disabled(void);
+
+/**
+   Return 1 if asynchronous data transfers between CPU and MIC devices
+   are disabled.
+*/
 int starpu_asynchronous_mic_copy_disabled(void);
+
+/**
+   Return 1 if asynchronous data transfers between CPU and MPI Slave
+   devices are disabled.
+*/
 int starpu_asynchronous_mpi_ms_copy_disabled(void);
 
 void starpu_display_stats();
 
 void starpu_get_version(int *major, int *minor, int *release);
 
+/** @} */
+
 #ifdef __cplusplus
 }
 #endif

+ 0 - 12
include/starpu_data.h

@@ -524,18 +524,6 @@ void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
 */
 void *starpu_data_get_user_data(starpu_data_handle_t handle);
 
-/**
-   Copy the content of \p src_handle into \p dst_handle. The parameter \p
-   asynchronous indicates whether the function should block or not. In
-   the case of an asynchronous call, it is possible to synchronize with
-   the termination of this operation either by the means of implicit
-   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
-   \p callback_func is not <c>NULL</c>, this callback function is executed after
-   the handle has been copied, and it is given the pointer \p
-   callback_arg as argument.
-*/
-int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
-
 /** @} */
 
 #ifdef __cplusplus

+ 177 - 0
include/starpu_helper.h

@@ -0,0 +1,177 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2008-2018                                Université de Bordeaux
+ * Copyright (C) 2011,2012,2017                           Inria
+ * Copyright (C) 2010-2017, 2019                          CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_HELPER_H__
+#define __STARPU_HELPER_H__
+
+#include <stdio.h>
+#include <starpu.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+   @defgroup API_Miscellaneous_Helpers Miscellaneous Helpers
+   @{
+*/
+
+/**
+   Return the min of the two parameters.
+*/
+#define STARPU_MIN(a,b)	((a)<(b)?(a):(b))
+/**
+   Return the max of the two parameters.
+*/
+#define STARPU_MAX(a,b)	((a)<(b)?(b):(a))
+
+/**
+   Define a value which can be used to mark pointers as invalid
+   values.
+*/
+#define STARPU_POISON_PTR	((void *)0xdeadbeef)
+
+extern int _starpu_silent;
+
+char *starpu_getenv(const char *str);
+
+/**
+   Return the integer value of the environment variable named \p str.
+   Return -1 if the variable is not set; abort if its value is not an
+   integer (an assertion also rejects negative values).
+*/
+static __starpu_inline int starpu_get_env_number(const char *str)
+{
+	char *strval;
+
+	strval = starpu_getenv(str);
+	if (strval)
+	{
+		/* the env variable was actually set */
+		long int val;
+		char *pcheck;
+
+		val = strtol(strval, &pcheck, 10);
+		if (*pcheck) {
+			fprintf(stderr,"The %s environment variable must contain an integer\n", str);
+			STARPU_ABORT();
+		}
+
+		/* fprintf(stderr, "ENV %s WAS %d\n", str, val); */
+		STARPU_ASSERT_MSG(val >= 0, "The value for the environment variable '%s' cannot be negative", str);
+		return (int)val;
+	}
+	else
+	{
+		/* there is no such env variable */
+		/* fprintf("There was no %s ENV\n", str); */
+		return -1;
+	}
+}
+
+static __starpu_inline int starpu_get_env_number_default(const char *str, int defval)
+{
+	int ret = starpu_get_env_number(str);
+	if (ret == -1)
+		ret = defval;
+	return ret;
+}
+
+static __starpu_inline float starpu_get_env_float_default(const char *str, float defval)
+{
+	char *strval;
+
+	strval = starpu_getenv(str);
+	if (strval)
+	{
+		/* the env variable was actually set */
+		float val;
+		char *pcheck;
+
+		val = strtof(strval, &pcheck);
+		if (*pcheck) {
+			fprintf(stderr,"The %s environment variable must contain a float\n", str);
+			STARPU_ABORT();
+		}
+
+		/* fprintf(stderr, "ENV %s WAS %f\n", str, val); */
+		return val;
+	}
+	else
+	{
+		/* there is no such env variable */
+		/* fprintf("There was no %s ENV\n", str); */
+		return defval;
+	}
+}
+
+/**
+   Execute the given function \p func on a subset of workers. When
+   calling this method, the offloaded function \p func is executed by
+   every StarPU worker that is eligible to execute the function. The
+   argument \p arg is passed to the offloaded function. The argument
+   \p where specifies on which types of processing units the function
+   should be executed.
+   Similarly to the field starpu_codelet::where, it is possible to
+   specify that the function should be executed on every CUDA device
+   and every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function
+   blocks until \p func has been executed on every appropriate
+   processing unit, and thus may not be called from a callback
+   function for instance.
+*/
+void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where);
+
+/**
+   Same as starpu_execute_on_each_worker(), except that the task name
+   is specified in the argument \p name.
+*/
+void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name);
+
+/**
+   Call \p func(\p arg) on every worker in the \p workers array. \p
+   num_workers indicates the number of workers in this array.  This
+   function is synchronous, but the different workers may execute the
+   function in parallel.
+*/
+void starpu_execute_on_specific_workers(void (*func)(void*), void *arg, unsigned num_workers, unsigned *workers, const char *name);
+
+/**
+   Return the current date in microseconds.
+*/
+double starpu_timing_now(void);
+
+/**
+   Copy the content of \p src_handle into \p dst_handle. The parameter \p
+   asynchronous indicates whether the function should block or not. In
+   the case of an asynchronous call, it is possible to synchronize with
+   the termination of this operation either by the means of implicit
+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
+   \p callback_func is not <c>NULL</c>, this callback function is executed after
+   the handle has been copied, and it is given the pointer \p
+   callback_arg as argument.
+*/
+int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
+
+/** @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STARPU_HELPER_H__

+ 37 - 15
include/starpu_scheduler.h

@@ -213,22 +213,8 @@ struct starpu_sched_policy **starpu_sched_get_predefined_policies();
    variable would be used to block and wake up all workers.
 */
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond);
-unsigned long starpu_task_get_job_id(struct starpu_task *task);
 
-/**
-   Must be called to wake up a worker that is sleeping on the cond.
-   Return 0 whenever the worker is not in a sleeping state or has the
-   state_keep_awake flag on.
-*/
-int starpu_wake_worker_no_relax(int workerid);
-
-/**
-   Version of starpu_wake_worker_no_relax() which assumes that the
-   sched mutex is locked
-*/
-int starpu_wake_worker_locked(int workerid);
-
-int starpu_wake_worker_relax_light(int workerid);
+unsigned long starpu_task_get_job_id(struct starpu_task *task);
 
 /**
    TODO: check if this is correct
@@ -437,6 +423,42 @@ void starpu_sched_ctx_worker_shares_tasks_lists(int workerid, int sched_ctx_id);
 
 void starpu_sched_task_break(struct starpu_task *task);
 
+/**
+   @name Worker operations
+   @{
+*/
+
+/**
+   Wake up \p workerid while temporarily entering the current worker
+   relax state if needed during the waiting process. Return 1 if \p
+   workerid has been woken up or its state_keep_awake flag has been
+   set to \c 1, and \c 0 otherwise (if \p workerid was neither in
+   STATE_SLEEPING nor in STATE_SCHEDULING).
+*/
+int starpu_wake_worker_relax(int workerid);
+
+/**
+   Must be called to wake up a worker that is sleeping on the cond.
+   Return 0 whenever the worker is not in a sleeping state or has the
+   state_keep_awake flag on.
+*/
+int starpu_wake_worker_no_relax(int workerid);
+
+/**
+   Version of starpu_wake_worker_no_relax() which assumes that the
+   sched mutex is locked
+*/
+int starpu_wake_worker_locked(int workerid);
+
+/**
+   Light version of starpu_wake_worker_relax() which, when possible,
+   speculatively sets keep_awake on the target worker without waiting
+   for the worker to enter the relax state.
+*/
+int starpu_wake_worker_relax_light(int workerid);
+
+/** @} */
+
 /** @} */
 
 #ifdef __cplusplus

+ 152 - 121
include/starpu_util.h

@@ -40,11 +40,17 @@ extern "C"
 {
 #endif
 
-/** @defgroup
- *
- * @{
- */
-
+/**
+   @defgroup API_Toolbox Toolbox
+   @brief The following macros make GCC extensions portable, so that
+   code using them can be compiled with any C compiler.
+   @{
+*/
+
+/**
+   Return true (non-zero) if GCC version \p maj.\p min or later is
+   being used (macro taken from glibc.)
+*/
 #if defined __GNUC__ && defined __GNUC_MINOR__
 # define STARPU_GNUC_PREREQ(maj, min) \
 	((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
@@ -52,27 +58,92 @@ extern "C"
 # define STARPU_GNUC_PREREQ(maj, min) 0
 #endif
 
+/**
+   When building with a GNU C Compiler, allow programmers to mark an
+   expression as unlikely.
+*/
 #ifdef __GNUC__
 #  define STARPU_UNLIKELY(expr)          (__builtin_expect(!!(expr),0))
-#  define STARPU_LIKELY(expr)            (__builtin_expect(!!(expr),1))
-#  define STARPU_ATTRIBUTE_UNUSED                  __attribute__((unused))
-#  define STARPU_ATTRIBUTE_NORETURN                  __attribute__((noreturn))
-#  define STARPU_ATTRIBUTE_INTERNAL      __attribute__ ((visibility ("internal")))
-#  define STARPU_ATTRIBUTE_MALLOC                  __attribute__((malloc))
-#  define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT      __attribute__((warn_unused_result))
-#  define STARPU_ATTRIBUTE_PURE                    __attribute__((pure))
-#  define STARPU_ATTRIBUTE_ALIGNED(size)           __attribute__((aligned(size)))
-#  define STARPU_ATTRIBUTE_FORMAT(type, string, first)                  __attribute__((format(type, string, first)))
 #else
 #  define STARPU_UNLIKELY(expr)          (expr)
+#endif
+
+/**
+   When building with a GNU C Compiler, allow programmers to mark an
+   expression as likely.
+*/
+#ifdef __GNUC__
+#  define STARPU_LIKELY(expr)            (__builtin_expect(!!(expr),1))
+#else
 #  define STARPU_LIKELY(expr)            (expr)
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((unused))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_UNUSED                  __attribute__((unused))
+#else
 #  define STARPU_ATTRIBUTE_UNUSED
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((noreturn))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_NORETURN                  __attribute__((noreturn))
+#else
 #  define STARPU_ATTRIBUTE_NORETURN
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((visibility ("internal")))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_INTERNAL      __attribute__ ((visibility ("internal")))
+#else
 #  define STARPU_ATTRIBUTE_INTERNAL
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((malloc))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_MALLOC                  __attribute__((malloc))
+#else
 #  define STARPU_ATTRIBUTE_MALLOC
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((warn_unused_result))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT      __attribute__((warn_unused_result))
+#else
 #  define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((pure))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_PURE                    __attribute__((pure))
+#else
 #  define STARPU_ATTRIBUTE_PURE
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to __attribute__((aligned(size)))
+*/
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_ALIGNED(size)           __attribute__((aligned(size)))
+#else
 #  define STARPU_ATTRIBUTE_ALIGNED(size)
+#endif
+
+#ifdef __GNUC__
+#  define STARPU_ATTRIBUTE_FORMAT(type, string, first)                  __attribute__((format(type, string, first)))
+#else
 #  define STARPU_ATTRIBUTE_FORMAT(type, string, first)
 #endif
 
@@ -106,11 +177,6 @@ extern "C"
 #define STARPU_WARN_UNUSED_RESULT
 #endif /* __GNUC__ */
 
-#define STARPU_POISON_PTR	((void *)0xdeadbeef)
-
-#define STARPU_MIN(a,b)	((a)<(b)?(a):(b))
-#define STARPU_MAX(a,b)	((a)<(b)?(b):(a))
-
 #define STARPU_BACKTRACE_LENGTH	32
 #ifdef __GLIBC__
 #  define STARPU_DUMP_BACKTRACE() do { \
@@ -128,22 +194,39 @@ extern "C"
 #define STARPU_SIMGRID_ASSERT(x)
 #endif
 
+/**
+   Unless StarPU has been configured with the option \ref enable-fast
+   "--enable-fast", this macro will abort if the expression \p x is false.
+*/
 #ifdef STARPU_NO_ASSERT
 #define STARPU_ASSERT(x)		do { if (0) { (void) (x); } } while(0)
+#else
+#  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; } } while(0)
+#  else
+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); assert(x); } } while (0)
+#  endif
+#endif
+
+#ifdef STARPU_NO_ASSERT
 #define STARPU_ASSERT_ACCESSIBLE(x)	do { if (0) { (void) (x); } } while(0)
+#else
+#define STARPU_ASSERT_ACCESSIBLE(ptr)	do { volatile char __c STARPU_ATTRIBUTE_UNUSED = *(char*) (ptr); } while(0)
+#endif
+
+/**
+   Unless StarPU has been configured with the option \ref enable-fast
+   "--enable-fast", this macro will abort if the expression \p x is false.
+   The string \p msg will be displayed.
+*/
+#ifdef STARPU_NO_ASSERT
 #define STARPU_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); (void) msg; } } while(0)
 #else
 #  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
-#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; } } while(0)
 #    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; }} while(0)
 #  else
-#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); assert(x); } } while (0)
 #    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); assert(x); } } while(0)
-
 #  endif
-#  define STARPU_ASSERT_ACCESSIBLE(ptr)	do { \
-	volatile char __c STARPU_ATTRIBUTE_UNUSED = *(char*) (ptr); \
-} while(0)
 #endif
 
 #ifdef __APPLE_CC__
@@ -156,12 +239,20 @@ extern "C"
 #  define _starpu_abort() abort()
 #endif
 
+/**
+   Abort the program.
+*/
 #define STARPU_ABORT() do {                                          \
 	STARPU_DUMP_BACKTRACE();                                     \
         fprintf(stderr, "[starpu][abort][%s()@%s:%d]\n", __starpu_func__, __FILE__, __LINE__); \
 	_starpu_abort();				\
 } while(0)
 
+/**
+   Print the string '[starpu][abort][name of the calling function()@name
+   of the file:line in the file]' followed by the given string \p msg
+   and abort the program.
+*/
 #define STARPU_ABORT_MSG(msg, ...) do {					\
 	STARPU_DUMP_BACKTRACE();                                        \
 	fprintf(stderr, "[starpu][abort][%s()@%s:%d] " msg "\n", __starpu_func__, __FILE__, __LINE__, ## __VA_ARGS__); \
@@ -171,7 +262,7 @@ extern "C"
 #if defined(STARPU_HAVE_STRERROR_R)
 #if (! defined(__GLIBC__) || !__GLIBC__) || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && (! defined(_GNU_SOURCE)))
 /* XSI-compliant version of strerror_r returns an int */
-#define starpu_strerror_r(errnum, buf, buflen) \
+#       define starpu_strerror_r(errnum, buf, buflen) \
 	do \
 	{ \
 		int _ret = strerror_r((errnum), (buf), (buflen)); \
@@ -180,7 +271,7 @@ extern "C"
 	while (0)
 #else
 /* GNU-specific version of strerror_r returns a char * */
-#define starpu_strerror_r(errnum, buf, buflen) \
+#       define starpu_strerror_r(errnum, buf, buflen) \
 	do \
 	{ \
 		char * const _user_buf = (buf); \
@@ -199,23 +290,37 @@ extern "C"
 	} \
 	while (0)
 #endif /* strerror_r ABI version */
+#endif  /* STARPU_HAVE_STRERROR_R */
 
+/**
+   Abort the program (after displaying \p message) if \p err has a
+   value which is not 0.
+*/
+#if defined(STARPU_HAVE_STRERROR_R)
 #  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY(err != 0)) { \
 			char xmessage[256]; starpu_strerror_r(-err, xmessage, 256); \
 			fprintf(stderr, "[starpu] Unexpected value: <%d:%s> returned for " message "\n", err, xmessage, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
+#else
+#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY(err != 0)) { \
+			fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", err, ## __VA_ARGS__); \
+			STARPU_ABORT(); }}
+#endif
+
+/**
+   Abort the program (after displaying \p message) if \p err is
+   different from \p value.
+*/
+#if defined(STARPU_HAVE_STRERROR_R)
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY(err != value)) { \
 			char xmessage[256]; starpu_strerror_r(-err, xmessage, 256); \
 			fprintf(stderr, "[starpu] Unexpected value: <%d!=%d:%s> returned for " message "\n", err, value, xmessage, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
 #else
-#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY(err != 0)) { \
-			fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", err, ## __VA_ARGS__); \
-			STARPU_ABORT(); }}
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY(err != value)) { \
 	       		fprintf(stderr, "[starpu] Unexpected value: <%d != %d> returned for " message "\n", err, value, ## __VA_ARGS__); \
 			STARPU_ABORT(); }}
-#endif /* STARPU_HAVE_STRERROR_R */
+#endif
 
 #if defined(__i386__) || defined(__x86_64__)
 
@@ -353,110 +458,36 @@ STARPU_ATOMIC_SOMETHINGL(or, old | value)
 #define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" ::: "memory")
 #endif
 
+/**
+   This macro can be used to issue a read (load) memory barrier.
+*/
 #if defined(__i386__)
 #define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
-#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
 #elif defined(__KNC__) || defined(__KNF__)
 #define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
-#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
 #elif defined(__x86_64__)
 #define STARPU_RMB() __asm__ __volatile__("lfence" ::: "memory")
-#define STARPU_WMB() __asm__ __volatile__("sfence" ::: "memory")
 #elif defined(__ppc__) || defined(__ppc64__)
 #define STARPU_RMB() __asm__ __volatile__("sync" ::: "memory")
-#define STARPU_WMB() __asm__ __volatile__("sync" ::: "memory")
 #else
 #define STARPU_RMB() STARPU_SYNCHRONIZE()
-#define STARPU_WMB() STARPU_SYNCHRONIZE()
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef __cplusplus
-extern "C"
-{
+/**
+   This macro can be used to issue a write (store) memory barrier.
+*/
+#if defined(__i386__)
+#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__KNC__) || defined(__KNF__)
+#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
+#elif defined(__x86_64__)
+#define STARPU_WMB() __asm__ __volatile__("sfence" ::: "memory")
+#elif defined(__ppc__) || defined(__ppc64__)
+#define STARPU_WMB() __asm__ __volatile__("sync" ::: "memory")
+#else
+#define STARPU_WMB() STARPU_SYNCHRONIZE()
 #endif
 
-extern int _starpu_silent;
-
-char *starpu_getenv(const char *str);
-
-
-static __starpu_inline int starpu_get_env_number(const char *str)
-{
-	char *strval;
-
-	strval = starpu_getenv(str);
-	if (strval)
-	{
-		/* the env variable was actually set */
-		long int val;
-		char *pcheck;
-
-		val = strtol(strval, &pcheck, 10);
-		if (*pcheck) {
-			fprintf(stderr,"The %s environment variable must contain an integer\n", str);
-			STARPU_ABORT();
-		}
-
-		/* fprintf(stderr, "ENV %s WAS %d\n", str, val); */
-		STARPU_ASSERT_MSG(val >= 0, "The value for the environment variable '%s' cannot be negative", str);
-		return (int)val;
-	}
-	else
-	{
-		/* there is no such env variable */
-		/* fprintf("There was no %s ENV\n", str); */
-		return -1;
-	}
-}
-
-static __starpu_inline int starpu_get_env_number_default(const char *str, int defval)
-{
-	int ret = starpu_get_env_number(str);
-	if (ret == -1)
-		ret = defval;
-	return ret;
-}
-
-static __starpu_inline float starpu_get_env_float_default(const char *str, float defval)
-{
-	char *strval;
-
-	strval = starpu_getenv(str);
-	if (strval)
-	{
-		/* the env variable was actually set */
-		float val;
-		char *pcheck;
-
-		val = strtof(strval, &pcheck);
-		if (*pcheck) {
-			fprintf(stderr,"The %s environment variable must contain a float\n", str);
-			STARPU_ABORT();
-		}
-
-		/* fprintf(stderr, "ENV %s WAS %f\n", str, val); */
-		return val;
-	}
-	else
-	{
-		/* there is no such env variable */
-		/* fprintf("There was no %s ENV\n", str); */
-		return defval;
-	}
-}
-
-void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where);
-
-void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name);
-
-void starpu_execute_on_specific_workers(void (*func)(void*), void *arg, unsigned num_workers, unsigned *workers, const char *name);
-
-double starpu_timing_now(void);
-
 #ifdef _WIN32
 /* Try to fetch the system definition of timespec */
 #include <sys/types.h>

+ 287 - 30
include/starpu_worker.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2013-2017                                Inria
- * Copyright (C) 2010-2015,2017,2019                           CNRS
+ * Copyright (C) 2010-2015,2017,2019                      CNRS
  * Copyright (C) 2009-2014,2016,2017,2019                 Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2016                                     Uppsala University
@@ -35,10 +35,10 @@ extern "C"
 {
 #endif
 
-/** @defgroup
- *
- * @{
- */
+/**
+   @defgroup API_Workers_Properties Workers’ Properties
+   @{
+*/
 
 enum starpu_node_kind
 {
@@ -54,19 +54,29 @@ enum starpu_node_kind
 
 };
 
+/**
+   Worker Architecture Type
+*/
 enum starpu_worker_archtype
 {
-	STARPU_CPU_WORKER,
-	STARPU_CUDA_WORKER,
-	STARPU_OPENCL_WORKER,
-	STARPU_MIC_WORKER,
-	STARPU_SCC_WORKER,
-	STARPU_MPI_MS_WORKER,
-	STARPU_ANY_WORKER
+	STARPU_CPU_WORKER,        /**< CPU core */
+	STARPU_CUDA_WORKER,       /**< NVIDIA CUDA device */
+	STARPU_OPENCL_WORKER,     /**< OpenCL device */
+	STARPU_MIC_WORKER,        /**< Intel MIC device */
+	STARPU_SCC_WORKER,        /**< Intel SCC device */
+	STARPU_MPI_MS_WORKER,     /**< MPI Slave device */
+	STARPU_ANY_WORKER         /**< any worker, used in the hypervisor */
 };
 
+/**
+   Structure needed to iterate on the collection
+*/
 struct starpu_sched_ctx_iterator
 {
+	/**
+	   The index of the current worker in the collection, needed
+	   when iterating on the collection.
+	*/
 	int cursor;
 	void *value;
 	void *possible_value;
@@ -74,17 +84,32 @@ struct starpu_sched_ctx_iterator
 	int possibly_parallel;
 };
 
+/**
+   Types of structures the worker collection can implement
+*/
 enum starpu_worker_collection_type
 {
-	STARPU_WORKER_TREE,
-	STARPU_WORKER_LIST
+	STARPU_WORKER_TREE,  /**< The collection is a tree */
+	STARPU_WORKER_LIST   /**< The collection is an array */
 };
 
-
+/**
+   A scheduling context manages a collection of workers that can be
+   memorized using different data structures. Thus, a generic
+   structure is available in order to simplify the choice of its type.
+   Only the list data structure is available but implementations of
+   further data structures (like tree) are foreseen.
+*/
 struct starpu_worker_collection
 {
+	/**
+	   The workerids managed by the collection
+	*/
 	int *workerids;
 	void *collection_private;
+	/**
+	   The number of workers in the collection
+	*/
 	unsigned nworkers;
 	void *unblocked_workers;
 	unsigned nunblocked_workers;
@@ -93,13 +118,37 @@ struct starpu_worker_collection
 	char present[STARPU_NMAXWORKERS];
 	char is_unblocked[STARPU_NMAXWORKERS];
 	char is_master[STARPU_NMAXWORKERS];
+	/**
+	   The type of structure
+	*/
 	enum starpu_worker_collection_type type;
+	/**
+	   Check if there is another element in collection
+	*/
 	unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
+	/**
+	   Return the next element in the collection
+	*/
 	int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
+	/**
+	   Add a new element in the collection
+	*/
 	int (*add)(struct starpu_worker_collection *workers, int worker);
+	/**
+	   Remove an element from the collection
+	*/
 	int (*remove)(struct starpu_worker_collection *workers, int worker);
+	/**
+	   Initialize the collection
+	*/
 	void (*init)(struct starpu_worker_collection *workers);
+	/**
+	   Deinitialize the collection
+	*/
 	void (*deinit)(struct starpu_worker_collection *workers);
+	/**
+	   Initialize the cursor if there is one
+	*/
 	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
 	void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task);
 };
@@ -107,22 +156,73 @@ struct starpu_worker_collection
 extern struct starpu_worker_collection worker_list;
 extern struct starpu_worker_collection worker_tree;
 
+/**
+   Return the number of workers (i.e. processing units executing
+   StarPU tasks). The return value should be at most \ref
+   STARPU_NMAXWORKERS.
+*/
 unsigned starpu_worker_get_count(void);
 unsigned starpu_combined_worker_get_count(void);
 unsigned starpu_worker_is_combined_worker(int id);
 
+/**
+   Return the number of CPUs controlled by StarPU. The return value
+   should be at most \ref STARPU_MAXCPUS.
+*/
 unsigned starpu_cpu_worker_get_count(void);
+
+/**
+   Return the number of CUDA devices controlled by StarPU. The return
+   value should be at most \ref STARPU_MAXCUDADEVS.
+*/
 unsigned starpu_cuda_worker_get_count(void);
+
+/**
+   Return the number of OpenCL devices controlled by StarPU. The
+   return value should be at most \ref STARPU_MAXOPENCLDEVS.
+*/
 unsigned starpu_opencl_worker_get_count(void);
+
+/**
+   Return the number of MIC workers controlled by StarPU.
+*/
 unsigned starpu_mic_worker_get_count(void);
+
+/**
+   Return the number of SCC devices controlled by StarPU. The return
+   value should be at most \ref STARPU_MAXSCCDEVS.
+*/
 unsigned starpu_scc_worker_get_count(void);
+
+/**
+   Return the number of MPI Master Slave workers controlled by StarPU.
+*/
 unsigned starpu_mpi_ms_worker_get_count(void);
 
+/**
+   Return the number of MIC devices controlled by StarPU. The return
+   value should be at most \ref STARPU_MAXMICDEVS.
+*/
 unsigned starpu_mic_device_get_count(void);
 
+/**
+   Return the identifier of the current worker, i.e. the one associated
+   to the calling thread. The return value is either \c -1 if the
+   current context is not a StarPU worker (i.e. when called from the
+   application outside a task or a callback), or an integer between \c
+   0 and starpu_worker_get_count() - \c 1.
+*/
 int starpu_worker_get_id(void);
+
 unsigned _starpu_worker_get_id_check(const char *f, int l);
+
+/**
+   Similar to starpu_worker_get_id(), but abort when called from
+   outside a worker (i.e. when starpu_worker_get_id() would return \c
+   -1).
+*/
 unsigned starpu_worker_get_id_check(void);
+
 #define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__)
 int starpu_worker_get_bindid(int workerid);
 
@@ -132,20 +232,79 @@ int starpu_combined_worker_get_rank(void);
 
 void starpu_sched_find_all_worker_combinations(void);
 
+/**
+   Return the type of processing unit associated to the worker \p id.
+   The worker identifier is a value returned by the function
+   starpu_worker_get_id(). The return value indicates the
+   architecture of the worker: ::STARPU_CPU_WORKER for a CPU core,
+   ::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER
+   for an OpenCL device. The return value for an invalid identifier is
+   unspecified.
+*/
 enum starpu_worker_archtype starpu_worker_get_type(int id);
 
+/**
+   Return the number of workers of \p type. A non-negative value is
+   returned in case of success; <c>-EINVAL</c> indicates that \p type
+   is not valid.
+*/
 int starpu_worker_get_count_by_type(enum starpu_worker_archtype type);
 
+/**
+   Get the list of identifiers of workers of \p type. Fill the array
+   \p workerids with the identifiers of the workers. The argument
+   \p maxsize indicates the size of the array \p workerids. The return
+   value gives the number of identifiers that were put in the array.
+   <c>-ERANGE</c> is returned if \p maxsize is lower than the number
+   of workers with the appropriate type: in that case, the array is
+   filled with the \p maxsize first elements. To avoid such overflows,
+   the value of \p maxsize can be chosen by the means of the function
+   starpu_worker_get_count_by_type(), or by passing a value greater or
+   equal to \ref STARPU_NMAXWORKERS.
+*/
 unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize);
 
+/**
+   Return the identifier of the \p num -th worker that has the
+   specified \p type. If there is no such worker, -1 is returned.
+*/
 int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
 
+/**
+   Return the identifier of the worker that has the specified \p type
+   and device id \p devid (which may not be the n-th, if some devices
+   are skipped for instance). If there is no such worker, \c -1 is
+   returned.
+*/
 int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
 
+/**
+   Get the name of the worker \p id. StarPU associates a unique human
+   readable string to each processing unit. This function copies at
+   most the \p maxlen first bytes of the unique string associated to
+   the worker \p id into the \p dst buffer. The caller is responsible
+   for ensuring that \p dst is a valid pointer to a buffer of \p
+   maxlen bytes at least. Calling this function on an invalid
+   identifier results in an unspecified behaviour.
+*/
 void starpu_worker_get_name(int id, char *dst, size_t maxlen);
 
+/**
+   Display on \p output the list (if any) of all the workers of the
+   given \p type.
+*/
 void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type);
 
+/**
+   Return the device id of the worker \p id. The worker should be
+   identified with the value returned by the starpu_worker_get_id()
+   function. In the case of a CUDA worker, this device identifier is
+   the logical device identifier exposed by CUDA (used by the function
+   \c cudaGetDevice() for instance). The device identifier of a CPU
+   worker is the logical identifier of the core on which the worker
+   was bound; this identifier is either provided by the OS or by the
+   library <c>hwloc</c> in case it is available.
+*/
 int starpu_worker_get_devid(int id);
 
 int starpu_worker_get_mp_nodeid(int id);
@@ -158,6 +317,9 @@ unsigned starpu_worker_is_blocked_in_parallel(int workerid);
 
 unsigned starpu_worker_is_slave_somewhere(int workerid);
 
+/**
+   Return worker \p type as a string.
+*/
 char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
 
 int starpu_bindid_get_workerids(int bindid, int **workerids);
@@ -168,45 +330,140 @@ int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum star
 
 unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid);
 
+#ifdef STARPU_HAVE_HWLOC
+/**
+   If StarPU was compiled with \c hwloc support, return a duplicate of
+   the \c hwloc cpuset associated with the worker \p workerid. The
+   returned cpuset is obtained from a \c hwloc_bitmap_dup() function
+   call. It must be freed by the caller using \c hwloc_bitmap_free().
+*/
+hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
+/**
+   If StarPU was compiled with \c hwloc support, return the \c hwloc
+   object corresponding to the worker \p workerid.
+*/
+hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
+#endif
+
+/**
+   Return the identifier of the memory node associated to the worker
+   identified by \p workerid.
+*/
+unsigned starpu_worker_get_memory_node(unsigned workerid);
+
+unsigned starpu_memory_nodes_get_count(void);
+int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
+int starpu_memory_nodes_get_numa_count(void);
+
+/**
+   Return the identifier of the memory node associated to the NUMA
+   node identified by \p osid by the Operating System.
+*/
+int starpu_memory_nodes_numa_id_to_devid(int osid);
+
+/**
+   Return the Operating System identifier of the memory node whose
+   StarPU identifier is \p id.
+*/
+int starpu_memory_nodes_numa_devid_to_id(unsigned id);
+
+/**
+   Return the type of \p node as defined by ::starpu_node_kind. For
+   example, when defining a new data interface, this function should
+   be used in the allocation function to determine on which device the
+   memory needs to be allocated.
+*/
+enum starpu_node_kind starpu_node_get_kind(unsigned node);
+
+/**
+   @name Scheduling operations
+   @{
+*/
+
+/**
+   Return \c !0 if current worker has a scheduling operation in
+   progress, and \c 0 otherwise.
+*/
 int starpu_worker_sched_op_pending(void);
 
+/**
+   Allow other threads and workers to temporarily observe the current
+   worker state, even though it is performing a scheduling operation.
+   Must be called by a worker before performing a potentially blocking
+   call such as acquiring a mutex other than its own sched_mutex. This
+   function increases \c state_relax_refcnt from the current worker.
+   No more than <c>UINT_MAX-1</c> nested starpu_worker_relax_on()
+   calls should be performed on the same worker. This function is
+   automatically called by starpu_worker_lock() to relax the caller
+   worker state while attempting to lock the target worker.
+*/
 void starpu_worker_relax_on(void);
 
+/**
+   Must be called after a potentially blocking call is complete, to
+   restore the relax state in place before the corresponding
+   starpu_worker_relax_on(). Decreases \c state_relax_refcnt. Calls to
+   starpu_worker_relax_on() and starpu_worker_relax_off() must be
+   properly paired. This function is automatically called by
+   starpu_worker_unlock() after the target worker has been unlocked.
+*/
 void starpu_worker_relax_off(void);
 
+/**
+   Return \c !0 if the current worker \c state_relax_refcnt!=0 and \c
+   0 otherwise.
+*/
 int starpu_worker_get_relax_state(void);
 
+/**
+   Acquire the sched mutex of \p workerid. If the caller is a worker,
+   distinct from \p workerid, the caller worker automatically enters a
+   relax state while acquiring the target worker lock.
+*/
 void starpu_worker_lock(int workerid);
 
+/**
+   Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if
+   successful, \c !0 if \p workerid sched mutex is held or the
+   corresponding worker is not in a relax state. If the caller is a
+   worker, distinct from \p workerid, the caller worker automatically
+   enters relax state if successfully acquiring the target worker lock.
+*/
 int starpu_worker_trylock(int workerid);
 
+/**
+   Release the previously acquired sched mutex of \p workerid. Restore
+   the relax state of the caller worker if needed.
+*/
 void starpu_worker_unlock(int workerid);
 
+/**
+   Acquire the current worker sched mutex.
+*/
 void starpu_worker_lock_self(void);
 
+/**
+   Release the current worker sched mutex.
+*/
 void starpu_worker_unlock_self(void);
 
-int starpu_wake_worker_relax(int workerid);
-
 #ifdef STARPU_WORKER_CALLBACKS
+/**
+   If StarPU was compiled with blocking drivers support and worker
+   callbacks support enabled, allow specifying an external resource
+   manager callback to be notified about workers going to sleep.
+*/
 void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid));
 
+/**
+   If StarPU was compiled with blocking drivers support and worker
+   callbacks support enabled, allow specifying an external resource
+   manager callback to be notified about workers waking up.
+*/
 void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid));
 #endif
 
-#ifdef STARPU_HAVE_HWLOC
-hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
-hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
-#endif
-
-unsigned starpu_worker_get_memory_node(unsigned workerid);
-unsigned starpu_memory_nodes_get_count(void);
-int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
-int starpu_memory_nodes_get_numa_count(void);
-int starpu_memory_nodes_numa_id_to_devid(int osid);
-int starpu_memory_nodes_numa_devid_to_id(unsigned id);
-
-enum starpu_node_kind starpu_node_get_kind(unsigned node);
+/** @} */
 
 /** @} */