ソースを参照

doc/doxygen: more documentation

Nathalie Furmento 12 年 前
コミット
b7f492749f

+ 80 - 0
doc/doxygen/chapters/api/cuda_extensions.doxy

@@ -0,0 +1,80 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup CUDA_Extensions CUDA Extensions
+
+\def STARPU_USE_CUDA
+\ingroup CUDA_Extensions
+\brief This macro is defined when StarPU has been installed with CUDA
+support. It should be used in your code to detect the availability of
+CUDA as shown in Full source code for the 'Scaling a Vector' example.
+
+\fn cudaStream_t starpu_cuda_get_local_stream(void)
+\ingroup CUDA_Extensions
+\brief This function gets the current worker’s CUDA stream. StarPU
+provides a stream for every CUDA device controlled by StarPU. This
+function is only provided for convenience so that programmers can
+easily use asynchronous operations within codelets without having to
+create a stream by hand. Note that the application is not forced to
+use the stream provided by starpu_cuda_get_local_stream() and may also
+create its own streams. Synchronizing with cudaThreadSynchronize() is
+allowed, but will reduce the likelihood of having all transfers
+overlapped.
+
+\fn const struct cudaDeviceProp * starpu_cuda_get_device_properties(unsigned workerid)
+\ingroup CUDA_Extensions
+\brief This function returns a pointer to device properties for worker
+\p workerid (assumed to be a CUDA worker).
+
+\fn void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
+\ingroup CUDA_Extensions
+\brief Report a CUDA error.
+
+\def STARPU_CUDA_REPORT_ERROR (cudaError_t status)
+\ingroup CUDA_Extensions
+\brief Calls starpu_cuda_report_error(), passing the current function, file and line position.
+
+\fn int starpu_cuda_copy_async_sync (void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)
+\ingroup CUDA_Extensions
+\brief Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
+to the pointer \p dst_ptr on \p dst_node. The function first tries to
+copy the data asynchronously (unless stream is <c>NULL</c>). If the
+asynchronous copy fails or if stream is <c>NULL</c>, it copies the
+data synchronously. The function returns <c>-EAGAIN</c> if the
+asynchronous launch was successful. It returns 0 if the synchronous
+copy was successful, or fails otherwise.
+
+\fn void starpu_cuda_set_device(unsigned devid)
+\ingroup CUDA_Extensions
+\brief Calls cudaSetDevice(devid) or cudaGLSetGLDevice(devid),
+according to whether \p devid is among the field
+starpu_conf::cuda_opengl_interoperability.
+
+\fn void starpu_cublas_init(void)
+\ingroup CUDA_Extensions
+\brief This function initializes CUBLAS on every CUDA device. The
+CUBLAS library must be initialized prior to any CUBLAS call. Calling
+starpu_cublas_init() will initialize CUBLAS on every CUDA device
+controlled by StarPU. This call blocks until CUBLAS has been properly
+initialized on every device.
+
+\fn void starpu_cublas_shutdown(void)
+\ingroup CUDA_Extensions
+\brief This function synchronously deinitializes the CUBLAS library on
+every CUDA device.
+
+\fn void starpu_cublas_report_error(const char *func, const char *file, int line, cublasStatus status)
+\ingroup CUDA_Extensions
+\brief Report a cublas error.
+
+\def STARPU_CUBLAS_REPORT_ERROR (cublasStatus status)
+\ingroup CUDA_Extensions
+\brief Calls starpu_cublas_report_error(), passing the current
+function, file and line position.
+
+*/

+ 113 - 0
doc/doxygen/chapters/api/explicit_dependencies.doxy

@@ -0,0 +1,113 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Explicit_Dependencies Explicit Dependencies
+
+\fn void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
+\ingroup Explicit_Dependencies
+\brief Declare task dependencies between a \p task and an array of
+tasks of length \p ndeps. This function must be called prior to the
+submission of the task, but it may be called after the submission or the
+execution of the tasks in the array, provided the tasks are still
+valid (i.e. they were not automatically destroyed). Calling this
+function on a task that was already submitted or with an entry of
+\p task_array that is no longer a valid task results in an undefined
+behaviour. If \p ndeps is 0, no dependency is added. It is possible to
+call starpu_task_declare_deps_array() several times on the same task,
+in this case, the dependencies are added. It is possible to have
+redundancy in the task dependencies.
+
+\typedef starpu_tag_t
+\ingroup Explicit_Dependencies
+\brief This type defines a task logical identifier. It is possible to
+associate a task with a unique <em>tag</em> chosen by the application,
+and to express dependencies between tasks by the means of those tags.
+To do so, fill the field starpu_task::tag_id with a tag number (can be
+arbitrary) and set the field starpu_task::use_tag to 1. If
+starpu_tag_declare_deps() is called with this tag number, the task
+will not be started until the tasks which hold the declared
+dependency tags are completed.
+
+\fn void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
+\ingroup Explicit_Dependencies
+\brief Specify the dependencies of the task identified by tag \p id.
+The first argument specifies the tag which is configured, the second
+argument gives the number of tag(s) on which \p id depends. The
+following arguments are the tags which have to be terminated to unlock
+the task. This function must be called before the associated task is
+submitted to StarPU with starpu_task_submit().
+
+<b>WARNING! Use with caution</b>. Because of the variable arity of
+starpu_tag_declare_deps(), note that the last arguments must be of
+type starpu_tag_t: constant values typically need to be explicitly
+cast. Otherwise, due to integer sizes and argument passing on the
+stack, the C compiler might consider the tag <c>0x200000003</c>
+instead of <c>0x2</c> and <c>0x3</c> when calling
+<c>starpu_tag_declare_deps(0x1, 2, 0x2, 0x3)</c>. Using the
+starpu_tag_declare_deps_array() function avoids this hazard.
+
+\code{.c}
+/*  Tag 0x1 depends on tags 0x32 and 0x52 */
+starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
+\endcode
+
+\fn void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array)
+\ingroup Explicit_Dependencies
+\brief This function is similar to starpu_tag_declare_deps(), except
+that it does not take a variable number of arguments but an array of
+tags of size \p ndeps.
+
+\code{.c}
+/*  Tag 0x1 depends on tags 0x32 and 0x52 */
+starpu_tag_t tag_array[2] = {0x32, 0x52};
+starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
+\endcode
+
+\fn int starpu_tag_wait(starpu_tag_t id)
+\ingroup Explicit_Dependencies
+\brief This function blocks until the task associated to tag \p id has
+been executed. This is a blocking call which must therefore not be
+called within tasks or callbacks, but only from the application
+directly. It is possible to synchronize with the same tag multiple
+times, as long as the starpu_tag_remove() function is not called. Note
+that it is still possible to synchronize with a tag associated to a
+task for which the structure starpu_task was freed (e.g. if the field
+starpu_task::destroy was enabled).
+
+\fn int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
+\ingroup Explicit_Dependencies
+\brief This function is similar to starpu_tag_wait() except that it
+blocks until all the \p ntags tags contained in the array \p id are
+terminated.
+
+\fn void starpu_tag_restart(starpu_tag_t id)
+\ingroup Explicit_Dependencies
+\brief This function can be used to clear the <em>already
+notified</em> status of a tag which is not associated with a task.
+Before that, calling starpu_tag_notify_from_apps() again will not
+notify the successors. After that, the next call to
+starpu_tag_notify_from_apps() will notify the successors.
+
+\fn void starpu_tag_remove(starpu_tag_t id)
+\ingroup Explicit_Dependencies
+\brief This function releases the resources associated to tag \p id.
+It can be called once the corresponding task has been executed and
+when there is no other tag that depends on this tag anymore.
+
+\fn void starpu_tag_notify_from_apps (starpu_tag_t id)
+\ingroup Explicit_Dependencies
+\brief This function explicitly unlocks tag \p id. It may be useful in
+the case of applications which execute part of their computation
+outside StarPU tasks (e.g. third-party libraries). It is also provided
+as a convenient tool for the programmer, for instance to entirely
+construct the task DAG before actually giving StarPU the opportunity
+to execute the tasks. When called several times on the same tag,
+notification will be done only on first call, thus implementing "OR"
+dependencies, until the tag is restarted using starpu_tag_restart().
+
+*/

+ 63 - 0
doc/doxygen/chapters/api/fft_support.doxy

@@ -0,0 +1,63 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup FFT_Support FFT Support
+
+\fn void * starpufft_malloc(size_t n)
+\ingroup FFT_Support
+Allocates memory for \p n bytes. This is preferred over malloc(),
+since it allocates pinned memory, which allows overlapped transfers.
+
+\fn void * starpufft_free(void *p)
+\ingroup FFT_Support
+Release memory previously allocated.
+
+\fn struct starpufft_plan * starpufft_plan_dft_1d(int n, int sign, unsigned flags)
+\ingroup FFT_Support
+Initializes a plan for 1D FFT of size \p n. \p sign can be STARPUFFT_FORWARD
+or STARPUFFT_INVERSE. \p flags must be 0.
+
+\fn struct starpufft_plan * starpufft_plan_dft_2d(int n, int m, int sign, unsigned flags)
+\ingroup FFT_Support
+Initializes a plan for 2D FFT of size (\p n, \p m). \p sign can be
+STARPUFFT_FORWARD or STARPUFFT_INVERSE. \p flags must be 0.
+
+\fn struct starpu_task * starpufft_start(starpufft_plan p, void *in, void *out)
+\ingroup FFT_Support
+Start an FFT previously planned as \p p, using \p in and \p out as
+input and output. This only submits the task and does not wait for it.
+The application should call starpufft_cleanup() to unregister the
+data.
+
+\fn struct starpu_task * starpufft_start_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out)
+\ingroup FFT_Support
+Start an FFT previously planned as \p p, using data handles \p in and
+\p out as input and output (assumed to be vectors of elements of the
+expected types). This only submits the task and does not wait for it.
+
+\fn void starpufft_execute(starpufft_plan p, void *in, void *out)
+\ingroup FFT_Support
+Execute an FFT previously planned as \p p, using \p in and \p out as
+input and output. This submits and waits for the task.
+
+\fn void starpufft_execute_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out)
+\ingroup FFT_Support
+Execute an FFT previously planned as \p p, using data handles \p in
+and \p out as input and output (assumed to be vectors of elements of
+the expected types). This submits and waits for the task.
+
+\fn void starpufft_cleanup(starpufft_plan p)
+\ingroup FFT_Support
+Releases data for plan \p p, in the starpufft_start() case.
+
+\fn void starpufft_destroy_plan(starpufft_plan p)
+\ingroup FFT_Support
+Destroys plan \p p, i.e. release all CPU (fftw) and GPU (cufft)
+resources.
+
+*/
+

+ 27 - 0
doc/doxygen/chapters/api/fxt_support.doxy

@@ -0,0 +1,27 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup FxT_Support FxT Support
+
+\fn void starpu_fxt_start_profiling(void)
+\ingroup FxT_Support
+Start recording the trace. The trace is by default started from
+starpu_init() call, but can be paused by using
+starpu_fxt_stop_profiling(), in which case
+starpu_fxt_start_profiling() should be called to resume recording
+events.
+
+\fn void starpu_fxt_stop_profiling(void)
+\ingroup FxT_Support
+Stop recording the trace. The trace is by default stopped when calling
+starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
+stop it earlier. starpu_fxt_start_profiling() can then be called to
+start recording it again, etc.
+
+*/
+

+ 42 - 0
doc/doxygen/chapters/api/implicit_dependencies.doxy

@@ -0,0 +1,42 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Implicit_Data_Dependencies Implicit Data Dependencies
+
+\brief In this section, we describe how StarPU makes it possible to
+insert implicit task dependencies in order to enforce sequential data
+consistency. When this data consistency is enabled on a specific data
+handle, any data access will appear as sequentially consistent from
+the application. For instance, if the application submits two tasks
+that access the same piece of data in read-only mode, and then a third
+task that accesses it in write mode, dependencies will be added between
+the two first tasks and the third one. Implicit data dependencies are
+also inserted in the case of data accesses from the application.
+
+\fn void starpu_data_set_default_sequential_consistency_flag(unsigned flag)
+\ingroup Implicit_Data_Dependencies
+\brief Set the default sequential consistency flag. If a non-zero
+value is passed, a sequential data consistency will be enforced for
+all handles registered after this function call, otherwise it is
+disabled. By default, StarPU enables sequential data consistency. It
+is also possible to select the data consistency mode of a specific
+data handle with the function
+starpu_data_set_sequential_consistency_flag().
+
+\fn unsigned starpu_data_get_default_sequential_consistency_flag(void)
+\ingroup Implicit_Data_Dependencies
+\brief Return the default sequential consistency flag
+
+\fn void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag)
+\ingroup Implicit_Data_Dependencies
+\brief Set the data consistency mode associated to a data handle. The
+consistency mode set using this function has the priority over the
+default mode which can be set with
+starpu_data_set_default_sequential_consistency_flag().
+
+*/

+ 47 - 0
doc/doxygen/chapters/api/lower_bound.doxy

@@ -0,0 +1,47 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Theoretical_lower_bound_on_execution_time Theoretical lower bound on execution time
+
+\fn void starpu_bound_start (int deps, int prio)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Start recording tasks (resets stats). \p deps tells whether
+dependencies should be recorded too (this is quite expensive)
+
+\fn void starpu_bound_stop (void)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Stop recording tasks
+
+\fn void starpu_bound_print_dot (FILE *output)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Print the DAG that was recorded
+
+\fn void starpu_bound_compute (double *res, double *integer_res, int integer)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Get theoretical upper bound (in ms) (needs glpk support
+detected by configure script). It returns 0 if some performance models
+are not calibrated.
+
+\fn void starpu_bound_print_lp (FILE *output)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Emit the Linear Programming system on \p output for the recorded
+tasks, in the lp format
+
+\fn void starpu_bound_print_mps (FILE *output)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Emit the Linear Programming system on \p output for the recorded
+tasks, in the mps format
+
+\fn void starpu_bound_print (FILE *output, int integer)
+\ingroup Theoretical_lower_bound_on_execution_time
+\brief Emit statistics of actual execution vs theoretical upper bound.
+\p integer permits to choose between integer solving (which takes a
+long time but is correct), and relaxed solving (which provides an
+approximate solution).
+
+*/

+ 37 - 0
doc/doxygen/chapters/api/misc_helpers.doxy

@@ -0,0 +1,37 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Miscellaneous_helpers Miscellaneous helpers
+
+\fn int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg)
+\ingroup Miscellaneous_helpers
+Copy the content of \p src_handle into \p dst_handle. The parameter \p
+asynchronous indicates whether the function should block or not. In
+the case of an asynchronous call, it is possible to synchronize with
+the termination of this operation either by the means of implicit
+dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
+\p callback_func is not NULL, this callback function is executed after
+the handle has been copied, and it is given the pointer
+\p callback_arg as argument.
+
+\fn void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where)
+\ingroup Miscellaneous_helpers
+This function executes the given function on a subset of workers. When
+calling this method, the offloaded function \p func is executed by
+every StarPU worker that may execute the function. The argument \p arg
+is passed to the offloaded function. The argument \p where specifies
+on which types of processing units the function should be executed.
+Similarly to the field starpu_codelet::where, it is possible to
+specify that the function should be executed on every CUDA device and
+every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function blocks
+until the function has been executed on every appropriate processing
+unit, so that it may not be called from a callback function for
+instance.
+
+*/
+

+ 273 - 0
doc/doxygen/chapters/api/mpi.doxy

@@ -0,0 +1,273 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup MPI_Support MPI Support
+
+@name Initialisation
+\ingroup MPI_Support
+
+\fn int starpu_mpi_init (int *argc, char ***argv, int initialize_mpi)
+\ingroup MPI_Support
+Initializes the starpumpi library. \p initialize_mpi indicates if MPI
+should be initialized or not by StarPU. If the value is not 0, MPI
+will be initialized by calling <c>MPI_Init_thread(argc, argv,
+MPI_THREAD_SERIALIZED, ...)</c>.
+
+\fn int starpu_mpi_initialize (void)
+\deprecated
+\ingroup MPI_Support
+This function has been made deprecated. One should use instead the
+function starpu_mpi_init(). This function does not call MPI_Init(), it
+should be called beforehand.
+
+\fn int starpu_mpi_initialize_extended (int *rank, int *world_size)
+\deprecated
+\ingroup MPI_Support
+This function has been made deprecated. One should use instead the
+function starpu_mpi_init(). MPI will be initialized by starpumpi by
+calling <c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED,
+...)</c>.
+
+\fn int starpu_mpi_shutdown (void)
+\ingroup MPI_Support
+Cleans the starpumpi library. This must be called between calling
+starpu_mpi functions and starpu_shutdown(). MPI_Finalize() will be
+called if StarPU-MPI has been initialized by starpu_mpi_init().
+
+\fn void starpu_mpi_comm_amounts_retrieve (size_t *comm_amounts)
+\ingroup MPI_Support
+Retrieve the current amount of communications from the current node in
+the array \p comm_amounts which must have a size greater than or equal to
+the world size. Communications statistics must be enabled (see
+STARPU_COMM_STATS).
+
+@name Communication
+\ingroup MPI_Support
+
+\fn int starpu_mpi_send (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
+\ingroup MPI_Support
+Performs a standard-mode, blocking send of \p data_handle to the node
+\p dest using the message tag \p mpi_tag within the communicator \p
+comm.
+
+\fn int starpu_mpi_recv (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
+\ingroup MPI_Support
+Performs a standard-mode, blocking receive in \p data_handle from the
+node \p source using the message tag \p mpi_tag within the
+communicator \p comm.
+
+\fn int starpu_mpi_isend (starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
+\ingroup MPI_Support
+Posts a standard-mode, non blocking send of \p data_handle to the node
+\p dest using the message tag \p mpi_tag within the communicator \p
+comm. After the call, the pointer to the request \p req can be used to
+test or to wait for the completion of the communication.
+
+\fn int starpu_mpi_irecv (starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
+\ingroup MPI_Support
+Posts a nonblocking receive in \p data_handle from the node \p source
+using the message tag \p mpi_tag within the communicator \p comm.
+After the call, the pointer to the request \p req can be used to test
+or to wait for the completion of the communication.
+
+\fn int starpu_mpi_isend_detached (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\ingroup MPI_Support
+Posts a standard-mode, non blocking send of \p data_handle to the node
+\p dest using the message tag \p mpi_tag within the communicator \p
+comm. On completion, the \p callback function is called with the
+argument \p arg.
+Similarly to the pthread detached functionality, when a detached
+communication completes, its resources are automatically released back
+to the system, there is no need to test or to wait for the completion
+of the request.
+
+\fn int starpu_mpi_irecv_detached (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\ingroup MPI_Support
+Posts a nonblocking receive in \p data_handle from the node \p source
+using the message tag \p mpi_tag within the communicator \p comm. On
+completion, the \p callback function is called with the argument \p
+arg.
+Similarly to the pthread detached functionality, when a detached
+communication completes, its resources are automatically released back
+to the system, there is no need to test or to wait for the completion
+of the request.
+
+\fn int starpu_mpi_wait (starpu_mpi_req *req, MPI_Status *status)
+\ingroup MPI_Support
+Returns when the operation identified by request \p req is complete.
+
+\fn int starpu_mpi_test (starpu_mpi_req *req, int *flag, MPI_Status *status)
+\ingroup MPI_Support
+If the operation identified by \p req is complete, set \p flag to 1.
+The \p status object is set to contain information on the completed
+operation.
+
+\fn int starpu_mpi_barrier (MPI_Comm comm)
+\ingroup MPI_Support
+Blocks the caller until all group members of the communicator \p comm
+have called it.
+
+\fn int starpu_mpi_isend_detached_unlock_tag (starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
+\ingroup MPI_Support
+Posts a standard-mode, non blocking send of \p data_handle to the node
+\p dest using the message tag \p mpi_tag within the communicator \p
+comm. On completion, \p tag is unlocked.
+
+\fn int starpu_mpi_irecv_detached_unlock_tag (starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
+\ingroup MPI_Support
+Posts a nonblocking receive in \p data_handle from the node \p source
+using the message tag \p mpi_tag within the communicator \p comm. On
+completion, \p tag is unlocked.
+
+\fn int starpu_mpi_isend_array_detached_unlock_tag (unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
+\ingroup MPI_Support
+Posts \p array_size standard-mode, non blocking send. Each post sends
+the n-th data of the array \p data_handle to the n-th node of the
+array \p dest using the n-th message tag of the array \p mpi_tag
+within the n-th communicator of the array \p comm. On completion of
+all the requests, \p tag is unlocked.
+
+\fn int starpu_mpi_irecv_array_detached_unlock_tag (unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
+\ingroup MPI_Support
+Posts \p array_size nonblocking receive. Each post receives in the n-th
+data of the array \p data_handle from the n-th node of the array \p
+source using the n-th message tag of the array \p mpi_tag within the
+n-th communicator of the array \p comm. On completion of all the
+requests, \p tag is unlocked.
+
+@name Communication Cache
+\ingroup MPI_Support
+
+\fn void starpu_mpi_cache_flush (MPI_Comm comm, starpu_data_handle_t data_handle)
+\ingroup MPI_Support
+Clear the send and receive communication cache for the data
+\p data_handle. The function has to be called synchronously by all the
+MPI nodes. The function does nothing if the cache mechanism is
+disabled (see STARPU_MPI_CACHE).
+
+\fn void starpu_mpi_cache_flush_all_data (MPI_Comm comm)
+\ingroup MPI_Support
+Clear the send and receive communication cache for all data. The
+function has to be called synchronously by all the MPI nodes. The
+function does nothing if the cache mechanism is disabled (see
+STARPU_MPI_CACHE).
+
+@name MPI Insert Task
+\ingroup MPI_Support
+
+\fn int starpu_data_set_tag (starpu_data_handle_t handle, int tag)
+\ingroup MPI_Support
+Tell StarPU-MPI which MPI tag to use when exchanging the data.
+
+\fn int starpu_data_get_tag (starpu_data_handle_t handle)
+\ingroup MPI_Support
+Returns the MPI tag to be used when exchanging the data.
+
+\fn int starpu_data_set_rank (starpu_data_handle_t handle, int rank)
+\ingroup MPI_Support
+Tell StarPU-MPI which MPI node "owns" a given data, that is, the node
+which will always keep an up-to-date value, and will by default
+execute tasks which write to it.
+
+\fn int starpu_data_get_rank (starpu_data_handle_t handle)
+\ingroup MPI_Support
+Returns the last value set by starpu_data_set_rank().
+
+\def STARPU_EXECUTE_ON_NODE
+\ingroup MPI_Support
+This macro is used when calling starpu_mpi_insert_task(), and must be
+followed by an integer value which specifies the node on which to
+execute the codelet.
+
+\def STARPU_EXECUTE_ON_DATA
+\ingroup MPI_Support
+This macro is used when calling starpu_mpi_insert_task(), and must be
+followed by a data handle to specify that the node owning the given
+data will execute the codelet.
+
+\fn int starpu_mpi_insert_task (MPI_Comm comm, struct starpu_codelet *codelet, ...)
+\ingroup MPI_Support
+Create and submit a task corresponding to codelet with the following
+arguments. The argument list must be zero-terminated.
+
+The arguments following the codelets are the same types as for the
+function starpu_insert_task(). The extra argument
+::STARPU_EXECUTE_ON_NODE followed by an integer allows to specify the
+MPI node to execute the codelet. It is also possible to specify that
+the node owning a specific data will execute the codelet, by using
+::STARPU_EXECUTE_ON_DATA followed by a data handle.
+
+The internal algorithm is as follows:
+<ol>
+<li>
+        Find out which MPI node is going to execute the codelet.
+        <ul>
+            <li>If there is only one node owning data in W mode, it will be selected;
+            <li>If there are several nodes owning data in W mode, the one selected will be the one having the least data in R mode so as to minimize the amount of data to be transferred;
+            <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
+            <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
+        </ul>
+</li>
+<li>
+        Send and receive data as requested. Nodes owning data which need to be read by the task are sending them to the MPI node which will execute it. The latter receives them.
+</li>
+<li>
+        Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
+</li>
+<li>
+        If several MPI nodes own data to be written to, send written data back to their owners.
+</li>
+</ol>
+
+The algorithm also includes a communication cache mechanism that
+allows not to send data twice to the same MPI node, unless the data
+has been modified. The cache can be disabled (see STARPU_MPI_CACHE).
+
+\fn void starpu_mpi_get_data_on_node (MPI_Comm comm, starpu_data_handle_t data_handle, int node)
+\ingroup MPI_Support
+Transfer data \p data_handle to MPI node \p node, sending it from its
+owner if needed. At least the target node and the owner have to call
+the function.
+
+\fn void starpu_mpi_get_data_on_node_detached (MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
+\ingroup MPI_Support
+Transfer data \p data_handle to MPI node \p node, sending it from its
+owner if needed. At least the target node and the owner have to call
+the function. On reception, the \p callback function is called with
+the argument \p arg.
+
+@name Collective Operations
+\ingroup MPI_Support
+
+\fn void starpu_mpi_redux_data (MPI_Comm comm, starpu_data_handle_t data_handle)
+\ingroup MPI_Support
+Perform a reduction on the given data. All nodes send the data to its
+owner node which will perform a reduction.
+
+\fn int starpu_mpi_scatter_detached (starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+\ingroup MPI_Support
+Scatter data among processes of the communicator based on the
+ownership of the data. For each data of the array \p data_handles, the
+process \p root sends the data to the process owning this data. Processes
+receiving data must have valid data handles to receive them. On
+completion of the collective communication, the \p scallback function is
+called with the argument \p sarg on the process \p root, the \p
+rcallback function is called with the argument \p rarg on any other
+process.
+
+\fn int starpu_mpi_gather_detached (starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+\ingroup MPI_Support
+Gather data from the different processes of the communicator onto the
+process \p root. Each process owning data handle in the array
+\p data_handles will send them to the process \p root. The process \p
+root must have valid data handles to receive the data. On completion
+of the collective communication, the \p rcallback function is called
+with the argument \p rarg on the process root, the \p scallback
+function is called with the argument \p sarg on any other process.
+
+*/

+ 250 - 0
doc/doxygen/chapters/api/opencl_extensions.doxy

@@ -0,0 +1,250 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup OpenCL_Extensions OpenCL Extensions
+
+\def STARPU_USE_OPENCL
+\ingroup OpenCL_Extensions
+\brief This macro is defined when StarPU has been installed with
+OpenCL support. It should be used in your code to detect the
+availability of OpenCL as shown in Full source code for the 'Scaling a
+Vector' example.
+
+@name Writing OpenCL kernels
+\ingroup OpenCL_Extensions
+
+\fn void starpu_opencl_get_context(int devid, cl_context *context)
+\ingroup OpenCL_Extensions
+Places the OpenCL context of the device designated by \p devid
+into \p context.
+
+\fn void starpu_opencl_get_device(int devid, cl_device_id *device)
+\ingroup OpenCL_Extensions
+Places the cl_device_id corresponding to \p devid in \p device.
+
+\fn void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
+\ingroup OpenCL_Extensions
+Places the command queue of the device designated by \p devid
+into \p queue.
+
+\fn void starpu_opencl_get_current_context(cl_context *context)
+\ingroup OpenCL_Extensions
+Return the context of the current worker.
+
+\fn void starpu_opencl_get_current_queue(cl_command_queue *queue)
+\ingroup OpenCL_Extensions
+Return the computation kernel command queue of the current
+worker.
+
+\fn int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...)
+\ingroup OpenCL_Extensions
+Sets the arguments of a given kernel. The list of arguments
+must be given as <c>(size_t size_of_the_argument, cl_mem *
+pointer_to_the_argument)</c>. The last argument must be 0. On
+success, returns the number of arguments that were set. In case of
+failure, returns the id of the argument that could not be set and
+\p err is set to the error returned by OpenCL.
+
+Here is an example:
+\code{.c}
+int n;
+cl_int err;
+cl_kernel kernel;
+n = starpu_opencl_set_kernel_args(&err, &kernel,
+                                  sizeof(foo), &foo,
+                                  sizeof(bar), &bar,
+                                  0);
+if (n != 2)
+   fprintf(stderr, "Error : %d\n", err);
+\endcode
+
+@name Compiling OpenCL kernels
+\ingroup OpenCL_Extensions
+
+Source codes for OpenCL kernels can be stored in a file or in a
+string. StarPU provides functions to build the program executable for
+each available OpenCL device as a cl_program object. This program
+executable can then be loaded within a specific queue as explained in
+the next section. These are only helpers; applications can also fill a
+starpu_opencl_program array by hand for more advanced use (e.g.
+different programs on the different OpenCL devices, for relocation
+purpose for instance).
+
+\struct starpu_opencl_program
+\ingroup OpenCL_Extensions
+\brief Stores the OpenCL programs as compiled for the different OpenCL
+devices.
+\var starpu_opencl_program::programs
+Stores each program for each OpenCL device.
+
+\fn int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char* build_options)
+\ingroup OpenCL_Extensions
+This function compiles an OpenCL source code stored in a file.
+
+\fn int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char* build_options)
+\ingroup OpenCL_Extensions
+This function compiles an OpenCL source code stored in a string.
+
+\fn int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs)
+\ingroup OpenCL_Extensions
+This function unloads an OpenCL compiled code.
+
+\fn void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source)
+\ingroup OpenCL_Extensions
+Store the contents of the file \p source_file_name in the buffer
+\p opencl_program_source. The file \p source_file_name can be located in the
+current directory, or in the directory specified by the environment
+variable STARPU_OPENCL_PROGRAM_DIR (see STARPU_OPENCL_PROGRAM_DIR), or
+in the directory <c>share/starpu/opencl</c> of the installation
+directory of StarPU, or in the source directory of StarPU. When the
+file is found, \p located_file_name is the full name of the file as it
+has been located on the system, \p located_dir_name the directory
+where it has been located. Otherwise, they are both set to the empty
+string.
+
+\fn int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char * build_options)
+\ingroup OpenCL_Extensions
+Compile the OpenCL kernel stored in the file \p source_file_name
+with the given options \p build_options and store the result in the
+directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
+\p source_file_name. The compilation is done for every OpenCL device,
+and the filename is suffixed with the vendor id and the device id of
+the OpenCL device.
+
+\fn int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char*build_options)
+\ingroup OpenCL_Extensions
+Compile the OpenCL kernel in the string \p opencl_program_source
+with the given options \p build_options and store the result in the
+directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
+file_name. The compilation is done for every OpenCL device, and the
+filename is suffixed with the vendor id and the device id of the
+OpenCL device.
+
+\fn int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs)
+\ingroup OpenCL_Extensions
+Compile the binary OpenCL kernel identified with \p kernel_id.
+For every OpenCL device, the binary OpenCL kernel will be loaded from
+the file
+<c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
+
+@name Loading OpenCL kernels
+\ingroup OpenCL_Extensions
+
+\fn int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid)
+\ingroup OpenCL_Extensions
+Create a kernel \p kernel for device \p devid, on its computation
+command queue returned in \p queue, using program \p opencl_programs
+and name \p kernel_name.
+
+\fn int starpu_opencl_release_kernel(cl_kernel kernel)
+\ingroup OpenCL_Extensions
+Release the given \p kernel, to be called after kernel execution.
+
+@name OpenCL statistics
+
+\fn int starpu_opencl_collect_stats(cl_event event)
+\ingroup OpenCL_Extensions
+This function allows collecting statistics on a kernel execution.
+After termination of the kernels, the OpenCL codelet should call this
+function to pass it the event returned by clEnqueueNDRangeKernel, to
+let StarPU collect statistics about the kernel execution (used cycles,
+consumed power).
+
+@name OpenCL utilities
+\ingroup OpenCL_Extensions
+
+\fn const char * starpu_opencl_error_string(cl_int status)
+\ingroup OpenCL_Extensions
+Return the error message in English corresponding to \p status, an OpenCL
+error code.
+
+\fn void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status)
+\ingroup OpenCL_Extensions
+Given a valid error status, prints the corresponding error message on
+stdout, along with the given function name \p func, the given filename
+\p file, the given line number \p line and the given message \p msg.
+
+\def STARPU_OPENCL_DISPLAY_ERROR(cl_int status)
+\ingroup OpenCL_Extensions
+Call the function starpu_opencl_display_error() with the given error
+\p status, the current function name, current file and line number,
+and an empty message.
+
+\fn void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
+\ingroup OpenCL_Extensions
+Call the function starpu_opencl_display_error() and abort.
+
+\def STARPU_OPENCL_REPORT_ERROR (cl_int status)
+\ingroup OpenCL_Extensions
+Call the function starpu_opencl_report_error() with the given error \p
+status, with the current function name, current file and line number,
+and an empty message.
+
+\def STARPU_OPENCL_REPORT_ERROR_WITH_MSG(const char *msg, cl_int status)
+\ingroup OpenCL_Extensions
+Call the function starpu_opencl_report_error() with the given \p msg
+and the given error \p status, with the current function name, current
+file and line number.
+
+\fn cl_int starpu_opencl_allocate_memory(cl_mem *addr, size_t size, cl_mem_flags flags)
+\ingroup OpenCL_Extensions
+Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
+combination of cl_mem_flags values.
+
+\fn cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
+\ingroup OpenCL_Extensions
+Copy \p size bytes from the given \p ptr on RAM \p src_node to the
+given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
+bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
+synchronous, i.e. the queue is synchronised before returning. If not
+<c>NULL</c>, \p event can be used after the call to wait for this
+particular copy to complete. This function returns <c>CL_SUCCESS</c>
+if the copy was successful, or a valid OpenCL error code otherwise.
+The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+asynchronous launch was successful, or to 0 if \p event was
+<c>NULL</c>.
+
+\fn cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret)
+\ingroup OpenCL_Extensions
+Copy \p size bytes asynchronously from the given \p buffer on OpenCL
+\p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
+offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
+is synchronous, i.e. the queue is synchronised before returning. If not
+<c>NULL</c>, \p event can be used after the call to wait for this
+particular copy to complete. This function returns <c>CL_SUCCESS</c>
+if the copy was successful, or a valid OpenCL error code otherwise.
+The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+asynchronous launch was successful, or to 0 if \p event was
+<c>NULL</c>.
+
+\fn cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret)
+\ingroup OpenCL_Extensions
+Copy \p size bytes asynchronously from byte offset \p src_offset of \p
+src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
+OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
+synchronous, i.e. the queue is synchronised before returning. If not
+<c>NULL</c>, \p event can be used after the call to wait for this
+particular copy to complete. This function returns <c>CL_SUCCESS</c>
+if the copy was successful, or a valid OpenCL error code otherwise.
+The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
+asynchronous launch was successful, or to 0 if \p event was
+<c>NULL</c>.
+
+\fn cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event)
+\ingroup OpenCL_Extensions
+Copy \p size bytes from byte offset \p src_offset of \p src on \p
+src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
+event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
+synchronised before returning. If not <c>NULL</c>, \p event can be
+used after the call to wait for this particular copy to complete. The
+function returns <c>-EAGAIN</c> if the asynchronous launch was
+successful. It returns 0 if the synchronous copy was successful, or
+fails otherwise.
+
+*/

+ 95 - 0
doc/doxygen/chapters/api/parallel_tasks.doxy

@@ -0,0 +1,95 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Parallel_Tasks Parallel Tasks
+
+\fn int starpu_combined_worker_get_size(void)
+\ingroup Parallel_Tasks
+Return the size of the current combined worker, i.e. the total number
+of cpus running the same task in the case of ::STARPU_SPMD parallel
+tasks, or the total number of threads that the task is allowed to
+start in the case of ::STARPU_FORKJOIN parallel tasks.
+
+\fn int starpu_combined_worker_get_rank(void)
+\ingroup Parallel_Tasks
+Return the rank of the current thread within the combined worker. Can
+only be used in ::STARPU_FORKJOIN parallel tasks, to know which part
+of the task to work on.
+
+\fn unsigned starpu_combined_worker_get_count(void)
+\ingroup Parallel_Tasks
+Return the number of different combined workers.
+
+\fn int starpu_combined_worker_get_id(void)
+\ingroup Parallel_Tasks
+Return the identifier of the current combined worker.
+
+\fn int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
+\ingroup Parallel_Tasks
+Register a new combined worker and get its identifier.
+
+\fn int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid)
+\ingroup Parallel_Tasks
+Get the description of a combined worker.
+
+\fn int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
+\ingroup Parallel_Tasks
+Variant of starpu_worker_can_execute_task() compatible with combined
+workers.
+
+\fn void starpu_parallel_task_barrier_init(struct starpu_task*task, int workerid)
+\ingroup Parallel_Tasks
+Initialise the barrier for the parallel task, and dispatch the task
+between the different combined workers.
+
+\struct starpu_machine_topology
+\ingroup Parallel_Tasks
+\var starpu_machine_topology::nworkers
+        Total number of workers.
+\var starpu_machine_topology::ncombinedworkers
+        Total number of combined workers.
+\var starpu_machine_topology::hwtopology
+        Topology as detected by hwloc. To maintain ABI compatibility
+	when hwloc is not available, the field is replaced with <c>void *dummy</c>
+\var starpu_machine_topology::nhwcpus
+        Total number of CPUs, as detected by the topology code. May be
+	different from the actual number of CPU workers.
+\var starpu_machine_topology::nhwcudagpus
+        Total number of CUDA devices, as detected. May be different
+	from the actual number of CUDA workers.
+\var starpu_machine_topology::nhwopenclgpus
+        Total number of OpenCL devices, as detected. May be different
+	from the actual number of OpenCL workers.
+\var starpu_machine_topology::ncpus
+        Actual number of CPU workers used by StarPU.
+\var starpu_machine_topology::ncudagpus
+        Actual number of CUDA workers used by StarPU.
+\var starpu_machine_topology::nopenclgpus
+        Actual number of OpenCL workers used by StarPU.
+\var starpu_machine_topology::workers_bindid
+        Indicates the successive cpu identifier that should be used to
+	bind the workers. It is either filled according to the user’s
+	explicit parameters (from starpu_conf()) or according to the
+	STARPU_WORKERS_CPUID environment variable. Otherwise, a
+	round-robin policy is used to distribute the workers over the cpus.
+\var starpu_machine_topology::workers_cuda_gpuid
+        Indicates the successive cuda identifier that should be used by
+	the CUDA driver. It is either filled according to the user’s
+	explicit parameters (from starpu_conf()) or according to the
+	STARPU_WORKERS_CUDAID environment variable. Otherwise, they are taken
+	in ID order.
+\var starpu_machine_topology::workers_opencl_gpuid
+        Indicates the successive OpenCL identifier that should be used
+        by the OpenCL driver. It is either filled according to the
+        user’s explicit parameters (from starpu_conf()) or according to
+        the STARPU_WORKERS_OPENCLID environment variable. Otherwise,
+        they are taken in ID order.
+
+
+*/
+

+ 209 - 0
doc/doxygen/chapters/api/performance_model.doxy

@@ -0,0 +1,209 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Performance_Model Performance Model
+
+\struct starpu_perfmodel
+\brief Contains all information about a performance model. At least the
+type and symbol fields have to be filled when defining a performance
+model for a codelet. For compatibility, make sure to initialize the
+whole structure to zero, either by using explicit memset, or by
+letting the compiler implicitly do it in e.g. static storage case. If
+not provided, other fields have to be zero.
+\ingroup Performance_Model
+\var starpu_perfmodel::type
+is the type of performance model
+<ul>
+<li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
+::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
+this is purely history-based.
+</li>
+<li> ::STARPU_PER_ARCH: field starpu_perfmodel::per_arch has to be
+filled with functions which return the cost in micro-seconds.
+</li>
+<li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
+filled with a function that returns the cost in micro-seconds on a
+CPU, timing on other archs will be determined by multiplying by an
+arch-specific factor.
+</li>
+</ul>
+\var starpu_perfmodel::symbol
+is the symbol name for the performance model, which will be used as
+file name to store the model. It must be set otherwise the model will
+be ignored.
+\var starpu_perfmodel::cost_model
+\deprecated
+This field is deprecated. Use instead the field starpu_perfmodel::cost_function.
+\var starpu_perfmodel::cost_function
+Used by ::STARPU_COMMON: takes a task and implementation number, and
+must return a task duration estimation in micro-seconds.
+\var starpu_perfmodel::size_base
+Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
+::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
+implementation number, and returns the size to be used as index for
+history and regression.
+\var starpu_perfmodel::per_arch
+Used by ::STARPU_PER_ARCH: array of structures starpu_perfmodel_per_arch
+\var starpu_perfmodel::is_loaded
+\private
+Whether the performance model is already loaded from the disk.
+\var starpu_perfmodel::benchmarking
+\private
+Whether the performance model is still being calibrated.
+\var starpu_perfmodel::model_rwlock
+\private
+Lock to protect concurrency between loading from disk (W), updating
+the values (W), and making a performance estimation (R).
+
+\struct starpu_perfmodel_regression_model
+\brief ...
+\ingroup Performance_Model
+\var starpu_perfmodel_regression_model::sumlny
+sum of ln(measured)
+\var starpu_perfmodel_regression_model::sumlnx
+sum of ln(size)
+\var starpu_perfmodel_regression_model::sumlnx2
+sum of ln(size)^2
+\var starpu_perfmodel_regression_model::minx
+minimum size
+\var starpu_perfmodel_regression_model::maxx
+maximum size
+\var starpu_perfmodel_regression_model::sumlnxlny
+sum of ln(size)*ln(measured)
+\var starpu_perfmodel_regression_model::alpha
+estimated = alpha * size ^ beta
+\var starpu_perfmodel_regression_model::beta
+estimated = alpha * size ^ beta
+\var starpu_perfmodel_regression_model::valid
+whether the linear regression model is valid (i.e. enough measures)
+\var starpu_perfmodel_regression_model::a
+estimated = a size ^b + c
+\var starpu_perfmodel_regression_model::b
+estimated = a size ^b + c
+\var starpu_perfmodel_regression_model::c
+estimated = a size ^b + c
+\var starpu_perfmodel_regression_model::nl_valid
+whether the non-linear regression model is valid (i.e. enough measures)
+\var starpu_perfmodel_regression_model::nsample
+number of sample values for non-linear regression
+
+\struct starpu_perfmodel_per_arch
+\brief contains information about the performance model of a given
+arch.
+\ingroup Performance_Model
+\var starpu_perfmodel_per_arch::cost_model
+\deprecated
+This field is deprecated. Use instead the field
+starpu_perfmodel_per_arch::cost_function.
+\var starpu_perfmodel_per_arch::cost_function
+Used by ::STARPU_PER_ARCH, must point to functions which take a task,
+the target arch and implementation number (as a mere convenience, since
+the array is already indexed by these), and must return a task
+duration estimation in micro-seconds.
+\var starpu_perfmodel_per_arch::size_base
+Same as in structure starpu_perfmodel, but per-arch, in case it
+depends on the architecture-specific implementation.
+\var starpu_perfmodel_per_arch::history
+\private
+The history of performance measurements.
+\var starpu_perfmodel_per_arch::list
+\private
+Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED,
+records all execution history measures.
+\var starpu_perfmodel_per_arch::regression
+\private
+Used by ::STARPU_HISTORY_BASED and
+::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the
+regression.
+
+\struct starpu_perfmodel_history_list
+\brief todo
+\ingroup Performance_Model
+\var starpu_perfmodel_history_list::next
+todo
+\var starpu_perfmodel_history_list::entry
+todo
+
+\struct starpu_perfmodel_history_entry
+\brief todo
+\ingroup Performance_Model
+\var starpu_perfmodel_history_entry::mean
+mean_n = 1/n sum
+\var starpu_perfmodel_history_entry::deviation
+n dev_n = sum2 - 1/n (sum)^2
+\var starpu_perfmodel_history_entry::sum
+sum of samples
+\var starpu_perfmodel_history_entry::sum2
+sum of samples^2
+\var starpu_perfmodel_history_entry::nsample
+todo
+\var starpu_perfmodel_history_entry::footprint
+todo
+\var starpu_perfmodel_history_entry::size
+in bytes
+\var starpu_perfmodel_history_entry::flops
+Provided by the application
+
+\fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
+\ingroup Performance_Model
+\brief loads a given performance model. The model structure has to be
+completely zero, and will be filled with the information saved in
+<c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
+external tools that should read the performance model files.
+
+\fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
+\ingroup Performance_Model
+\brief unloads the given model which has been previously loaded
+through the function starpu_perfmodel_load_symbol()
+
+\fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, char *path, size_t maxlen, unsigned nimpl)
+\ingroup Performance_Model
+\brief returns the path to the debugging information for the performance model.
+
+\fn void starpu_perfmodel_get_arch_name(enum starpu_perfmodel_archtype arch, char *archname, size_t maxlen, unsigned nimpl)
+\ingroup Performance_Model
+\brief returns the architecture name for \p arch
+
+\fn enum starpu_perfmodel_archtype starpu_worker_get_perf_archtype(int workerid)
+\ingroup Performance_Model
+\brief returns the architecture type of a given worker.
+
+\fn int starpu_perfmodel_list(FILE *output)
+\ingroup Performance_Model
+\brief prints a list of all performance models on \p output
+
+\fn void starpu_perfmodel_print(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
+\ingroup Performance_Model
+\brief todo
+
+\fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
+\ingroup Performance_Model
+\brief todo
+
+\fn void starpu_bus_print_bandwidth(FILE *f)
+\ingroup Performance_Model
+\brief prints a matrix of bus bandwidths on \p f.
+
+\fn void starpu_bus_print_affinity(FILE *f)
+\ingroup Performance_Model
+\brief prints the affinity devices on \p f.
+
+\fn void starpu_topology_print(FILE *f)
+\ingroup Performance_Model
+\brief prints a description of the topology on \p f.
+
+\fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perfmodel_archtype arch, unsigned cpuid, unsigned nimpl, double measured);
+\ingroup Performance_Model
+\brief This feeds the performance model \p model with an explicit
+measurement \p measured, in addition to measurements done by StarPU
+itself. This can be useful when the application already has an
+existing set of measurements done in good conditions, that StarPU
+could benefit from instead of doing on-line measurements. An example
+of use can be seen in \ref Performance_model_example.
+
+*/

+ 176 - 0
doc/doxygen/chapters/api/profiling.doxy

@@ -0,0 +1,176 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Profiling Profiling
+
+\struct starpu_profiling_task_info
+\ingroup Profiling
+\brief This structure contains information about the execution of a
+task. It is accessible from the field starpu_task::profiling_info if
+profiling was enabled.
+\var starpu_profiling_task_info::submit_time
+Date of task submission (relative to the initialization of StarPU).
+
+\var starpu_profiling_task_info::push_start_time
+Time when the task was submitted to the scheduler.
+
+\var starpu_profiling_task_info::push_end_time
+Time when the scheduler finished with the task submission.
+
+\var starpu_profiling_task_info::pop_start_time
+Time when the scheduler started to be requested for a task, and eventually gave that task.
+
+\var starpu_profiling_task_info::pop_end_time
+Time when the scheduler finished providing the task for execution.
+
+\var starpu_profiling_task_info::acquire_data_start_time
+Time when the worker started fetching input data.
+
+\var starpu_profiling_task_info::acquire_data_end_time
+Time when the worker finished fetching input data.
+
+\var starpu_profiling_task_info::start_time
+Date of task execution beginning (relative to the initialization of StarPU).
+
+\var starpu_profiling_task_info::end_time
+Date of task execution termination (relative to the initialization of StarPU).
+
+\var starpu_profiling_task_info::release_data_start_time
+Time when the worker started releasing data.
+
+\var starpu_profiling_task_info::release_data_end_time
+Time when the worker finished releasing data.
+
+\var starpu_profiling_task_info::callback_start_time
+        Time when the worker started the application callback for the task.
+
+\var starpu_profiling_task_info::callback_end_time
+        Time when the worker finished the application callback for the task.
+
+\var starpu_profiling_task_info::workerid
+        Identifier of the worker which has executed the task.
+
+\var starpu_profiling_task_info::used_cycles
+        Number of cycles used by the task, only available in the MoviSim
+
+\var starpu_profiling_task_info::stall_cycles
+        Number of cycles stalled within the task, only available in the MoviSim
+
+\var starpu_profiling_task_info::power_consumed
+        Power consumed by the task, only available in the MoviSim
+
+\struct starpu_profiling_worker_info
+\brief This structure contains the profiling information associated to
+a worker. The timing is provided since the previous call to
+starpu_profiling_worker_get_info()
+\ingroup Profiling
+\var starpu_profiling_worker_info::start_time
+        Starting date for the reported profiling measurements.
+\var starpu_profiling_worker_info::total_time
+        Duration of the profiling measurement interval.
+\var starpu_profiling_worker_info::executing_time
+        Time spent by the worker to execute tasks during the profiling measurement interval.
+\var starpu_profiling_worker_info::sleeping_time
+        Time spent idling by the worker during the profiling measurement interval.
+\var starpu_profiling_worker_info::executed_tasks
+        Number of tasks executed by the worker during the profiling measurement interval.
+\var starpu_profiling_worker_info::used_cycles
+        Number of cycles used by the worker, only available in the MoviSim
+\var starpu_profiling_worker_info::stall_cycles
+        Number of cycles stalled within the worker, only available in the MoviSim
+\var starpu_profiling_worker_info::power_consumed
+        Power consumed by the worker, only available in the MoviSim
+
+\struct starpu_profiling_bus_info
+\brief todo
+\ingroup Profiling
+\var starpu_profiling_bus_info::start_time
+        Time of bus profiling startup.
+\var starpu_profiling_bus_info::total_time
+        Total time of bus profiling.
+\var starpu_profiling_bus_info::transferred_bytes
+        Number of bytes transferred during profiling.
+\var starpu_profiling_bus_info::transfer_count
+        Number of transfers during profiling.
+
+\fn int starpu_profiling_status_set(int status)
+\ingroup Profiling
+\brief This function sets the profiling status. Profiling is activated
+by passing STARPU_PROFILING_ENABLE in status. Passing
+STARPU_PROFILING_DISABLE disables profiling. Calling this function
+resets all profiling measurements. When profiling is enabled, the
+field starpu_task::profiling_info points to a valid structure
+starpu_profiling_task_info containing information about the execution
+of the task. Negative return values indicate an error, otherwise the
+previous status is returned.
+
+\fn int starpu_profiling_status_get(void)
+\ingroup Profiling
+\brief Return the current profiling status or a negative value in case
+there was an error.
+
+\fn void starpu_profiling_set_id(int new_id)
+\ingroup Profiling
+\brief This function sets the ID used for profiling trace filename. It
+needs to be called before starpu_init().
+
+\fn int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info)
+\ingroup Profiling
+\brief Get the profiling info associated to the worker identified by
+\p workerid, and reset the profiling measurements. If the argument \p
+worker_info is NULL, only reset the counters associated to worker
+\p workerid. Upon successful completion, this function returns 0.
+Otherwise, a negative value is returned.
+
+\fn int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info)
+\ingroup Profiling
+\brief todo
+
+\fn int starpu_bus_get_count(void)
+\ingroup Profiling
+\brief Return the number of buses in the machine
+
+\fn int starpu_bus_get_id(int src, int dst)
+\ingroup Profiling
+\brief Return the identifier of the bus between \p src and \p dst
+
+\fn int starpu_bus_get_src(int busid)
+\ingroup Profiling
+\brief Return the source point of bus \p busid
+
+\fn int starpu_bus_get_dst(int busid)
+\ingroup Profiling
+\brief Return the destination point of bus \p busid
+
+\fn double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
+\ingroup Profiling
+\brief Returns the time elapsed between \p start and \p end in microseconds.
+
+\fn double starpu_timing_timespec_to_us(struct timespec *ts)
+\ingroup Profiling
+\brief Converts the given timespec \p ts into microseconds
+
+\fn void starpu_profiling_bus_helper_display_summary(void)
+\ingroup Profiling
+\brief Displays statistics about the bus on stderr if the environment
+variable STARPU_BUS_STATS is defined. The function is called
+automatically by starpu_shutdown().
+
+\fn void starpu_profiling_worker_helper_display_summary(void)
+\ingroup Profiling
+\brief Displays statistics about the workers on stderr if the
+environment variable STARPU_WORKER_STATS is defined. The function is
+called automatically by starpu_shutdown().
+
+\fn void starpu_data_display_memory_stats()
+\ingroup Profiling
+\brief Display statistics about the current data handles registered
+within StarPU. StarPU must have been configured with the option
+<c>--enable-memory-stats</c> (see \ref Memory_feedback).
+
+*/

+ 135 - 0
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -0,0 +1,135 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Scheduling_Contexts Scheduling Contexts
+
+\brief StarPU permits, on the one hand, grouping workers into combined
+workers in order to execute a parallel task and, on the other hand,
+grouping tasks into bundles that will be executed by a single specified
+worker. In contrast, when workers are grouped into scheduling contexts,
+StarPU tasks are submitted to a context and scheduled with the policy
+assigned to that context. Scheduling contexts can be created, deleted
+and modified dynamically.
+
+\fn unsigned starpu_sched_ctx_create(const char *policy_name, int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name)
+\ingroup Scheduling_Contexts
+This function creates a scheduling context which uses the scheduling
+policy \p policy_name and assigns the workers in \p workerids_ctx to
+execute the tasks submitted to it.
+The return value represents the identifier of the context that has
+just been created. It will be further used to indicate the context the
+tasks will be submitted to. The return value should be at most
+STARPU_NMAX_SCHED_CTXS.
+
+\fn void starpu_sched_ctx_delete(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Delete scheduling context \p sched_ctx_id and transfer remaining
+workers to the inheritor scheduling context.
+
+\fn void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+This function dynamically adds the workers in \p workerids_ctx to the
+context \p sched_ctx_id. The last argument cannot be greater than
+STARPU_NMAX_SCHED_CTXS.
+
+\fn void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+This function removes the workers in \p workerids_ctx from the context
+\p sched_ctx_id. The last argument cannot be greater than
+STARPU_NMAX_SCHED_CTXS.
+
+\struct starpu_worker_collection
+\ingroup Scheduling_Contexts
+\brief A scheduling context manages a collection of workers that can
+be memorized using different data structures. Thus, a generic
+structure is available in order to simplify the choice of its type.
+Only the list data structure is currently available, but implementations
+of further data structures (such as trees) are foreseen.
+\var starpu_worker_collection::workerids
+        The workerids managed by the collection
+\var starpu_worker_collection::nworkers
+        The number of workers in the collection
+\var int starpu_worker_collection::type
+        The type of structure (currently STARPU_WORKER_LIST is the only one available)
+\var unsigned (*starpu_worker_collection::has_next)(struct starpu_worker_collection *workers)
+        Checks if there is a next worker
+\var int (*starpu_worker_collection::get_next)(struct starpu_worker_collection *workers)
+        Gets the next worker
+\var int (*starpu_worker_collection::add)(struct starpu_worker_collection *workers, int worker)
+        Adds a worker to the collection
+\var int (*starpu_worker_collection::remove)(struct starpu_worker_collection *workers, int worker)
+        Removes a worker from the collection
+\var void* (*starpu_worker_collection::init)(struct starpu_worker_collection *workers)
+        Initialize the collection
+\var void (*starpu_worker_collection::deinit)(struct starpu_worker_collection *workers)
+        Deinitialize the collection
+\var void (*starpu_worker_collection::init_cursor)(struct starpu_worker_collection *workers)
+        (optional) Initialize the cursor if there is one
+\var void (*starpu_worker_collection::deinit_cursor)(struct starpu_worker_collection *workers)
+        (optional) Deinitialize the cursor if there is one
+
+\fn struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int type)
+\ingroup Scheduling_Contexts
+Create a worker collection of the type indicated by the last parameter
+for the context specified through the first parameter.
+
+\fn void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Delete the worker collection of the specified scheduling context
+
+\fn struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Return the worker collection managed by the indicated context
+
+\fn void starpu_sched_ctx_set_context(unsigned *sched_ctx_id)
+\ingroup Scheduling_Contexts
+Set the scheduling context the subsequent tasks will be submitted to
+
+\fn unsigned starpu_sched_ctx_get_context(void)
+\ingroup Scheduling_Contexts
+Return the scheduling context the tasks are currently submitted to
+
+\fn unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Return the number of workers managed by the specified context
+(usually needed to verify if it manages any workers or if it should be
+blocked).
+
+\fn unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2)
+\ingroup Scheduling_Contexts
+Return the number of workers shared by two contexts.
+
+\fn int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio)
+\ingroup Scheduling_Contexts
+Defines the minimum task priority level supported by the scheduling
+policy of the given scheduler context. The default minimum priority
+level is the same as the default priority level which is 0 by
+convention. The application may access that value by calling the
+starpu_sched_ctx_get_min_priority function. This function should only
+be called from the initialization method of the scheduling policy, and
+should not be used directly from the application.
+
+\fn int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio)
+\ingroup Scheduling_Contexts
+Defines the maximum priority level supported by the scheduling policy
+of the given scheduler context. The default maximum priority level is
+1. The application may access that value by calling the
+starpu_sched_ctx_get_max_priority function. This function should only
+be called from the initialization method of the scheduling policy, and
+should not be used directly from the application.
+
+\fn int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Returns the current minimum priority level supported by the scheduling
+policy of the given scheduler context.
+
+\fn int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id)
+\ingroup Scheduling_Contexts
+Returns the current maximum priority level supported by the scheduling
+policy of the given scheduler context.
+*/

+ 59 - 0
doc/doxygen/chapters/api/task_bundles.doxy

@@ -0,0 +1,59 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Task_Bundles Task Bundles
+
+\typedef starpu_task_bundle_t
+\ingroup Task_Bundles
+Opaque structure describing a list of tasks that should be scheduled
+on the same worker whenever it’s possible. It must be considered as a
+hint given to the scheduler as there is no guarantee that they will be
+executed on the same worker.
+
+\fn void starpu_task_bundle_create (starpu_task_bundle_t *bundle)
+\ingroup Task_Bundles
+Factory function creating and initializing \p bundle. When the call
+returns, the needed memory is allocated and \p bundle is ready to use.
+
+\fn int starpu_task_bundle_insert (starpu_task_bundle_t bundle, struct starpu_task *task)
+\ingroup Task_Bundles
+Insert \p task in \p bundle. Until \p task is removed from \p bundle,
+its expected length and data transfer time will be considered along
+with those of the other tasks of \p bundle. This function must not be
+called if \p bundle is already closed and/or \p task is already
+submitted. On success, it returns 0. There are two cases of error: if
+\p bundle is already closed, it returns <c>-EPERM</c>; if \p task was
+already submitted, it returns <c>-EINVAL</c>.
+
+\fn int starpu_task_bundle_remove (starpu_task_bundle_t bundle, struct starpu_task *task)
+\ingroup Task_Bundles
+Remove \p task from \p bundle. Of course \p task must have been
+previously inserted in \p bundle. This function must not be called if
+\p bundle is already closed and/or \p task is already submitted. Doing
+so would result in undefined behaviour. On success, it returns 0. If
+\p bundle is already closed it returns <c>-ENOENT</c>.
+
+\fn void starpu_task_bundle_close (starpu_task_bundle_t bundle)
+\ingroup Task_Bundles
+Inform the runtime that the user will not modify \p bundle anymore,
+i.e. no more tasks will be inserted or removed. Thus the runtime can
+destroy it when possible.
+
+\fn double starpu_task_bundle_expected_length (starpu_task_bundle_t bundle, enum starpu_perfmodel_archtype arch, unsigned nimpl)
+\ingroup Task_Bundles
+Return the expected duration of \p bundle in micro-seconds.
+
+\fn double starpu_task_bundle_expected_power (starpu_task_bundle_t bundle, enum starpu_perfmodel_archtype arch, unsigned nimpl)
+\ingroup Task_Bundles
+Return the expected power consumption of \p bundle in J.
+
+\fn double starpu_task_bundle_expected_data_transfer_time (starpu_task_bundle_t bundle, unsigned memory_node)
+\ingroup Task_Bundles
+Return the time (in micro-seconds) expected to transfer all data used within \p bundle.
+
+*/

+ 68 - 0
doc/doxygen/chapters/api/task_lists.doxy

@@ -0,0 +1,68 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \defgroup Task_Lists Task Lists
+
+\struct starpu_task_list
+\brief Stores a doubly-linked list of tasks
+\ingroup Task_Lists
+\var starpu_task_list::head
+head of the list
+\var starpu_task_list::tail
+tail of the list
+
+\fn void starpu_task_list_init(struct starpu_task_list *list)
+\ingroup Task_Lists
+Initialize a list structure
+
+\fn void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task)
+\ingroup Task_Lists
+Push \p task at the front of \p list
+
+\fn void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task)
+\ingroup Task_Lists
+Push \p task at the back of \p list
+
+\fn struct starpu_task * starpu_task_list_front(struct starpu_task_list *list)
+\ingroup Task_Lists
+Get the front of \p list (without removing it)
+
+\fn struct starpu_task * starpu_task_list_back(struct starpu_task_list *list)
+\ingroup Task_Lists
+Get the back of \p list (without removing it)
+
+\fn int starpu_task_list_empty(struct starpu_task_list *list)
+\ingroup Task_Lists
+Test if \p list is empty
+
+\fn void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task)
+\ingroup Task_Lists
+Remove \p task from \p list
+
+\fn struct starpu_task * starpu_task_list_pop_front(struct starpu_task_list *list)
+\ingroup Task_Lists
+Remove the element at the front of \p list
+
+\fn struct starpu_task * starpu_task_list_pop_back(struct starpu_task_list *list)
+\ingroup Task_Lists
+Remove the element at the back of \p list
+
+\fn struct starpu_task * starpu_task_list_begin(struct starpu_task_list *list)
+\ingroup Task_Lists
+Get the first task of \p list.
+
+\fn struct starpu_task * starpu_task_list_end(struct starpu_task_list *list)
+\ingroup Task_Lists
+Get the end of \p list.
+
+\fn struct starpu_task * starpu_task_list_next(struct starpu_task *task)
+\ingroup Task_Lists
+Get the task following \p task in its list. This is not erase-safe.
+
+*/
+