6 年之前 · 6862bbddbf
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -116,8 +116,6 @@ chapters =	\
 
																 	chapters/api/mic_extensions.doxy \
															
 
																 	chapters/api/scc_extensions.doxy \
															
 
																 	chapters/api/parallel_tasks.doxy \
															
 
																-	chapters/api/performance_model.doxy \
															
 
																-	chapters/api/profiling.doxy \
															
 
																 	chapters/api/scheduling_contexts.doxy \
															
 
																 	chapters/api/scheduling_policy.doxy \
															
 
																 	chapters/api/standard_memory_library.doxy \
															
@@ -131,7 +129,6 @@ chapters =	\
 
																 	chapters/api/toolbox.doxy \
															
 
																 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
															
 
																 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
															
 
																-	chapters/api/modularized_scheduler.doxy \
															
 
																 	chapters/api/interoperability.doxy
															
 
																 images = 	\
															
--- a/doc/doxygen/chapters/api/modularized_scheduler.doxy
+++ b/doc/doxygen/chapters/api/modularized_scheduler.doxy
@@ -1,560 +0,0 @@
 
																-/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																- *
															
 
																- * Copyright (C) 2013,2014                                Inria
															
 
																- * Copyright (C) 2013-2018                                CNRS
															
 
																- * Copyright (C) 2009-2011,2014,2015,2017,2018-2019       Université de Bordeaux
															
 
																- * Copyright (C) 2013                                     Simon Archipoff
															
 
																- *
															
 
																- * StarPU is free software; you can redistribute it and/or modify
															
 
																- * it under the terms of the GNU Lesser General Public License as published by
															
 
																- * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																- * your option) any later version.
															
 
																- *
															
 
																- * StarPU is distributed in the hope that it will be useful, but
															
 
																- * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																- *
															
 
																- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																- */
															
 
																-
															
 
																-/*! \defgroup API_Modularized_Scheduler Modularized Scheduler Interface
															
 
																-
															
 
																-\enum starpu_sched_component_properties
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-flags for starpu_sched_component::properties
															
 
																-\var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_HOMOGENEOUS
															
 
																-     indicate that all workers have the same starpu_worker_archtype
															
 
																-\var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE
															
 
																-     indicate that all workers have the same memory component
															
 
																-
															
 
																-\def STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-indicate if component is homogeneous
															
 
																-\def STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-indicate if all workers have the same memory component
															
 
																-
															
 
																-\struct starpu_sched_component
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-This structure represents a scheduler module.  A scheduler is a
															
 
																-tree-like structure of them, some parts of scheduler can be shared by
															
 
																-several contexts to perform some local optimisations, so, for all
															
 
																-components, a list of parent is defined by \c sched_ctx_id. They
															
 
																-embed their specialised methods in a pseudo object-style, so calls are
															
 
																-like <c>component->push_task(component,task)</c>
															
 
																-
															
 
																-\var struct starpu_sched_tree *starpu_sched_component::tree
															
 
																-     The tree containing the component
															
 
																-\var struct starpu_bitmap *starpu_sched_component::workers
															
 
																-     this member contains the set of underlying workers
															
 
																-\var starpu_sched_component::workers_in_ctx
															
 
																-     this member contains the subset of starpu_sched_component::workers that is currently available in the context
															
 
																-     The push method should take this member into account.
															
 
																-     this member is set with :
															
 
																-     component->workers UNION tree->workers UNION
															
 
																-     component->children[i]->workers_in_ctx iff there exists x such that component->children[i]->parents[x] == component
															
 
																-\var void *starpu_sched_component::data
															
 
																-     private data
															
 
																-\var int starpu_sched_component::nchildren
															
 
																-     the number of component's children
															
 
																-\var struct starpu_sched_component **starpu_sched_component::children
															
 
																-     the vector of component's children
															
 
																-\var int starpu_sched_component::nparents
															
 
																-     the number of component's parents
															
 
																-\var struct starpu_sched_component **starpu_sched_component::parents
															
 
																-     the vector of component's parents
															
 
																-
															
 
																-\var void(*starpu_sched_component::add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child)
															
 
																-     add a child to component
															
 
																-\var void(*starpu_sched_component::remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child)
															
 
																-     remove a child from component
															
 
																-\var void(*starpu_sched_component::add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent)
															
 
																-     todo
															
 
																-\var void(*starpu_sched_component::remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent)
															
 
																-     todo
															
 
																-
															
 
																-\var int (*starpu_sched_component::push_task)(struct starpu_sched_component *, struct starpu_task *)
															
 
																-     push a task in the scheduler module. this function is called to
															
 
																-     push a task on component subtree, this can either perform a
															
 
																-     recursive call on a child or store the task in the component,
															
 
																-     then it will be returned by a further pull_task call.
															
 
																-     the caller must ensure that component is able to execute task.
															
 
																-     This method must either return 0 if the task was properly stored or
															
 
																-     passed over to a child component, or return a value different from 0 if the
															
 
																-     task could not be consumed (e.g. the queue is full).
															
 
																-\var struct starpu_task * (*starpu_sched_component::pull_task)(struct starpu_sched_component *component, struct starpu_sched_component *to)
															
 
																-     pop a task from the scheduler module. this function is called by workers to get a task from their
															
 
																-     parents. this function should first return a locally stored task
															
 
																-     or perform a recursive call on the parents.
															
 
																-     the task returned by this function should be executable by the caller
															
 
																-
															
 
																-\var int (*starpu_sched_component::can_push)(struct starpu_sched_component *component, struct starpu_sched_component *to)
															
 
																-     This function is called by a component which implements a queue,
															
 
																-     allowing it to signify to its parents that an empty slot is
															
 
																-     available in its queue. This should return 1 if some tasks could be pushed
															
 
																-     The basic implementation of this function
															
 
																-     is a recursive call to its parents, the user has to specify a
															
 
																-     personally-made function to catch those calls.
															
 
																-\var int (*starpu_sched_component::can_pull)(struct starpu_sched_component *component)
															
 
																-     This function allows a component to wake up a worker. It is
															
 
																-     currently called by component which implements a queue, to
															
 
																-     signify to its children that a task has been pushed in its local
															
 
																-     queue, and is available to be popped by a worker, for example.
															
 
																-     This should return 1 if some container or worker could (or will) pull
															
 
																-     some tasks.
															
 
																-     The basic implementation of this function is a recursive call to
															
 
																-     its children, until at least one worker has been woken up.
															
 
																-
															
 
																-\var double (*starpu_sched_component::estimated_load)(struct starpu_sched_component *component)
															
 
																-	is a heuristic to compute the load of a scheduler module. Basically the number of tasks divided by the sum
															
 
																-	of relative speedups of workers available in context.
															
 
																-	estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker))
															
 
																-\var starpu_sched_component::estimated_end
															
 
																-	return the time when a worker will enter in starvation. This function is relevant only if the task->predicted
															
 
																-	member has been set.
															
 
																-
															
 
																-\var void (*starpu_sched_component::deinit_data)(struct starpu_sched_component *component)
															
 
																-	called by starpu_sched_component_destroy. Should free data allocated during creation
															
 
																-\var void (*starpu_sched_component::notify_change_workers)(struct starpu_sched_component *component)
															
 
																-	this function is called for each component when workers are added or removed from a context
															
 
																-\var int starpu_sched_component::properties
															
 
																-	todo
															
 
																-\var hwloc_obj_t starpu_sched_component::obj
															
 
																-	the hwloc object associated to scheduler module. points to the
															
 
																-	part of the topology that is bound to this component, e.g. a NUMA
															
 
																-	node for a ws component that would balance load between
															
 
																-	underlying sockets
															
 
																-
															
 
																-\struct starpu_sched_tree
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-The actual scheduler
															
 
																-\var struct starpu_sched_component *starpu_sched_tree::root
															
 
																-	this is the entry module of the scheduler
															
 
																-\var struct starpu_bitmap *starpu_sched_tree::workers
															
 
																-	this is the set of workers available in this context, this value is used to mask workers in modules
															
 
																-\var unsigned starpu_sched_tree::sched_ctx_id
															
 
																-	the context id of the scheduler
															
 
																-\var starpu_pthread_mutex_t starpu_sched_tree::lock
															
 
																-	this lock is used to protect the scheduler, it is taken in
															
 
																-	read mode pushing a task and in write mode for adding or
															
 
																-	removing workers
															
 
																-
															
 
																-@name Scheduling Tree API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 create an empty initialized starpu_sched_tree
															
 
																-
															
 
																-\fn void starpu_sched_tree_destroy(struct starpu_sched_tree *tree)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 destroy tree and free all non shared components in it.
															
 
																-
															
 
																-\fn void starpu_sched_tree_update_workers(struct starpu_sched_tree *t)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers).
															
 
																-
															
 
																-\fn void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers)
															
 
																-
															
 
																-\fn int starpu_sched_tree_push_task(struct starpu_task *task)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-
															
 
																-\fn struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-
															
 
																-\fn void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-
															
 
																-\fn void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-
															
 
																-\fn void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 Attaches component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. MCT)
															
 
																-
															
 
																-@name Generic Scheduling Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 allocate and initialize component fields with default values:
															
 
																-	.pop_task make recursive call on father
															
 
																-	.estimated_load compute relative speedup and tasks in sub tree
															
 
																-	.estimated_end return the minimum of recursive call on children
															
 
																-	.add_child is starpu_sched_component_add_child
															
 
																-	.remove_child is starpu_sched_component_remove_child
															
 
																-	.notify_change_workers does nothing
															
 
																-	.deinit_data does nothing
															
 
																-
															
 
																-\fn void starpu_sched_component_destroy(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 free data allocated by starpu_sched_component_create and call component->deinit_data(component)
															
 
																-	 set to <c>NULL</c> the member starpu_sched_component::fathers[sched_ctx_id] of all children if it is equal to \p component
															
 
																-
															
 
																-\fn void starpu_sched_component_destroy_rec(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 recursively destroy non shared parts of a \p component 's tree
															
 
																-
															
 
																-\fn int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component can execute \p task, this function takes into account the workers available in the scheduling context
															
 
																-
															
 
																-\fn int starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return a non <c>NULL</c> value if \p component can execute \p task.
															
 
																-	 write the execution prediction length for the best implementation of the best worker available and write this at \p length address.
															
 
																-	 this result is more relevant if starpu_sched_component::is_homogeneous is non <c>NULL</c>.
															
 
																-	 if a worker needs to be calibrated for an implementation, NaN is written to \p length.
															
 
																-
															
 
																-\fn double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return the average time to transfer \p task data to underlying \p component workers.
															
 
																-
															
 
																-@name Worker Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers).
															
 
																-
															
 
																-\fn int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0
															
 
																-
															
 
																-\fn int starpu_sched_component_is_worker(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a worker component
															
 
																-
															
 
																-\fn int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a simple worker component
															
 
																-
															
 
																-\fn int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a combined worker component
															
 
																-
															
 
																-\fn void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-	 update predictions for workers
															
 
																-
															
 
																-\fn void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 compatibility with starpu_sched_policy interface
															
 
																-
															
 
																-@name Flow-control Fifo Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-default function for the can_push component method, just calls can_push of parents until one of them returns non-zero
															
 
																-
															
 
																-\fn int starpu_sched_component_can_pull(struct starpu_sched_component * component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-default function for the can_pull component method, just calls can_pull of children until one of them returns non-zero
															
 
																-
															
 
																-\fn int starpu_sched_component_can_pull_all(struct starpu_sched_component * component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-function for the can_pull component method, calls can_pull of all children
															
 
																-
															
 
																-\fn double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-default function for the estimated_load component method, just sums up the loads
															
 
																-of the children of the component.
															
 
																-
															
 
																-\fn double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-function that can be used for the estimated_end component method, which just computes the minimum completion time of the children.
															
 
																-
															
 
																-\fn double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-function that can be used for the estimated_end component method, which computes
															
 
																-the minimum completion time of the children, and adds to it an estimation of how
															
 
																-existing queued work, plus the exp_len work, can be completed. This is typically
															
 
																-used instead of starpu_sched_component_estimated_end_min when the component
															
 
																-contains a queue of tasks, which thus needs to be added to the estimations.
															
 
																-
															
 
																-\fn double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-default function for the estimated_end component method, which just computes the average completion time of the children.
															
 
																-
															
 
																-
															
 
																-\struct starpu_sched_component_fifo_data
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-\var unsigned starpu_sched_component_fifo_data::ntasks_threshold
															
 
																-todo
															
 
																-\var double starpu_sched_component_fifo_data::exp_len_threshold
															
 
																-todo
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities.
															
 
																-	 A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it.
															
 
																-	 starpu_sched_component::estimated_end function compute the estimated length by dividing the sequential length by the number of underlying workers.
															
 
																-
															
 
																-\fn int starpu_sched_component_is_fifo(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a fifo component
															
 
																-
															
 
																-@name Flow-control Prio Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\struct starpu_sched_component_prio_data
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-\var unsigned starpu_sched_component_prio_data::ntasks_threshold
															
 
																-todo
															
 
																-\var double starpu_sched_component_prio_data::exp_len_threshold
															
 
																-todo
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_sched_component_is_prio(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-@name Resource-mapping Work-Stealing Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return a component that performs a work stealing scheduling. Tasks are pushed in a round robin way. estimated_end returns the average of expected length of fifos, starting at the average of the expected_end of its children. When a worker has to steal a task, it steals a task in a round robin way, and gets the last pushed task of the higher priority.
															
 
																-
															
 
																-\fn int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if it is a worker.
															
 
																-
															
 
																-\fn int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a work stealing component
															
 
																-
															
 
																-@name Resource-mapping Random Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 create a component that performs a random scheduling
															
 
																-
															
 
																-\fn int starpu_sched_component_is_random(struct starpu_sched_component *)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a random component
															
 
																-
															
 
																-@name Resource-mapping Eager Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_sched_component_is_eager(struct starpu_sched_component *)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-@name Resource-mapping Eager-Calibration Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-@name Resource-mapping MCT Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\struct starpu_sched_component_mct_data
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-\var double starpu_sched_component_mct_data::alpha
															
 
																-todo
															
 
																-\var double starpu_sched_component_mct_data::beta
															
 
																-todo
															
 
																-\var double starpu_sched_component_mct_data::_gamma
															
 
																-todo
															
 
																-\var double starpu_sched_component_mct_data::idle_power
															
 
																-todo
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-create a component with mct_data parameters. The mct component doesn't
															
 
																-do anything but pushing tasks on no_perf_model_component and
															
 
																-calibrating_component
															
 
																-
															
 
																-\fn int starpu_sched_component_is_mct(struct starpu_sched_component *component);
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-@name Resource-mapping Heft Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 this component perform a heft scheduling
															
 
																-
															
 
																-\fn int starpu_sched_component_is_heft(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return true iff \p component is a heft component
															
 
																-
															
 
																-@name Special-purpose Best_Implementation Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 Select the implementation that offer the shortest computation length for the first worker that can execute the task.
															
 
																-	 Or an implementation that need to be calibrated.
															
 
																-	 Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid.
															
 
																-	 If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined.
															
 
																-
															
 
																-@name Special-purpose Perfmodel_Select Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\struct starpu_sched_component_perfmodel_select_data
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::calibrator_component
															
 
																-todo
															
 
																-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::no_perfmodel_component
															
 
																-todo
															
 
																-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::perfmodel_component
															
 
																-todo
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-todo
															
 
																-
															
 
																-@name Recipe Component API
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-
															
 
																-\struct starpu_sched_component_composed_recipe
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	parameters for starpu_sched_component_composed_component_create
															
 
																-
															
 
																-\fn struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return an empty recipe for a composed component, it should not be used without modification
															
 
																-
															
 
																-\fn struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 return a recipe to build a composed component with a \p create_component
															
 
																-
															
 
																-\fn void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 add \p create_component under all previous components in recipe
															
 
																-
															
 
																-\fn void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called
															
 
																-
															
 
																-\fn struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 create a component that behaves as if all components of the recipe were linked, except that you can't use the starpu_sched_component_is_foo functions
															
 
																-	 if recipe contain a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component
															
 
																-
															
 
																-\struct starpu_sched_component_specs
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 Define how to build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
															
 
																-	 the level is just skipped. Bugs everywhere, do not rely on.
															
 
																-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_machine_composed_sched_component
															
 
																-     the composed component to put on the top of the scheduler
															
 
																-     this member must not be <c>NULL</c> as it is the root of the topology
															
 
																-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_component_composed_sched_component
															
 
																-     the composed component to put for each memory component
															
 
																-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_socket_composed_sched_component
															
 
																-     the composed component to put for each socket
															
 
																-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_cache_composed_sched_component
															
 
																-     the composed component to put for each cache
															
 
																-\var struct starpu_sched_component_composed_recipe *(*starpu_sched_specs::worker_composed_sched_component)(enum starpu_worker_archtype archtype)
															
 
																-     a function that return a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
															
 
																-     <c>NULL</c> is a valid return value, then no component will be added on top
															
 
																-\var starpu_sched_specs::mix_heterogeneous_workers
															
 
																-     this flag is a dirty hack because of the poor expressivity of this interface. As example, if you want to build
															
 
																-     a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.
															
 
																-     If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same
															
 
																-     numa component it will be shared). It indicates if heterogeneous workers should be brothers or cousins, for example, whether a gpu and a cpu should share their numa node or not
															
 
																-
															
 
																-\fn struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-	 this function build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_DECIDE_WORKERS
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_COMBINED_WORKERS
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to not only choose between simple workers, but also choose between combined workers.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_DECIDE_MEMNODES
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_DECIDE_ARCHS
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_PERFMODEL
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated.
															
 
																-
															
 
																-In other words, this is needed when using a component which needs performance models for tasks.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_FIFO_ABOVE
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component.
															
 
																-
															
 
																-This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request that the fifo above be sorted by priorities
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_FIFOS_BELOW
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers.
															
 
																-
															
 
																-This is useful to be able to schedule a (tunable) small number of tasks in advance only.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request that the fifos below be sorted by priorities
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_WS_BELOW
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request that work between workers using the same fifo below be distributed using a work stealing component.
															
 
																-
															
 
																-\def STARPU_SCHED_SIMPLE_IMPL
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Request that a component be added just above workers, that chooses the best task implementation.
															
 
																-
															
 
																-\fn void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-This creates a simple modular scheduler tree around a scheduling decision-making
															
 
																-component \p component. The details of what should be built around \p component
															
 
																-is described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are
															
 
																-mutually exclusive. \p data is passed to the \p create_decision_component
															
 
																-function when creating the decision component.
															
 
																-
															
 
																-\fn int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Push a task to a component. This is a helper for <c>component->push_task(component, task)</c> plus tracing.
															
 
																-
															
 
																-\fn struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to)
															
 
																-\ingroup API_Modularized_Scheduler
															
 
																-Pull a task from a component. This is a helper for <c>component->pull_task(component)</c> plus tracing.
															
 
																-
															
 
																-*/
															
--- a/doc/doxygen/chapters/api/performance_model.doxy
+++ b/doc/doxygen/chapters/api/performance_model.doxy
@@ -1,349 +0,0 @@
 
																-/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																- *
															
 
																- * Copyright (C) 2011-2013,2016                           Inria
															
 
																- * Copyright (C) 2010-2017                                CNRS
															
 
																- * Copyright (C) 2009-2011,2013-2017                      Université de Bordeaux
															
 
																- *
															
 
																- * StarPU is free software; you can redistribute it and/or modify
															
 
																- * it under the terms of the GNU Lesser General Public License as published by
															
 
																- * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																- * your option) any later version.
															
 
																- *
															
 
																- * StarPU is distributed in the hope that it will be useful, but
															
 
																- * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																- *
															
 
																- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																- */
															
 
																-
															
 
																-/*! \defgroup API_Performance_Model Performance Model
															
 
																-
															
 
																-\enum starpu_perfmodel_type
															
 
																-\ingroup API_Performance_Model
															
 
																-TODO
															
 
																-\var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
															
 
																-    todo
															
 
																-\var starpu_perfmodel_type::STARPU_PER_ARCH
															
 
																-    Application-provided per-arch cost model function
															
 
																-\var starpu_perfmodel_type::STARPU_COMMON
															
 
																-    Application-provided common cost model function, with per-arch
															
 
																-    factor
															
 
																-\var starpu_perfmodel_type::STARPU_HISTORY_BASED
															
 
																-    Automatic history-based cost model
															
 
																-\var starpu_perfmodel_type::STARPU_REGRESSION_BASED
															
 
																-    Automatic linear regression-based cost model  (alpha * size ^
															
 
																-    beta)
															
 
																-\var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
															
 
																-    Automatic non-linear regression-based cost model (a * size ^ b +
															
 
																-    c)
															
 
																-\var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
															
 
																-    Automatic multiple linear regression-based cost model. Application
															
 
																-    provides parameters, their combinations and exponents.
															
 
																-
															
 
																-\struct starpu_perfmodel_device
															
 
																-todo
															
 
																-\ingroup API_Performance_Model
															
 
																-\var enum starpu_worker_archtype starpu_perfmodel_device::type
															
 
																-    type of the device
															
 
																-\var int starpu_perfmodel_device::devid
															
 
																-    identifier of the precise device
															
 
																-\var int starpu_perfmodel_device::ncore
															
 
																-    number of execution in parallel, minus 1
															
 
																-
															
 
																-\struct starpu_perfmodel_arch
															
 
																-todo
															
 
																-\ingroup API_Performance_Model
															
 
																-\var int starpu_perfmodel_arch::ndevices
															
 
																-    number of the devices for the given arch
															
 
																-\var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
															
 
																-    list of the devices for the given arch
															
 
																-
															
 
																-\struct starpu_perfmodel
															
 
																-Contain all information about a performance model. At least the
															
 
																-type and symbol fields have to be filled when defining a performance
															
 
																-model for a codelet. For compatibility, make sure to initialize the
															
 
																-whole structure to zero, either by using explicit memset, or by
															
 
																-letting the compiler implicitly do it in e.g. static storage case. If
															
 
																-not provided, other fields have to be zero.
															
 
																-\ingroup API_Performance_Model
															
 
																-\var enum starpu_perfmodel_type starpu_perfmodel::type
															
 
																-    type of performance model
															
 
																-    <ul>
															
 
																-    <li>
															
 
																-    ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
															
 
																-    ::STARPU_NL_REGRESSION_BASED: No other fields need to be
															
 
																-    provided, this is purely history-based.
															
 
																-    </li>
															
 
																-    <li>
															
 
																-    ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
															
 
																-    starpu_perfmodel::nparameters (number of different parameters),
															
 
																-    starpu_perfmodel::ncombinations (number of parameters
															
 
																-    combinations-tuples) and table starpu_perfmodel::combinations
															
 
																-    which defines exponents of the equation. Function cl_perf_func
															
 
																-    also needs to define how to extract parameters from the task. 
															
 
																-    </li>
															
 
																-    <li>
															
 
																-    ::STARPU_PER_ARCH: either field
															
 
																-    starpu_perfmodel::arch_cost_function has to be filled with a
															
 
																-    function that returns the cost in micro-seconds on the arch given
															
 
																-    as parameter, or field starpu_perfmodel::per_arch has to be filled
															
 
																-    with functions which return the cost in micro-seconds.
															
 
																-    </li>
															
 
																-    <li>
															
 
																-    ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
															
 
																-    filled with a function that returns the cost in micro-seconds on a
															
 
																-    CPU, timing on other archs will be determined by multiplying by an
															
 
																-    arch-specific factor.
															
 
																-    </li>
															
 
																-    </ul>
															
 
																-\var const char *starpu_perfmodel::symbol
															
 
																-    symbol name for the performance model, which will be used as file
															
 
																-    name to store the model. It must be set otherwise the model will
															
 
																-    be ignored.
															
 
																-\var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
															
 
																-    Used by ::STARPU_COMMON. Take a task and implementation number,
															
 
																-    and must return a task duration estimation in micro-seconds.
															
 
																-\var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
															
 
																-    Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
															
 
																-    number, and must return a task duration estimation in
															
 
																-    micro-seconds on that arch.
															
 
																-\var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
															
 
																-    Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
															
 
																-    ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
															
 
																-    implementation number, and return the size to be used as index to
															
 
																-    distinguish histories and as a base for regressions.
															
 
																-\var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
															
 
																-    Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
															
 
																-    and return the footprint to be used as index to distinguish
															
 
																-    histories. The default is to use the starpu_task_data_footprint()
															
 
																-    function.
															
 
																-\var unsigned starpu_perfmodel::is_loaded
															
 
																-\private
															
 
																-    Whether the performance model is already loaded from the disk.
															
 
																-\var unsigned starpu_perfmodel::benchmarking
															
 
																-\private
															
 
																-    todo
															
 
																-\var unsigned starpu_perfmodel::is_init
															
 
																-    todo
															
 
																-\var starpu_perfmodel_state_t starpu_perfmodel::state
															
 
																-\private
															
 
																-    todo
															
 
																-\var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
															
 
																-    todo
															
 
																-\var const char ** starpu_perfmodel::parameters_names
															
 
																-\private
															
 
																-    Names of parameters used for multiple linear regression models (M,
															
 
																-    N, K)
															
 
																-\var unsigned starpu_perfmodel::nparameters
															
 
																-\private
															
 
																-    Number of parameters used for multiple linear regression models
															
 
																-\var unsigned ** starpu_perfmodel::combinations
															
 
																-\private
															
 
																-    Table of combinations of parameters (and the exponents) used for
															
 
																-    multiple linear regression models
															
 
																-\var unsigned starpu_perfmodel::ncombinations
															
 
																-\private
															
 
																-    Number of combination of parameters used for multiple linear
															
 
																-    regression models
															
 
																-
															
 
																-\struct starpu_perfmodel_regression_model
															
 
																-todo
															
 
																-\ingroup API_Performance_Model
															
 
																-\var double starpu_perfmodel_regression_model::sumlny
															
 
																-    sum of ln(measured)
															
 
																-\var double starpu_perfmodel_regression_model::sumlnx
															
 
																-    sum of ln(size)
															
 
																-\var double starpu_perfmodel_regression_model::sumlnx2
															
 
																-    sum of ln(size)^2
															
 
																-\var unsigned long starpu_perfmodel_regression_model::minx
															
 
																-    minimum size
															
 
																-\var unsigned long starpu_perfmodel_regression_model::maxx
															
 
																-    maximum size
															
 
																-\var double starpu_perfmodel_regression_model::sumlnxlny
															
 
																-    sum of ln(size)*ln(measured)
															
 
																-\var double starpu_perfmodel_regression_model::alpha
															
 
																-    estimated = alpha * size ^ beta
															
 
																-\var double starpu_perfmodel_regression_model::beta
															
 
																-    estimated = alpha * size ^ beta
															
 
																-\var unsigned starpu_perfmodel_regression_model::valid
															
 
																-    whether the linear regression model is valid (i.e. enough measures)
															
 
																-\var double starpu_perfmodel_regression_model::a
															
 
																-    estimated = a size ^b + c
															
 
																-\var double starpu_perfmodel_regression_model::b
															
 
																-    estimated = a size ^b + c
															
 
																-\var double starpu_perfmodel_regression_model::c
															
 
																-    estimated = a size ^b + c
															
 
																-\var unsigned starpu_perfmodel_regression_model::nl_valid
															
 
																-    whether the non-linear regression model is valid (i.e. enough measures)
															
 
																-\var unsigned starpu_perfmodel_regression_model::nsample
															
 
																-    number of sample values for non-linear regression
															
 
																-\var double starpu_perfmodel_regression_model::coeff[]
															
 
																-    list of computed coefficients for multiple linear regression model
															
 
																-\var double starpu_perfmodel_regression_model::ncoeff
															
 
																-    number of coefficients for multiple linear regression model
															
 
																-\var double starpu_perfmodel_regression_model::multi_valid
															
 
																-    whether the multiple linear regression model is valid
															
 
																-
															
 
																-\struct starpu_perfmodel_per_arch
															
 
																-contains information about the performance model of a given
															
 
																-arch.
															
 
																-\ingroup API_Performance_Model
															
 
																-\var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
															
 
																-    Used by ::STARPU_PER_ARCH, must point to functions which take a
															
 
																-    task, the target arch and implementation number (as mere
															
 
																-    convenience, since the array is already indexed by these), and
															
 
																-    must return a task duration estimation in micro-seconds.
															
 
																-\var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
															
 
																-    Same as in structure starpu_perfmodel, but per-arch, in case it
															
 
																-    depends on the architecture-specific implementation.
															
 
																-\var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
															
 
																-\private
															
 
																-    The history of performance measurements.
															
 
																-\var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
															
 
																-\private
															
 
																-    Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
															
 
																-    ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
															
 
																-    measures.
															
 
																-\var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
															
 
																-\private
															
 
																-    Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
															
 
																-    and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
															
 
																-    factors of the regression.
															
 
																-
															
 
																-\struct starpu_perfmodel_history_list
															
 
																-todo
															
 
																-\ingroup API_Performance_Model
															
 
																-\var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
															
 
																-    todo
															
 
																-\var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
															
 
																-    todo
															
 
																-
															
 
																-\struct starpu_perfmodel_history_entry
															
 
																-todo
															
 
																-\ingroup API_Performance_Model
															
 
																-\var double starpu_perfmodel_history_entry::mean
															
 
																-    mean_n = 1/n sum
															
 
																-\var double starpu_perfmodel_history_entry::deviation
															
 
																-    n dev_n = sum2 - 1/n (sum)^2
															
 
																-\var double starpu_perfmodel_history_entry::sum
															
 
																-    sum of samples (in µs)
															
 
																-\var double starpu_perfmodel_history_entry::sum2
															
 
																-    sum of samples^2
															
 
																-\var unsigned starpu_perfmodel_history_entry::nsample
															
 
																-    number of samples
															
 
																-\var uint32_t starpu_perfmodel_history_entry::footprint
															
 
																-    data footprint
															
 
																-\var size_t starpu_perfmodel_history_entry::size
															
 
																-    in bytes
															
 
																-\var double starpu_perfmodel_history_entry::flops
															
 
																-    Provided by the application
															
 
																-
															
 
																-\fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
															
 
																-\ingroup API_Performance_Model
															
 
																-todo
															
 
																-
															
 
																-\fn void starpu_perfmodel_free_sampling_directories(void)
															
 
																-\ingroup API_Performance_Model
															
 
																-Free internal memory used for sampling directory
															
 
																-management. It should only be called by an application which is not
															
 
																-calling starpu_shutdown() as this function already calls it. See for
															
 
																-example <c>tools/starpu_perfmodel_display.c</c>.
															
 
																-
															
 
																-\fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
															
 
																-\ingroup API_Performance_Model
															
 
																-Load the performance model found in the file named \p filename. \p model has to be
															
 
																-completely zero, and will be filled with the information stored in the given file.
															
 
																-
															
 
																-\fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
															
 
																-\ingroup API_Performance_Model
															
 
																-Load a given performance model. \p model has to be
															
 
																-completely zero, and will be filled with the information stored in
															
 
																-<c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
															
 
																-external tools that want to read the performance model files.
															
 
																-
															
 
																-\fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
															
 
																-\ingroup API_Performance_Model
															
 
																-Unload \p model which has been previously loaded
															
 
																-through the function starpu_perfmodel_load_symbol()
															
 
																-
															
 
																-\fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the path to the debugging information for the performance model.
															
 
																-
															
 
																-\fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
															
 
																-\ingroup API_Performance_Model
															
 
																-todo
															
 
																-
															
 
																-\fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the architecture name for \p arch
															
 
																-
															
 
																-\fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the architecture type of the worker \p workerid.
															
 
																-
															
 
																-\fn void starpu_perfmodel_initialize(void)
															
 
																-\ingroup API_Performance_Model
															
 
																-If starpu_init is not used, starpu_perfmodel_initialize should be used before calling starpu_perfmodel_* functions.
															
 
																-
															
 
																-\fn int starpu_perfmodel_list(FILE *output)
															
 
																-\ingroup API_Performance_Model
															
 
																-Print a list of all performance models on \p output
															
 
																-
															
 
																-\fn void starpu_perfmodel_directory(FILE *output)
															
 
																-\ingroup API_Performance_Model
															
 
																-Print the directory name storing performance models on \p output
															
 
																-
															
 
																-\fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
															
 
																-\ingroup API_Performance_Model
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
															
 
																-\ingroup API_Performance_Model
															
 
																-todo
															
 
																-
															
 
																-\fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
															
 
																-\ingroup API_Performance_Model
															
 
																-todo
															
 
																-
															
 
																-\fn void starpu_bus_print_bandwidth(FILE *f)
															
 
																-\ingroup API_Performance_Model
															
 
																-Print a matrix of bus bandwidths on \p f.
															
 
																-
															
 
																-\fn void starpu_bus_print_affinity(FILE *f)
															
 
																-\ingroup API_Performance_Model
															
 
																-Print the affinity devices on \p f.
															
 
																-
															
 
																-\fn void starpu_bus_print_filenames(FILE *f)
															
 
																-\ingroup API_Performance_Model
															
 
																-Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
															
 
																-
															
 
																-\fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
															
 
																-\ingroup API_Performance_Model
															
 
																-Feed the performance model \p model with an explicit
															
 
																-measurement \p measured (in µs), in addition to measurements done by StarPU
															
 
																-itself. This can be useful when the application already has an
															
 
																-existing set of measurements done in good conditions, that StarPU
															
 
																-could benefit from instead of doing on-line measurements. An example
															
 
																-of use can be seen in \ref PerformanceModelExample.
															
 
																-
															
 
																-\fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the bandwidth of data transfer between two memory nodes
															
 
																-
															
 
																-\fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the latency of data transfer between two memory nodes
															
 
																-
															
 
																-\fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the estimated time to transfer a given size between two memory nodes.
															
 
																-
															
 
																-\fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
															
 
																-\ingroup API_Performance_Model
															
 
																-Return the estimated time of a task with the given model and the given footprint.
															
 
																-
															
 
																-\var starpu_perfmodel_nop
															
 
																-Performance model which always returns 1µs.
															
 
																-
															
 
																-*/
															
--- a/doc/doxygen/chapters/api/profiling.doxy
+++ b/doc/doxygen/chapters/api/profiling.doxy
@@ -1,202 +0,0 @@
 
																-/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																- *
															
 
																- * Copyright (C) 2010-2015,2017                           CNRS
															
 
																- * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
															
 
																- * Copyright (C) 2011,2012                                Inria
															
 
																- *
															
 
																- * StarPU is free software; you can redistribute it and/or modify
															
 
																- * it under the terms of the GNU Lesser General Public License as published by
															
 
																- * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																- * your option) any later version.
															
 
																- *
															
 
																- * StarPU is distributed in the hope that it will be useful, but
															
 
																- * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																- *
															
 
																- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																- */
															
 
																-
															
 
																-/*! \defgroup API_Profiling Profiling
															
 
																-
															
 
																-\struct starpu_profiling_task_info
															
 
																-\ingroup API_Profiling
															
 
																-This structure contains information about the execution of a
															
 
																-task. It is accessible from the field starpu_task::profiling_info if
															
 
																-profiling was enabled.
															
 
																-\var struct timespec starpu_profiling_task_info::submit_time
															
 
																-    Date of task submission (relative to the initialization of StarPU).
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::push_start_time
															
 
																-    Time when the task was submitted to the scheduler.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::push_end_time
															
 
																-    Time when the scheduler finished with the task submission.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::pop_start_time
															
 
																-    Time when the scheduler started to be requested for a task, and eventually gave that task.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::pop_end_time
															
 
																-    Time when the scheduler finished providing the task for execution.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::acquire_data_start_time
															
 
																-    Time when the worker started fetching input data.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::acquire_data_end_time
															
 
																-    Time when the worker finished fetching input data.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::start_time
															
 
																-    Date of task execution beginning (relative to the initialization of StarPU).
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::end_time
															
 
																-    Date of task execution termination (relative to the initialization of StarPU).
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::release_data_start_time
															
 
																-    Time when the worker started releasing data.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::release_data_end_time
															
 
																-    Time when the worker finished releasing data.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::callback_start_time
															
 
																-    Time when the worker started the application callback for the task.
															
 
																-
															
 
																-\var struct timespec starpu_profiling_task_info::callback_end_time
															
 
																-    Time when the worker finished the application callback for the task.
															
 
																-
															
 
																-\var int starpu_profiling_task_info::workerid
															
 
																-    Identifier of the worker which has executed the task.
															
 
																-
															
 
																-\var uint64_t starpu_profiling_task_info::used_cycles
															
 
																-    Number of cycles used by the task, only available in the MoviSim
															
 
																-
															
 
																-\var uint64_t starpu_profiling_task_info::stall_cycles
															
 
																-    Number of cycles stalled within the task, only available in the MoviSim
															
 
																-
															
 
																-\var double starpu_profiling_task_info::energy_consumed
															
 
																-Energy consumed by the task, in Joules
															
 
																-
															
 
																-\struct starpu_profiling_worker_info
															
 
																-This structure contains the profiling information associated to
															
 
																-a worker. The timing is provided since the previous call to
															
 
																-starpu_profiling_worker_get_info()
															
 
																-\ingroup API_Profiling
															
 
																-\var struct timespec starpu_profiling_worker_info::start_time
															
 
																-        Starting date for the reported profiling measurements.
															
 
																-\var struct timespec starpu_profiling_worker_info::total_time
															
 
																-        Duration of the profiling measurement interval.
															
 
																-\var struct timespec starpu_profiling_worker_info::executing_time
															
 
																-        Time spent by the worker to execute tasks during the profiling measurement interval.
															
 
																-\var struct timespec starpu_profiling_worker_info::sleeping_time
															
 
																-        Time spent idling by the worker during the profiling measurement interval.
															
 
																-\var int starpu_profiling_worker_info::executed_tasks
															
 
																-        Number of tasks executed by the worker during the profiling measurement interval.
															
 
																-\var uint64_t starpu_profiling_worker_info::used_cycles
															
 
																-        Number of cycles used by the worker, only available in the MoviSim
															
 
																-\var uint64_t starpu_profiling_worker_info::stall_cycles
															
 
																-        Number of cycles stalled within the worker, only available in the MoviSim
															
 
																-\var double starpu_profiling_worker_info::energy_consumed
															
 
																-        Energy consumed by the worker, in Joules
															
 
																-
															
 
																-\struct starpu_profiling_bus_info
															
 
																-todo
															
 
																-\ingroup API_Profiling
															
 
																-\var struct timespec starpu_profiling_bus_info::start_time
															
 
																-        Time of bus profiling startup.
															
 
																-\var struct timespec starpu_profiling_bus_info::total_time
															
 
																-        Total time of bus profiling.
															
 
																-\var int long long starpu_profiling_bus_info::transferred_bytes
															
 
																-        Number of bytes transferred during profiling.
															
 
																-\var int starpu_profiling_bus_info::transfer_count
															
 
																-        Number of transfers during profiling.
															
 
																-
															
 
																-\typedef STARPU_PROFILING_DISABLE
															
 
																-\ingroup API_Profiling
															
 
																-Used when calling the function starpu_profiling_status_set() to disable profiling.
															
 
																-
															
 
																-\typedef STARPU_PROFILING_ENABLE
															
 
																-\ingroup API_Profiling
															
 
																-Used when calling the function starpu_profiling_status_set() to enable profiling.
															
 
																-
															
 
																-\fn int starpu_profiling_status_set(int status)
															
 
																-\ingroup API_Profiling
															
 
																-Set the profiling status. Profiling is activated
															
 
																-by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
															
 
																-\ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
															
 
																-resets all profiling measurements. When profiling is enabled, the
															
 
																-field starpu_task::profiling_info points to a valid structure
															
 
																-starpu_profiling_task_info containing information about the execution
															
 
																-of the task. Negative return values indicate an error, otherwise the
															
 
																-previous status is returned.
															
 
																-
															
 
																-\fn int starpu_profiling_status_get(void)
															
 
																-\ingroup API_Profiling
															
 
																-Return the current profiling status or a negative value in case
															
 
																-there was an error.
															
 
																-
															
 
																-\fn void starpu_profiling_init(void)
															
 
																-\ingroup API_Profiling
															
 
																-Reset performance counters and enable profiling if the
															
 
																-environment variable \ref STARPU_PROFILING is set to a positive value.
															
 
																-
															
 
																-\fn void starpu_profiling_set_id(int new_id)
															
 
																-\ingroup API_Profiling
															
 
																-Set the ID used for profiling trace filename. This function has to be called before starpu_init().
															
 
																-
															
 
																-\fn int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info)
															
 
																-\ingroup API_Profiling
															
 
																-Get the profiling info associated to the worker identified by
															
 
																-\p workerid, and reset the profiling measurements. If the argument \p
															
 
																-worker_info is <c>NULL</c>, only reset the counters associated to worker
															
 
																-\p workerid. Upon successful completion, this function returns 0.
															
 
																-Otherwise, a negative value is returned.
															
 
																-
															
 
																-\fn int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info)
															
 
																-\ingroup API_Profiling
															
 
																-
															
 
																-todo
															
 
																-
															
 
																-See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example.
															
 
																-Note that calling starpu_bus_get_profiling_info resets the counters to zero.
															
 
																-
															
 
																-\fn int starpu_bus_get_count(void)
															
 
																-\ingroup API_Profiling
															
 
																-Return the number of buses in the machine
															
 
																-
															
 
																-\fn int starpu_bus_get_id(int src, int dst)
															
 
																-\ingroup API_Profiling
															
 
																-Return the identifier of the bus between \p src and \p dst
															
 
																-
															
 
																-\fn int starpu_bus_get_src(int busid)
															
 
																-\ingroup API_Profiling
															
 
																-Return the source point of bus \p busid
															
 
																-
															
 
																-\fn int starpu_bus_get_dst(int busid)
															
 
																-\ingroup API_Profiling
															
 
																-Return the destination point of bus \p busid
															
 
																-
															
 
																-\fn double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
															
 
																-\ingroup API_Profiling
															
 
																-Return the time elapsed between \p start and \p end in microseconds.
															
 
																-
															
 
																-\fn double starpu_timing_timespec_to_us(struct timespec *ts)
															
 
																-\ingroup API_Profiling
															
 
																-Convert the given timespec \p ts into microseconds
															
 
																-
															
 
																-\fn void starpu_profiling_bus_helper_display_summary(void)
															
 
																-\ingroup API_Profiling
															
 
																-Display statistics about the bus on \c stderr if the environment
															
 
																-variable \ref STARPU_BUS_STATS is defined. The function is called
															
 
																-automatically by starpu_shutdown().
															
 
																-
															
 
																-\fn void starpu_profiling_worker_helper_display_summary(void)
															
 
																-\ingroup API_Profiling
															
 
																-Display statistics about the workers on \c stderr if the
															
 
																-environment variable \ref STARPU_WORKER_STATS is defined. The function is
															
 
																-called automatically by starpu_shutdown().
															
 
																-
															
 
																-\fn void starpu_data_display_memory_stats()
															
 
																-\ingroup API_Profiling
															
 
																-Display statistics about the current data handles registered
															
 
																-within StarPU. StarPU must have been configured with the configure
															
 
																-option \ref enable-memory-stats "--enable-memory-stats" (see \ref MemoryFeedback).
															
 
																-
															
 
																-*/
															
--- a/doc/doxygen/chapters/api/scc_extensions.doxy
+++ b/doc/doxygen/chapters/api/scc_extensions.doxy
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2013,2015,2017                      CNRS
															
 
																+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
															
 
																  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
															
 
																  * Copyright (C) 2011,2012                                Inria
															
 
																  *
															
@@ -16,7 +16,7 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-/*! \defgroup API_SCC_Extensions SCC Extensions
															
 
																+/*! \ingroup API_SCC_Extensions
															
 
																 \def STARPU_USE_SCC
															
 
																 \ingroup API_SCC_Extensions
															
@@ -28,20 +28,4 @@ It should be used in your code to detect the availability of SCC.
 
																 Define the maximum number of SCC devices that are
															
 
																 supported by StarPU.
															
 
																-\typedef starpu_scc_func_symbol_t
															
 
																-\ingroup API_SCC_Extensions
															
 
																-Type for SCC function symbols
															
 
																-
															
 
																-\fn int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name)
															
 
																-\ingroup API_SCC_Extensions
															
 
																-Initiate a lookup on each SCC device to find the adress of the
															
 
																-function named \p func_name, store them in the global array kernels
															
 
																-and return the index in the array through \p symbol.
															
 
																-
															
 
																-\fn starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol)
															
 
																-\ingroup API_SCC_Extensions
															
 
																-If success, return the pointer to the function defined by \p symbol on
															
 
																-the device linked to the called device. This can for instance be used
															
 
																-in a starpu_scc_func_symbol_t implementation.
															
 
																-
															
 
																 */
															
--- a/include/starpu_bitmap.h
+++ b/include/starpu_bitmap.h
@@ -19,11 +19,10 @@
 
																 #ifndef __STARPU_BITMAP_H__
															
 
																 #define __STARPU_BITMAP_H__
															
 
																-/** @defgroup API_Bitmap Bitmap
															
 
																-
															
 
																-    @brief This is the interface for the bitmap utilities provided by StarPU.
															
 
																-
															
 
																-    @{
															
 
																+/**
															
 
																+   @defgroup API_Bitmap Bitmap
															
 
																+   @brief This is the interface for the bitmap utilities provided by StarPU.
															
 
																+   @{
															
 
																  */
															
 
																 #ifdef __cplusplus
															
@@ -33,7 +32,7 @@ extern "C"
 
																 /** create a empty starpu_bitmap */
															
 
																 struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
															
 
																-/** free \b */
															
 
																+/** free \p b */
															
 
																 void starpu_bitmap_destroy(struct starpu_bitmap *b);
															
 
																 /** set bit \p e in \p b */
															
--- a/include/starpu_bound.h
+++ b/include/starpu_bound.h
@@ -18,12 +18,11 @@
 
																 #ifndef __STARPU_BOUND_H__
															
 
																 #define __STARPU_BOUND_H__
															
 
																-/** @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
															
 
																-
															
 
																-    @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
															
 
																-
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
															
 
																+   @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
															
 
																+   @{
															
 
																+*/
															
 
																 #include <stdio.h>
															
@@ -32,34 +31,46 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																-/** Start recording tasks (resets stats). \p deps tells whether dependencies should be recorded too (this is quite expensive) */
															
 
																+/**
															
 
																+   Start recording tasks (resets stats). \p deps tells whether
															
 
																+   dependencies should be recorded too (this is quite expensive)
															
 
																+*/
															
 
																 void starpu_bound_start(int deps, int prio);
															
 
																-/** Stop recording tasks */
															
 
																+
															
 
																+/**
															
 
																+   Stop recording tasks
															
 
																+*/
															
 
																 void starpu_bound_stop(void);
															
 
																-/** Emit the DAG that was recorded on \p output. */
															
 
																+/**
															
 
																+   Emit the DAG that was recorded on \p output.
															
 
																+*/
															
 
																 void starpu_bound_print_dot(FILE *output);
															
 
																-/** Get theoretical upper bound (in ms) (needs glpk support
															
 
																-    detected by configure script). It returns 0 if some performance models
															
 
																-    are not calibrated.
															
 
																+/**
															
 
																+   Get theoretical upper bound (in ms) (needs glpk support detected by
															
 
																+   configure script). It returns 0 if some performance models are not
															
 
																+   calibrated.
															
 
																 */
															
 
																 void starpu_bound_compute(double *res, double *integer_res, int integer);
															
 
																-/** Emit the Linear Programming system on \p output for the recorded
															
 
																-    tasks, in the lp format
															
 
																+/**
															
 
																+   Emit the Linear Programming system on \p output for the recorded
															
 
																+   tasks, in the lp format
															
 
																 */
															
 
																 void starpu_bound_print_lp(FILE *output);
															
 
																-/** Emit the Linear Programming system on \p output for the recorded
															
 
																-    tasks, in the mps format
															
 
																+/**
															
 
																+   Emit the Linear Programming system on \p output for the recorded
															
 
																+   tasks, in the mps format
															
 
																 */
															
 
																 void starpu_bound_print_mps(FILE *output);
															
 
																-/** Emit on \p output the statistics of actual execution vs theoretical upper bound.
															
 
																-    \p integer permits to choose between integer solving (which takes a
															
 
																-    long time but is correct), and relaxed solving (which provides an
															
 
																-    approximate solution).
															
 
																+/**
															
 
																+   Emit on \p output the statistics of actual execution vs theoretical
															
 
																+   upper bound. \p integer permits to choose between integer solving
															
 
																+   (which takes a long time but is correct), and relaxed solving
															
 
																+   (which provides an approximate solution).
															
 
																 */
															
 
																 void starpu_bound_print(FILE *output, int integer);
															
--- a/include/starpu_clusters.h
+++ b/include/starpu_clusters.h
@@ -19,9 +19,9 @@
 
																 #ifndef __STARPU_CLUSTERS_UTIL_H__
															
 
																 #define __STARPU_CLUSTERS_UTIL_H__
															
 
																-/** @defgroup API_Clustering_Machine Clustering Machine
															
 
																-
															
 
																-    @{
															
 
																+/**
															
 
																+   @defgroup API_Clustering_Machine Clustering Machine
															
 
																+   @{
															
 
																  */
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
--- a/include/starpu_cublas.h
+++ b/include/starpu_cublas.h
@@ -18,9 +18,9 @@
 
																 #ifndef __STARPU_CUBLAS_H__
															
 
																 #define __STARPU_CUBLAS_H__
															
 
																-/** @ingroup API_CUDA_Extensions
															
 
																-
															
 
																-    @{
															
 
																+/**
															
 
																+   @ingroup API_CUDA_Extensions
															
 
																+   @{
															
 
																  */
															
 
																 #ifdef __cplusplus
															
@@ -38,10 +38,11 @@ extern "C"
 
																 void starpu_cublas_init(void);
															
 
																 /**
															
 
																-   Set the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
															
 
																-   codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
															
 
																-   CUDA stream. When using one thread per CUDA worker, this function does not
															
 
																-   do anything since the CUBLAS stream does not change, and is set once by
															
 
																+   Set the proper CUBLAS stream for CUBLAS v1. This must be called
															
 
																+   from the CUDA codelet before calling CUBLAS v1 kernels, so that
															
 
																+   they are queued on the proper CUDA stream. When using one thread
															
 
																+   per CUDA worker, this function does not do anything since the
															
 
																+   CUBLAS stream does not change, and is set once by
															
 
																    starpu_cublas_init().
															
 
																 */
															
 
																 void starpu_cublas_set_stream(void);
															
--- a/include/starpu_cublas_v2.h
+++ b/include/starpu_cublas_v2.h
@@ -18,9 +18,9 @@
 
																 #ifndef __STARPU_CUBLAS_V2_H__
															
 
																 #define __STARPU_CUBLAS_V2_H__
															
 
																-/** @ingroup API_CUDA_Extensions
															
 
																-
															
 
																-    @{
															
 
																+/**
															
 
																+   @ingroup API_CUDA_Extensions
															
 
																+   @{
															
 
																  */
															
 
																 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
															
@@ -33,8 +33,8 @@ extern "C"
 
																 #endif
															
 
																 /**
															
 
																-   Return the CUSPARSE handle to be used to queue CUSPARSE
															
 
																-   kernels. It is properly initialized and configured for multistream by
															
 
																+   Return the CUBLAS handle to be used to queue CUBLAS kernels. It
															
 
																+   is properly initialized and configured for multistream by
															
 
																    starpu_cublas_init().
															
 
																 */
															
 
																 cublasHandle_t starpu_cublas_get_local_handle(void);
															
--- a/include/starpu_cuda.h
+++ b/include/starpu_cuda.h
@@ -19,9 +19,9 @@
 
																 #ifndef __STARPU_CUDA_H__
															
 
																 #define __STARPU_CUDA_H__
															
 
																-/** @defgroup API_CUDA_Extensions CUDA Extensions
															
 
																-
															
 
																-    @{
															
 
																+/**
															
 
																+   @defgroup API_CUDA_Extensions CUDA Extensions
															
 
																+   @{
															
 
																  */
															
 
																 #include <starpu_config.h>
															
@@ -36,49 +36,60 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																-/** Report a CUBLAS error. */
															
 
																+/**
															
 
																+   Report a CUBLAS error.
															
 
																+*/
															
 
																 void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
															
 
																-/** Calls starpu_cublas_report_error(), passing the current function, file and line position.*/
															
 
																+/**
															
 
																+   Call starpu_cublas_report_error(), passing the current function, file and line position.
															
 
																+*/
															
 
																 #define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
															
 
																-/** Report a CUDA error. */
															
 
																+/**
															
 
																+   Report a CUDA error.
															
 
																+*/
															
 
																 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
															
 
																-/** Calls starpu_cuda_report_error(), passing the current function, file and line position.*/
															
 
																+/**
															
 
																+   Call starpu_cuda_report_error(), passing the current function, file and line position.
															
 
																+*/
															
 
																 #define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
															
 
																 /**
															
 
																-    Return the current worker’s CUDA stream. StarPU
															
 
																-    provides a stream for every CUDA device controlled by StarPU. This
															
 
																-    function is only provided for convenience so that programmers can
															
 
																-    easily use asynchronous operations within codelets without having to
															
 
																-    create a stream by hand. Note that the application is not forced to
															
 
																-    use the stream provided by starpu_cuda_get_local_stream() and may also
															
 
																-    create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
															
 
																-    allowed, but will reduce the likelihood of having all transfers
															
 
																-    overlapped.
															
 
																+   Return the current worker’s CUDA stream. StarPU provides a stream
															
 
																+   for every CUDA device controlled by StarPU. This function is only
															
 
																+   provided for convenience so that programmers can easily use
															
 
																+   asynchronous operations within codelets without having to create a
															
 
																+   stream by hand. Note that the application is not forced to use the
															
 
																+   stream provided by starpu_cuda_get_local_stream() and may also
															
 
																+   create its own streams. Synchronizing with
															
 
																+   <c>cudaThreadSynchronize()</c> is allowed, but will reduce the
															
 
																+   likelihood of having all transfers overlapped.
															
 
																 */
															
 
																 cudaStream_t starpu_cuda_get_local_stream(void);
															
 
																-/** Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker). */
															
 
																+/**
															
 
																+   Return a pointer to device properties for worker \p workerid
															
 
																+   (assumed to be a CUDA worker).
															
 
																+*/
															
 
																 const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
															
 
																 /**
															
 
																-    Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
															
 
																-    to the pointer \p dst_ptr on \p dst_node. The function first tries to
															
 
																-    copy the data asynchronous (unless \p stream is <c>NULL</c>). If the
															
 
																-    asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
															
 
																-    data synchronously. The function returns <c>-EAGAIN</c> if the
															
 
																-    asynchronous launch was successfull. It returns 0 if the synchronous
															
 
																-    copy was successful, or fails otherwise.
															
 
																+   Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
															
 
																+   to the pointer \p dst_ptr on \p dst_node. The function first tries to
															
 
																+   copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
															
 
																+   asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
															
 
																+   data synchronously. The function returns <c>-EAGAIN</c> if the
															
 
																+   asynchronous launch was successful. It returns 0 if the synchronous
															
 
																+   copy was successful, or fails otherwise.
															
 
																 */
															
 
																 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
															
 
																 /**
															
 
																-    Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
															
 
																-    according to whether \p devid is among the field
															
 
																-    starpu_conf::cuda_opengl_interoperability.
															
 
																+   Call <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
															
 
																+   according to whether \p devid is among the field
															
 
																+   starpu_conf::cuda_opengl_interoperability.
															
 
																 */
															
 
																 void starpu_cuda_set_device(unsigned devid);
															
--- a/include/starpu_cusparse.h
+++ b/include/starpu_cusparse.h
@@ -18,10 +18,10 @@
 
																 #ifndef __STARPU_CUSPARSE_H__
															
 
																 #define __STARPU_CUSPARSE_H__
															
 
																-/** @ingroup API_CUDA_Extensions
															
 
																-
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @ingroup API_CUDA_Extensions
															
 
																+   @{
															
 
																+*/
															
 
																 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
															
 
																 #include <cusparse.h>
															
@@ -40,14 +40,14 @@ extern "C"
 
																 void starpu_cusparse_init(void);
															
 
																 /**
															
 
																-   Synchronously deinitialize the CUSPARSE library on
															
 
																+   @brief Synchronously deinitialize the CUSPARSE library on
															
 
																    every CUDA device.
															
 
																 */
															
 
																 void starpu_cusparse_shutdown(void);
															
 
																 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
															
 
																 /**
															
 
																-   Return the CUSPARSE handle to be used to queue CUSPARSE
															
 
																+   @brief Return the CUSPARSE handle to be used to queue CUSPARSE
															
 
																    kernels. It is properly initialized and configured for multistream by
															
 
																    starpu_cusparse_init().
															
 
																 */
															
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -19,14 +19,13 @@
 
																 #ifndef __STARPU_DATA_H__
															
 
																 #define __STARPU_DATA_H__
															
 
																-/** @defgroup API_Data_Management Data Management
															
 
																-
															
 
																-    @brief Data management facilities provided by StarPU. We show how
															
 
																-    to use existing data interfaces in \ref API_Data_Interfaces, but
															
 
																-    developers can design their own data interfaces if required.
															
 
																-
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Data_Management Data Management
															
 
																+   @brief Data management facilities provided by StarPU. We show how
															
 
																+   to use existing data interfaces in \ref API_Data_Interfaces, but
															
 
																+   developers can design their own data interfaces if required.
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
@@ -52,12 +51,12 @@ struct _starpu_data_state;
 
																 typedef struct _starpu_data_state* starpu_data_handle_t;
															
 
																 /**
															
 
																-    Describe a StarPU data access mode
															
 
																+   Describe a StarPU data access mode
															
 
																-    Note: when adding a flag here, update
															
 
																-    _starpu_detect_implicit_data_deps_with_handle
															
 
																+   Note: when adding a flag here, update
															
 
																+   _starpu_detect_implicit_data_deps_with_handle
															
 
																-    Note: other STARPU_* values in include/starpu_task_util.h
															
 
																+   Note: other STARPU_* values in include/starpu_task_util.h
															
 
																  */
															
 
																 enum starpu_data_access_mode
															
 
																 {
															
--- a/include/starpu_data_filters.h
+++ b/include/starpu_data_filters.h
@@ -21,10 +21,10 @@
 
																 #ifndef __STARPU_DATA_FILTERS_H__
															
 
																 #define __STARPU_DATA_FILTERS_H__
															
 
																-/** @defgroup API_Data_Partition Data Partition
															
 
																-
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Data_Partition Data Partition
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
 
																 #include <stdarg.h>
															
@@ -36,7 +36,9 @@ extern "C"
 
																 struct starpu_data_interface_ops;
															
 
																-/** Describe a data partitioning operation, to be given to starpu_data_partition() */
															
 
																+/**
															
 
																+   Describe a data partitioning operation, to be given to starpu_data_partition()
															
 
																+*/
															
 
																 struct starpu_data_filter
															
 
																 {
															
 
																 	/**
															
@@ -99,37 +101,38 @@ struct starpu_data_filter
 
																 	void *filter_arg_ptr;
															
 
																 };
															
 
																-/** @name Basic API
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Basic API
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																-    Request the partitioning of \p initial_handle into several subdata
															
 
																-    according to the filter \p f.
															
 
																-    Here an example of how to use the function.
															
 
																-    \code{.c}
															
 
																-    struct starpu_data_filter f =
															
 
																-    {
															
 
																-      .filter_func = starpu_matrix_filter_block,
															
 
																-      .nchildren = nslicesx
															
 
																-    };
															
 
																-    starpu_data_partition(A_handle, &f);
															
 
																+   Request the partitioning of \p initial_handle into several subdata
															
 
																+   according to the filter \p f.
															
 
																+
															
 
																+   Here is an example of how to use the function.
															
 
																+   \code{.c}
															
 
																+   struct starpu_data_filter f =
															
 
																+   {
															
 
																+     .filter_func = starpu_matrix_filter_block,
															
 
																+     .nchildren = nslicesx
															
 
																+   };
															
 
																+   starpu_data_partition(A_handle, &f);
															
 
																     \endcode
															
 
																 */
															
 
																 void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
															
 
																 /**
															
 
																-   Unapply the filter which has been applied to \p root_data, thus
															
 
																-   unpartitioning the data. The pieces of data are collected back into
															
 
																-   one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
															
 
																-   Tasks working on the partitioned data will be waited for
															
 
																-   by starpu_data_unpartition().
															
 
																+  Unapply the filter which has been applied to \p root_data, thus
															
 
																+  unpartitioning the data. The pieces of data are collected back into
															
 
																+  one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
															
 
																+  Tasks working on the partitioned data will be waited for
															
 
																+  by starpu_data_unpartition().
															
 
																-   Here an example of how to use the function.
															
 
																-   \code{.c}
															
 
																-   starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
															
 
																-   \endcode
															
 
																+  Here is an example of how to use the function.
															
 
																+  \code{.c}
															
 
																+  starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
															
 
																+  \endcode
															
 
																 */
															
 
																 void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
															
@@ -182,10 +185,10 @@ void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters,
 
																 /** @} */
															
 
																-/** @name Asynchronous API
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Asynchronous API
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Plan to partition \p initial_handle into several subdata according to
															
@@ -303,44 +306,47 @@ void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name Predefined BCSR Filter Functions
															
 
																- * Predefined partitioning functions for BCSR data. Examples on how to
															
 
																- * use them are shown in \ref PartitioningData.
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Predefined BCSR Filter Functions
															
 
																+   Predefined partitioning functions for BCSR data. Examples on how to
															
 
																+   use them are shown in \ref PartitioningData.
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Partition a block-sparse matrix into dense matrices.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /** @} */
															
 
																-/** @name Predefined CSR Filter Functions
															
 
																- * Predefined partitioning functions for CSR data. Examples on how to
															
 
																- * use them are shown in \ref PartitioningData.
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Predefined CSR Filter Functions
															
 
																+   Predefined partitioning functions for CSR data. Examples on how to
															
 
																+   use them are shown in \ref PartitioningData.
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Partition a block-sparse matrix into vertical block-sparse matrices.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /** @} */
															
 
																-/** @name Predefined Matrix Filter Functions
															
 
																- * Predefined partitioning functions for matrix
															
 
																- * data. Examples on how to use them are shown in \ref
															
 
																- * PartitioningData.
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Predefined Matrix Filter Functions
															
 
																+   Predefined partitioning functions for matrix
															
 
																+   data. Examples on how to use them are shown in \ref
															
 
																+   PartitioningData.
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Partition a dense Matrix along the x dimension, thus getting (x/\p
															
 
																    nparts ,y) matrices. If \p nparts does not divide x, the last
															
 
																    submatrix contains the remainder.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
@@ -353,14 +359,14 @@ void starpu_matrix_filter_block(void *father_interface, void *child_interface, s
 
																    only be used for read-only access, as no coherency is enforced for the
															
 
																    shadowed parts. A usage example is available in
															
 
																    examples/filters/shadow2d.c
															
 
																- */
															
 
																+*/
															
 
																 void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
 
																    Partition a dense Matrix along the y dimension, thus getting
															
 
																    (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
															
 
																    submatrix contains the remainder.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
@@ -377,18 +383,19 @@ void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *ch
 
																 /** @} */
															
 
																-/** @name Predefined Vector Filter Functions
															
 
																- * Predefined partitioning functions for vector
															
 
																- * data. Examples on how to use them are shown in \ref
															
 
																- * PartitioningData.
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Predefined Vector Filter Functions
															
 
																+   Predefined partitioning functions for vector
															
 
																+   data. Examples on how to use them are shown in \ref
															
 
																+   PartitioningData.
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Return in \p child_interface the \p id th element of the vector
															
 
																    represented by \p father_interface once partitioned in \p nparts chunks of
															
 
																    equal size.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
@@ -411,39 +418,40 @@ void starpu_vector_filter_block_shadow(void *father_interface, void *child_inter
 
																    <c>filter_arg_ptr</c> field must point to an array of \p nparts long
															
 
																    elements, each of which specifies the number of elements in each chunk
															
 
																    of the partition.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
 
																-   Return in \p child_interface the \p id th element of the vector
															
 
																-   represented by \p father_interface once partitioned into \p nparts chunks
															
 
																-   according to the <c>filter_arg_ptr</c> field of \p f. The
															
 
																-   <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
															
 
																-   elements, each of which specifies the number of elements in each chunk
															
 
																-   of the partition.
															
 
																- */
															
 
																+  Return in \p child_interface the \p id th element of the vector
															
 
																+  represented by \p father_interface once partitioned into \p nparts chunks
															
 
																+  according to the <c>filter_arg_ptr</c> field of \p f. The
															
 
																+  <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
															
 
																+  elements, each of which specifies the number of elements in each chunk
															
 
																+  of the partition.
															
 
																+*/
															
 
																 void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
 
																    Return in \p child_interface the \p id th element of the vector
															
 
																    represented by \p father_interface once partitioned in <c>2</c> chunks of
															
 
																    equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /** @} */
															
 
																-/** @name Predefined Block Filter Functions
															
 
																- * Predefined partitioning functions for block data. Examples on how
															
 
																- * to use them are shown in \ref PartitioningData. An example is
															
 
																- * available in \c examples/filters/shadow3d.c
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Predefined Block Filter Functions
															
 
																+   Predefined partitioning functions for block data. Examples on how
															
 
																+   to use them are shown in \ref PartitioningData. An example is
															
 
																+   available in \c examples/filters/shadow3d.c
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																-   Partition a block along the X dimension, thus getting
															
 
																-   (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
															
 
																-   submatrix contains the remainder.
															
 
																+  Partition a block along the X dimension, thus getting
															
 
																+  (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
															
 
																+  submatrix contains the remainder.
															
 
																  */
															
 
																 void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
@@ -475,14 +483,14 @@ void starpu_block_filter_vertical_block(void *father_interface, void *child_inte
 
																    <b>IMPORTANT</b>:
															
 
																    This can only be used for read-only access, as no coherency is
															
 
																    enforced for the shadowed parts.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
 
																    Partition a block along the Z dimension, thus getting
															
 
																    (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
															
 
																    submatrix contains the remainder.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /**
															
@@ -494,7 +502,7 @@ void starpu_block_filter_depth_block(void *father_interface, void *child_interfa
 
																    <b>IMPORTANT</b>:
															
 
																    This can only be used for read-only access, as no coherency is
															
 
																    enforced for the shadowed parts.
															
 
																- */
															
 
																+*/
															
 
																 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
															
 
																 /** @} */
															
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -19,53 +19,53 @@
 
																 #ifndef __STARPU_DATA_INTERFACES_H__
															
 
																 #define __STARPU_DATA_INTERFACES_H__
															
 
																-/** @defgroup API_Data_Interfaces Data Interfaces
															
 
																-
															
 
																-    @brief Data management is done at a high-level in StarPU: rather than
															
 
																-    accessing a mere list of contiguous buffers, the tasks may manipulate
															
 
																-    data that are described by a high-level construct which we call data
															
 
																-    interface.
															
 
																-
															
 
																-    An example of data interface is the "vector" interface which describes
															
 
																-    a contiguous data array on a spefic memory node. This interface is a
															
 
																-    simple structure containing the number of elements in the array, the
															
 
																-    size of the elements, and the address of the array in the appropriate
															
 
																-    address space (this address may be invalid if there is no valid copy
															
 
																-    of the array in the memory node). More informations on the data
															
 
																-    interfaces provided by StarPU are given in \ref API_Data_Interfaces.
															
 
																-
															
 
																-    When a piece of data managed by StarPU is used by a task, the task
															
 
																-    implementation is given a pointer to an interface describing a valid
															
 
																-    copy of the data that is accessible from the current processing unit.
															
 
																-
															
 
																-    Every worker is associated to a memory node which is a logical
															
 
																-    abstraction of the address space from which the processing unit gets
															
 
																-    its data. For instance, the memory node associated to the different
															
 
																-    CPU workers represents main memory (RAM), the memory node associated
															
 
																-    to a GPU is DRAM embedded on the device. Every memory node is
															
 
																-    identified by a logical index which is accessible from the
															
 
																-    function starpu_worker_get_memory_node(). When registering a piece of
															
 
																-    data to StarPU, the specified memory node indicates where the piece of
															
 
																-    data initially resides (we also call this memory node the home node of
															
 
																-    a piece of data).
															
 
																-
															
 
																-    In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
															
 
																-    and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
															
 
																-    numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
															
 
																-
															
 
																-    There are several ways to register a memory region so that it can be
															
 
																-    managed by StarPU. StarPU provides data interfaces for vectors, 2D
															
 
																-    matrices, 3D matrices as well as BCSR and CSR sparse matrices.
															
 
																-
															
 
																-    Each data interface is provided with a set of field access functions.
															
 
																-    The ones using a <c>void *</c> parameter aimed to be used in codelet
															
 
																-    implementations (see for example the code in
															
 
																-    \ref VectorScalingUsingStarPUAPI).
															
 
																-
															
 
																-    Applications can provide their own interface as shown in \ref DefiningANewDataInterface.
															
 
																-
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Data_Interfaces Data Interfaces
															
 
																+   @brief Data management is done at a high-level in StarPU: rather than
															
 
																+   accessing a mere list of contiguous buffers, the tasks may manipulate
															
 
																+   data that are described by a high-level construct which we call data
															
 
																+   interface.
															
 
																+
															
 
																+   An example of data interface is the "vector" interface which describes
															
 
																+   a contiguous data array on a specific memory node. This interface is a
															
 
																+   simple structure containing the number of elements in the array, the
															
 
																+   size of the elements, and the address of the array in the appropriate
															
 
																+   address space (this address may be invalid if there is no valid copy
															
 
																+   of the array in the memory node). More information on the data
															
 
																+   interfaces provided by StarPU are given in \ref API_Data_Interfaces.
															
 
																+
															
 
																+   When a piece of data managed by StarPU is used by a task, the task
															
 
																+   implementation is given a pointer to an interface describing a valid
															
 
																+   copy of the data that is accessible from the current processing unit.
															
 
																+
															
 
																+   Every worker is associated to a memory node which is a logical
															
 
																+   abstraction of the address space from which the processing unit gets
															
 
																+   its data. For instance, the memory node associated to the different
															
 
																+   CPU workers represents main memory (RAM), the memory node associated
															
 
																+   to a GPU is DRAM embedded on the device. Every memory node is
															
 
																+   identified by a logical index which is accessible from the
															
 
																+   function starpu_worker_get_memory_node(). When registering a piece of
															
 
																+   data to StarPU, the specified memory node indicates where the piece of
															
 
																+   data initially resides (we also call this memory node the home node of
															
 
																+   a piece of data).
															
 
																+
															
 
																+   In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
															
 
																+   and starpu_memory_nodes_numa_id_to_devid() can be used to convert between NUMA node
															
 
																+   numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
															
 
																+
															
 
																+   There are several ways to register a memory region so that it can be
															
 
																+   managed by StarPU. StarPU provides data interfaces for vectors, 2D
															
 
																+   matrices, 3D matrices as well as BCSR and CSR sparse matrices.
															
 
																+
															
 
																+   Each data interface is provided with a set of field access functions.
															
 
																+   The ones using a <c>void *</c> parameter are meant to be used in codelet
															
 
																+   implementations (see for example the code in
															
 
																+   \ref VectorScalingUsingStarPUAPI).
															
 
																+
															
 
																+   Applications can provide their own interface as shown in \ref DefiningANewDataInterface.
															
 
																+
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
@@ -538,9 +538,11 @@ struct starpu_data_interface_ops
 
																 	char *name;
															
 
																 };
															
 
																-/** @name Basic API
															
 
																-    @{
															
 
																-    */
															
 
																+/**
															
 
																+   @name Basic API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 /**
															
 
																    Register a piece of data into the handle located at the
															
 
																    \p handleptr address. The \p data_interface buffer contains the initial
															
@@ -706,13 +708,16 @@ void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
 
																 /** @} */
															
 
																-/** @name Accessing Matrix Data Interfaces
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Accessing Matrix Data Interfaces
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
															
 
																-/** Matrix interface for dense matrices */
															
 
																+/**
															
 
																+   Matrix interface for dense matrices
															
 
																+*/
															
 
																 struct starpu_matrix_interface
															
 
																 {
															
 
																 	enum starpu_data_interface_id id; /**< Identifier of the interface */
															
@@ -875,13 +880,16 @@ size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name Accessing COO Data Interfaces
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Accessing COO Data Interfaces
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_coo_ops;
															
 
																-/** COO Matrices */
															
 
																+/**
															
 
																+   COO Matrices
															
 
																+*/
															
 
																 struct starpu_coo_interface
															
 
																 {
															
 
																 	enum starpu_data_interface_id id; /**< identifier of the interface */
															
@@ -964,15 +972,18 @@ void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, ui
 
																 /** @} */
															
 
																-/** @name Block Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Block Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_block_ops;
															
 
																 /* TODO: rename to 3dmatrix? */
															
 
																 /* TODO: add allocsize support */
															
 
																-/** Block interface for 3D dense blocks */
															
 
																+/**
															
 
																+   Block interface for 3D dense blocks
															
 
																+*/
															
 
																 struct starpu_block_interface
															
 
																 {
															
 
																 	enum starpu_data_interface_id id; /**< identifier of the interface */
															
@@ -1115,9 +1126,10 @@ designated by \p interface.
 
																 /** @} */
															
 
																-/** @name Vector Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Vector Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_vector_ops;
															
@@ -1241,9 +1253,10 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name Variable Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Variable Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_variable_ops;
															
@@ -1322,9 +1335,10 @@ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name Void Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Void Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_void_ops;
															
@@ -1340,8 +1354,9 @@ void starpu_void_data_register(starpu_data_handle_t *handle);
 
																 /** @} */
															
 
																-/** @name CSR Data Interface
															
 
																-    @{
															
 
																+/**
															
 
																+   @name CSR Data Interface
															
 
																+   @{
															
 
																  */
															
 
																 extern struct starpu_data_interface_ops starpu_interface_csr_ops;
															
@@ -1473,9 +1488,10 @@ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name BCSR Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name BCSR Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
															
@@ -1677,9 +1693,10 @@ size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle);
 
																 /** @} */
															
 
																-/** @name Multiformat Data Interface
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Multiformat Data Interface
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Multiformat operations
															
--- a/include/starpu_disk.h
+++ b/include/starpu_disk.h
@@ -20,14 +20,17 @@
 
																 #ifndef __STARPU_DISK_H__
															
 
																 #define __STARPU_DISK_H__
															
 
																-/** @defgroup API_Out_Of_Core Out Of Core
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Out_Of_Core Out Of Core
															
 
																+   @{
															
 
																+*/
															
 
																 #include <sys/types.h>
															
 
																 #include <starpu_config.h>
															
 
																-/** Set of functions to manipulate datas on disk. */
															
 
																+/**
															
 
																+   Set of functions to manipulate data on disk.
															
 
																+*/
															
 
																 struct starpu_disk_ops
															
 
																 {
															
 
																 	/**
															
--- a/include/starpu_driver.h
+++ b/include/starpu_driver.h
@@ -18,10 +18,10 @@
 
																 #ifndef __STARPU_DRIVER_H__
															
 
																 #define __STARPU_DRIVER_H__
															
 
																-/** @defgroup API_Running_Drivers Running Drivers
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Running_Drivers Running Drivers
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
 
																 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
															
@@ -33,7 +33,9 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																-/** structure for a driver */
															
 
																+/**
															
 
																+   structure for a driver
															
 
																+*/
															
 
																 struct starpu_driver
															
 
																 {
															
 
																 	/**
															
--- a/include/starpu_expert.h
+++ b/include/starpu_expert.h
@@ -18,10 +18,10 @@
 
																 #ifndef __STARPU_EXPERT_H__
															
 
																 #define __STARPU_EXPERT_H__
															
 
																-/** @defgroup API_Expert_Mode Expert Mode
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Expert_Mode Expert Mode
															
 
																+   @{
															
 
																+*/
															
 
																 #ifdef __cplusplus
															
 
																 extern "C"
															
--- a/include/starpu_fxt.h
+++ b/include/starpu_fxt.h
@@ -21,10 +21,10 @@
 
																 #ifndef __STARPU_FXT_H__
															
 
																 #define __STARPU_FXT_H__
															
 
																-/** @defgroup API_FxT_Support FxT Support
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_FxT_Support FxT Support
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_perfmodel.h>
															
--- a/include/starpu_hash.h
+++ b/include/starpu_hash.h
@@ -19,10 +19,10 @@
 
																 #ifndef __STARPU_HASH_H__
															
 
																 #define __STARPU_HASH_H__
															
 
																-/** @ingroup API_Data_Interfaces
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @ingroup API_Data_Interfaces
															
 
																+   @{
															
 
																+*/
															
 
																 #include <stdint.h>
															
 
																 #include <stddef.h>
															
--- a/include/starpu_mic.h
+++ b/include/starpu_mic.h
@@ -19,10 +19,10 @@
 
																 #ifndef __STARPU_MIC_H__
															
 
																 #define __STARPU_MIC_H__
															
 
																-/** @defgroup API_MIC_Extensions MIC Extensions
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_MIC_Extensions MIC Extensions
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
--- a/include/starpu_mpi_ms.h
+++ b/include/starpu_mpi_ms.h
@@ -18,10 +18,10 @@
 
																 #ifndef __STARPU_MPI_MS_H__
															
 
																 #define __STARPU_MPI_MS_H__
															
 
																-/** @defgroup API_Master_Slave Master Slave Extension
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Master_Slave Master Slave Extension
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
--- a/include/starpu_opencl.h
+++ b/include/starpu_opencl.h
@@ -19,10 +19,10 @@
 
																 #ifndef __STARPU_OPENCL_H__
															
 
																 #define __STARPU_OPENCL_H__
															
 
																-/** @defgroup API_OpenCL_Extensions OpenCL Extensions
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_OpenCL_Extensions OpenCL Extensions
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
 
																 #ifdef STARPU_USE_OPENCL
															
@@ -51,9 +51,10 @@ struct starpu_opencl_program
 
																 	cl_program programs[STARPU_MAXOPENCLDEVS];
															
 
																 };
															
 
																-/** @name Writing OpenCL kernels
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Writing OpenCL kernels
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Return the OpenCL context of the device designated by \p devid
															
@@ -105,17 +106,18 @@ int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
 
																 /** @} */
															
 
																-/** @name Compiling OpenCL kernels
															
 
																-    Source codes for OpenCL kernels can be stored in a file or in a
															
 
																-    string. StarPU provides functions to build the program executable for
															
 
																-    each available OpenCL device as a cl_program object. This program
															
 
																-    executable can then be loaded within a specific queue as explained in
															
 
																-    the next section. These are only helpers, Applications can also fill a
															
 
																-    starpu_opencl_program array by hand for more advanced use (e.g.
															
 
																-    different programs on the different OpenCL devices, for relocation
															
 
																-    purpose for instance).
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Compiling OpenCL kernels
															
 
																+   Source codes for OpenCL kernels can be stored in a file or in a
															
 
																+   string. StarPU provides functions to build the program executable for
															
 
																+   each available OpenCL device as a cl_program object. This program
															
 
																+   executable can then be loaded within a specific queue as explained in
															
 
																+   the next section. These are only helpers, Applications can also fill a
															
 
																+   starpu_opencl_program array by hand for more advanced use (e.g.
															
 
																+   different programs on the different OpenCL devices, for relocation
															
 
																+   purpose for instance).
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Store the contents of the file \p source_file_name in the buffer
															
@@ -182,9 +184,10 @@ int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
 
																 /** @} */
															
 
																-/** @name Loading OpenCL kernels
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name Loading OpenCL kernels
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Create a kernel \p kernel for device \p devid, on its computation
															
@@ -200,9 +203,10 @@ int starpu_opencl_release_kernel(cl_kernel kernel);
 
																 /** @} */
															
 
																-/** @name OpenCL Statistics
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name OpenCL Statistics
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Collect statistics on a kernel execution.
															
@@ -215,9 +219,10 @@ int starpu_opencl_collect_stats(cl_event event);
 
																 /** @} */
															
 
																-/** @name OpenCL Utilities
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @name OpenCL Utilities
															
 
																+   @{
															
 
																+*/
															
 
																 /**
															
 
																    Return the error message in English corresponding to \p status, an OpenCL
															
--- a/include/starpu_openmp.h
+++ b/include/starpu_openmp.h
@@ -18,10 +18,11 @@
 
																 #ifndef __STARPU_OPENMP_H__
															
 
																 #define __STARPU_OPENMP_H__
															
 
																-/** @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
															
 
																-    @brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
															
 
																-    @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
															
 
																+   @brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
@@ -213,8 +214,9 @@ extern "C"
 
																 #define __STARPU_OMP_NOTHROW __attribute__((__nothrow__))
															
 
																 #endif
															
 
																-/** @name Initialisation
															
 
																-    @{
															
 
																+/**
															
 
																+   @name Initialisation
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
@@ -228,9 +230,10 @@ extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
 
																 /** @} */
															
 
																-/** @name Parallel
															
 
																-    \anchor ORS_Parallel
															
 
																-    @{
															
 
																+/**
															
 
																+   @name Parallel
															
 
																+   \anchor ORS_Parallel
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
@@ -266,9 +269,10 @@ extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW;
 
																 /** @} */
															
 
																-/** @name Synchronization
															
 
																-    \anchor ORS_Synchronization
															
 
																-    @{
															
 
																+/**
															
 
																+   @name Synchronization
															
 
																+   \anchor ORS_Synchronization
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
@@ -317,9 +321,10 @@ extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHRO
 
																 /** @} */
															
 
																-/** @name Worksharing
															
 
																-    \anchor ORS_Worksharing
															
 
																-    @{
															
 
																+/**
															
 
																+   @name Worksharing
															
 
																+   \anchor ORS_Worksharing
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
@@ -542,9 +547,10 @@ extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*
 
																 /** @} */
															
 
																-/** @name Task
															
 
																-    \anchor ORS_Task
															
 
																-    @{
															
 
																+/**
															
 
																+   @name Task
															
 
																+   \anchor ORS_Task
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
@@ -604,9 +610,10 @@ extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_a
 
																 /** @} */
															
 
																-/** @name API
															
 
																-    \anchor ORS_API
															
 
																-    @{
															
 
																+/**
															
 
																+   @name API
															
 
																+   \anchor ORS_API
															
 
																+   @{
															
 
																 */
															
 
																 /**
															
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -21,10 +21,10 @@
 
																 #ifndef __STARPU_PERFMODEL_H__
															
 
																 #define __STARPU_PERFMODEL_H__
															
 
																-/** @defgroup
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Performance_Model Performance Model
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
 
																 #include <stdio.h>
															
@@ -39,31 +39,37 @@ struct starpu_data_descr;
 
																 #define STARPU_NARCH STARPU_ANY_WORKER
															
 
																+/**
															
 
																+   todo
															
 
																+*/
															
 
																 struct starpu_perfmodel_device
															
 
																 {
															
 
																-	enum starpu_worker_archtype type;
															
 
																-	int devid;
															
 
																-	int ncores;
															
 
																+	enum starpu_worker_archtype type; /**< type of the device */
															
 
																+	int devid;                        /**< identifier of the precise device */
															
 
																+	int ncores;                       /**< number of executions in parallel, minus 1 */
															
 
																 };
															
 
																+/**
															
 
																+   todo
															
 
																+*/
															
 
																 struct starpu_perfmodel_arch
															
 
																 {
															
 
																-	int ndevices;
															
 
																-	struct starpu_perfmodel_device *devices;
															
 
																+	int ndevices;                            /**< number of the devices for the given arch */
															
 
																+	struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */
															
 
																 };
															
 
																 struct starpu_perfmodel_history_entry
															
 
																 {
															
 
																-	double mean;
															
 
																-	double deviation;
															
 
																-	double sum;
															
 
																-	double sum2;
															
 
																-	unsigned nsample;
															
 
																+	double mean;        /**< mean_n = 1/n sum */
															
 
																+	double deviation;   /**< n dev_n = sum2 - 1/n (sum)^2 */
															
 
																+	double sum;         /**< sum of samples (in µs) */
															
 
																+	double sum2;        /**< sum of samples^2 */
															
 
																+	unsigned nsample;   /**< number of samples */
															
 
																 	unsigned nerror;
															
 
																-	uint32_t footprint;
															
 
																-	size_t size;
															
 
																-	double flops;
															
 
																+	uint32_t footprint; /**< data footprint */
															
 
																+	size_t size;        /**< in bytes */
															
 
																+	double flops;       /**< Provided by the application */
															
 
																 	double duration;
															
 
																 	starpu_tag_t tag;
															
@@ -76,30 +82,35 @@ struct starpu_perfmodel_history_list
 
																 	struct starpu_perfmodel_history_entry *entry;
															
 
																 };
															
 
																+/**
															
 
																+   todo
															
 
																+*/
															
 
																 struct starpu_perfmodel_regression_model
															
 
																 {
															
 
																-	double sumlny;
															
 
																+	double sumlny;          /**< sum of ln(measured) */
															
 
																-	double sumlnx;
															
 
																-	double sumlnx2;
															
 
																+	double sumlnx;          /**< sum of ln(size) */
															
 
																+	double sumlnx2;         /**< sum of ln(size)^2 */
															
 
																-	unsigned long minx;
															
 
																-	unsigned long maxx;
															
 
																+	unsigned long minx;     /**< minimum size */
															
 
																+	unsigned long maxx;     /**< maximum size */
															
 
																-	double sumlnxlny;
															
 
																+	double sumlnxlny;       /**< sum of ln(size)*ln(measured) */
															
 
																-	double alpha;
															
 
																-	double beta;
															
 
																-	unsigned valid;
															
 
																+	double alpha;           /**< estimated = alpha * size ^ beta */
															
 
																+	double beta;            /**< estimated = alpha * size ^ beta */
															
 
																+	unsigned valid;         /**< whether the linear regression model is valid (i.e. enough measures) */
															
 
																-	double a, b, c;
															
 
																-	unsigned nl_valid;
															
 
																+	double a;               /**< estimated = a size ^b + c */
															
 
																+	double b;               /**< estimated = a size ^b + c */
															
 
																+	double c;               /**< estimated = a size ^b + c */
															
 
																+	unsigned nl_valid;      /**< whether the non-linear regression model is valid (i.e. enough measures) */
															
 
																-	unsigned nsample;
															
 
																+	unsigned nsample;       /**< number of sample values for non-linear regression */
															
 
																-	double *coeff;
															
 
																-	unsigned ncoeff;
															
 
																-	unsigned multi_valid;
															
 
																+	double *coeff;          /**< list of computed coefficients for multiple linear regression model */
															
 
																+	unsigned ncoeff;        /**< number of coefficients for multiple linear regression model */
															
 
																+	unsigned multi_valid;   /**< whether the multiple linear regression model is valid */
															
 
																 };
															
 
																 struct starpu_perfmodel_history_table;
															
@@ -109,66 +120,224 @@ struct starpu_perfmodel_history_table;
 
																 typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																 typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																+/**
															
 
																+   information about the performance model of a given arch.
															
 
																+*/
															
 
																 struct starpu_perfmodel_per_arch
															
 
																 {
															
 
																+	/**
															
 
																+	   Used by ::STARPU_PER_ARCH, must point to functions which take a
															
 
																+	   task, the target arch and implementation number (as mere
															
 
																+	   conveniency, since the array is already indexed by these), and
															
 
																+	   must return a task duration estimation in micro-seconds.
															
 
																+	*/
															
 
																 	starpu_perfmodel_per_arch_cost_function cost_function;
															
 
																+	/**
															
 
																+	   Same as in structure starpu_perfmodel, but per-arch, in case it
															
 
																+	   depends on the architecture-specific implementation.
															
 
																+	*/
															
 
																 	starpu_perfmodel_per_arch_size_base size_base;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   The history of performance measurements.
															
 
																+	*/
															
 
																 	struct starpu_perfmodel_history_table *history;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
															
 
																+	   ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
															
 
																+	   measures.
															
 
																+	*/
															
 
																 	struct starpu_perfmodel_history_list *list;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
															
 
																+	   and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
															
 
																+	   factors of the regression.
															
 
																+	*/
															
 
																 	struct starpu_perfmodel_regression_model regression;
															
 
																 	char debug_path[256];
															
 
																 };
															
 
																+/**
															
 
																+   todo
															
 
																+*/
															
 
																 enum starpu_perfmodel_type
															
 
																 {
															
 
																         STARPU_PERFMODEL_INVALID=0,
															
 
																-	STARPU_PER_ARCH,
															
 
																-	STARPU_COMMON,
															
 
																-	STARPU_HISTORY_BASED,
															
 
																-	STARPU_REGRESSION_BASED,
															
 
																-	STARPU_NL_REGRESSION_BASED,
															
 
																-	STARPU_MULTIPLE_REGRESSION_BASED
															
 
																+	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
															
 
																+	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
															
 
																+	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
															
 
																+	STARPU_REGRESSION_BASED,          /**< Automatic linear regression-based cost model  (alpha * size ^ beta) */
															
 
																+	STARPU_NL_REGRESSION_BASED,       /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */
															
 
																+	STARPU_MULTIPLE_REGRESSION_BASED  /**< Automatic multiple linear regression-based cost model. Application
															
 
																+					     provides parameters, their combinations and exponents. */
															
 
																 };
															
 
																 struct _starpu_perfmodel_state;
															
 
																 typedef struct _starpu_perfmodel_state* starpu_perfmodel_state_t;
															
 
																+/**
															
 
																+   Contain all information about a performance model. At least the
															
 
																+   type and symbol fields have to be filled when defining a performance
															
 
																+   model for a codelet. For compatibility, make sure to initialize the
															
 
																+   whole structure to zero, either by using explicit memset, or by
															
 
																+   letting the compiler implicitly do it in e.g. static storage case. If
															
 
																+   not provided, other fields have to be zero.
															
 
																+*/
															
 
																 struct starpu_perfmodel
															
 
																 {
															
 
																+	/**
															
 
																+	   type of performance model
															
 
																+	   <ul>
															
 
																+	   <li>
															
 
																+	   ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
															
 
																+	   ::STARPU_NL_REGRESSION_BASED: No other field needs to be
															
 
																+	   provided, this is purely history-based.
															
 
																+	   </li>
															
 
																+	   <li>
															
 
																+	   ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
															
 
																+	   starpu_perfmodel::nparameters (number of different parameters),
															
 
																+	   starpu_perfmodel::ncombinations (number of parameters
															
 
																+	   combinations-tuples) and table starpu_perfmodel::combinations
															
 
																+	   which defines exponents of the equation. Function cl_perf_func
															
 
																+	   also needs to define how to extract parameters from the task.
															
 
																+	   </li>
															
 
																+	   <li>
															
 
																+	   ::STARPU_PER_ARCH: either field
															
 
																+	   starpu_perfmodel::arch_cost_function has to be filled with a
															
 
																+	   function that returns the cost in micro-seconds on the arch given
															
 
																+	   as parameter, or field starpu_perfmodel::per_arch has to be filled
															
 
																+	   with functions which return the cost in micro-seconds.
															
 
																+	   </li>
															
 
																+	   <li>
															
 
																+	   ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
															
 
																+	   filled with a function that returns the cost in micro-seconds on a
															
 
																+	   CPU, timing on other archs will be determined by multiplying by an
															
 
																+	   arch-specific factor.
															
 
																+	   </li>
															
 
																+	   </ul>
															
 
																+	*/
															
 
																 	enum starpu_perfmodel_type type;
															
 
																+	/**
															
 
																+	   Used by ::STARPU_COMMON. Take a task and implementation number,
															
 
																+	   and must return a task duration estimation in micro-seconds.
															
 
																+	*/
															
 
																 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
															
 
																+	/**
															
 
																+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
															
 
																+	   number, and must return a task duration estimation in
															
 
																+	   micro-seconds on that arch.
															
 
																+	*/
															
 
																 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
															
 
																+	/**
															
 
																+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
															
 
																+	   ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
															
 
																+	   implementation number, and return the size to be used as index to
															
 
																+	   distinguish histories and as a base for regressions.
															
 
																+	*/
															
 
																 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
															
 
																+	/**
															
 
																+	   Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
															
 
																+	   and return the footprint to be used as index to distinguish
															
 
																+	   histories. The default is to use the starpu_task_data_footprint()
															
 
																+	   function.
															
 
																+	*/
															
 
																 	uint32_t (*footprint)(struct starpu_task *);
															
 
																+	/**
															
 
																+	   symbol name for the performance model, which will be used as file
															
 
																+	   name to store the model. It must be set otherwise the model will
															
 
																+	   be ignored.
															
 
																+	*/
															
 
																 	const char *symbol;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Whether the performance model is already loaded from the disk.
															
 
																+	*/
															
 
																 	unsigned is_loaded;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	*/
															
 
																 	unsigned benchmarking;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	*/
															
 
																 	unsigned is_init;
															
 
																 	void (*parameters)(struct starpu_task * task, double *parameters);
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Names of parameters used for multiple linear regression models (M,
															
 
																+	   N, K)
															
 
																+	*/
															
 
																 	const char **parameters_names;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Number of parameters used for multiple linear regression models
															
 
																+	*/
															
 
																 	unsigned nparameters;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Table of combinations of parameters (and the exponents) used for
															
 
																+	   multiple linear regression models
															
 
																+	*/
															
 
																 	unsigned **combinations;
															
 
																+	/**
															
 
																+	   \private
															
 
																+	   Number of combinations of parameters used for multiple linear
															
 
																+	   regression models
															
 
																+	*/
															
 
																 	unsigned ncombinations;
															
 
																-
															
 
																+	/**
															
 
																+	   \private
															
 
																+	*/
															
 
																 	starpu_perfmodel_state_t state;
															
 
																 };
															
 
																 void starpu_perfmodel_init(struct starpu_perfmodel *model);
															
 
																+
															
 
																+/**
															
 
																+   Load the performance model found in the file named \p filename. \p model has to be
															
 
																+   completely zero, and will be filled with the information stored in the given file.
															
 
																+*/
															
 
																 int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model);
															
 
																+
															
 
																+/**
															
 
																+   Load a given performance model. \p model has to be
															
 
																+   completely zero, and will be filled with the information stored in
															
 
																+   <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
															
 
																+   external tools that want to read the performance model files.
															
 
																+*/
															
 
																+
															
 
																 int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model);
															
 
																+
															
 
																+/**
															
 
																+   Unload \p model which has been previously loaded
															
 
																+   through the function starpu_perfmodel_load_symbol()
															
 
																+*/
															
 
																 int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
															
 
																+
															
 
																 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
															
 
																+/**
															
 
																+   Free internal memory used for sampling directory
															
 
																+   management. It should only be called by an application which is not
															
 
																+   calling starpu_shutdown() as this function already calls it. See for
															
 
																+   example <c>tools/starpu_perfmodel_display.c</c>.
															
 
																+*/
															
 
																 void starpu_perfmodel_free_sampling_directories(void);
															
 
																+/**
															
 
																+   Return the architecture type of the worker \p workerid.
															
 
																+*/
															
 
																 struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id);
															
 
																+
															
 
																 int starpu_perfmodel_get_narch_combs();
															
 
																 int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
															
 
																 int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
															
@@ -180,39 +349,88 @@ struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct
 
																 int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
															
 
																 int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
															
 
																+/**
															
 
																+   Return the path to the debugging information for the performance model.
															
 
																+*/
															
 
																 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
															
 
																+
															
 
																 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
															
 
																+
															
 
																+/**
															
 
																+   Return the architecture name for \p arch
															
 
																+*/
															
 
																 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
															
 
																+/**
															
 
																+   Return the estimated time of a task with the given model and the given footprint.
															
 
																+*/
															
 
																 double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint);
															
 
																+
															
 
																+/**
															
 
																+   If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
															
 
																+*/
															
 
																 void starpu_perfmodel_initialize(void);
															
 
																+
															
 
																+/**
															
 
																+   Print a list of all performance models on \p output
															
 
																+*/
															
 
																 int starpu_perfmodel_list(FILE *output);
															
 
																+
															
 
																 void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output);
															
 
																 int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
															
 
																 int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output);
															
 
																 int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model);
															
 
																+/**
															
 
																+   Feed the performance model \p model with an explicit
															
 
																+   measurement \p measured (in µs), in addition to measurements done by StarPU
															
 
																+   itself. This can be useful when the application already has an
															
 
																+   existing set of measurements done in good conditions, that StarPU
															
 
																+   could benefit from instead of doing on-line measurements. An example
															
 
																+   of use can be seen in \ref PerformanceModelExample.
															
 
																+*/
															
 
																 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
															
 
																+
															
 
																+/**
															
 
																+   Print the directory name storing performance models on \p output
															
 
																+*/
															
 
																 void starpu_perfmodel_directory(FILE *output);
															
 
																+/**
															
 
																+   Print a matrix of bus bandwidths on \p f.
															
 
																+*/
															
 
																 void starpu_bus_print_bandwidth(FILE *f);
															
 
																+
															
 
																+/**
															
 
																+   Print the affinity devices on \p f.
															
 
																+*/
															
 
																 void starpu_bus_print_affinity(FILE *f);
															
 
																+
															
 
																+/**
															
 
																+   Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
															
 
																+*/
															
 
																 void starpu_bus_print_filenames(FILE *f);
															
 
																+/**
															
 
																+   Return the bandwidth of data transfer between two memory nodes
															
 
																+*/
															
 
																 double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node);
															
 
																+
															
 
																+/**
															
 
																+   Return the latency of data transfer between two memory nodes
															
 
																+*/
															
 
																 double starpu_transfer_latency(unsigned src_node, unsigned dst_node);
															
 
																-double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
															
 
																-extern struct starpu_perfmodel starpu_perfmodel_nop;
															
 
																+/**
															
 
																+   Return the estimated time to transfer a given size between two memory nodes.
															
 
																+*/
															
 
																+double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
															
 
																 /**
															
 
																-   Display statistics about the current data handles registered
															
 
																-   within StarPU. StarPU must have been configured with the configure
															
 
																-   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
															
 
																-   MemoryFeedback).
															
 
																+   Performance model which just always returns 1µs.
															
 
																 */
															
 
																-void starpu_data_display_memory_stats();
															
 
																+extern struct starpu_perfmodel starpu_perfmodel_nop;
															
 
																 #ifdef __cplusplus
															
 
																 }
															
--- a/include/starpu_profiling.h
+++ b/include/starpu_profiling.h
@@ -19,10 +19,10 @@
 
																 #ifndef __STARPU_PROFILING_H__
															
 
																 #define __STARPU_PROFILING_H__
															
 
																-/** @defgroup
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Profiling Profiling
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
 
																 #include <errno.h>
															
@@ -33,48 +33,89 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																+/**
															
 
																+   Used when calling the function starpu_profiling_status_set() to disable profiling.
															
 
																+*/
															
 
																 #define STARPU_PROFILING_DISABLE	0
															
 
																+/**
															
 
																+   Used when calling the function starpu_profiling_status_set() to enable profiling.
															
 
																+*/
															
 
																 #define STARPU_PROFILING_ENABLE		1
															
 
																+/**
															
 
																+   Information about the execution of a task. It is accessible from
															
 
																+   the field starpu_task::profiling_info if profiling was enabled.
															
 
																+ */
															
 
																 struct starpu_profiling_task_info
															
 
																 {
															
 
																+	/** Date of task submission (relative to the initialization of StarPU). */
															
 
																 	struct timespec submit_time;
															
 
																+	/** Time when the task was submitted to the scheduler. */
															
 
																 	struct timespec push_start_time;
															
 
																+	/** Time when the scheduler finished with the task submission. */
															
 
																 	struct timespec push_end_time;
															
 
																+	/** Time when the scheduler started to be requested for a task, and eventually gave that task. */
															
 
																 	struct timespec pop_start_time;
															
 
																+	/** Time when the scheduler finished providing the task for execution. */
															
 
																 	struct timespec pop_end_time;
															
 
																+	/** Time when the worker started fetching input data. */
															
 
																 	struct timespec acquire_data_start_time;
															
 
																+	/** Time when the worker finished fetching input data. */
															
 
																 	struct timespec acquire_data_end_time;
															
 
																+	/** Date of task execution beginning (relative to the initialization of StarPU). */
															
 
																 	struct timespec start_time;
															
 
																+	/** Date of task execution termination (relative to the initialization of StarPU). */
															
 
																 	struct timespec end_time;
															
 
																+	/** Time when the worker started releasing data. */
															
 
																 	struct timespec release_data_start_time;
															
 
																+	/** Time when the worker finished releasing data. */
															
 
																 	struct timespec release_data_end_time;
															
 
																+	/** Time when the worker started the application callback for the task. */
															
 
																 	struct timespec callback_start_time;
															
 
																+	/** Time when the worker finished the application callback for the task. */
															
 
																 	struct timespec callback_end_time;
															
 
																 	/* TODO add expected length, expected start/end ? */
															
 
																+
															
 
																+	/** Identifier of the worker which has executed the task. */
															
 
																 	int workerid;
															
 
																+	/** Number of cycles used by the task, only available in the MoviSim */
															
 
																 	uint64_t used_cycles;
															
 
																+	/** Number of cycles stalled within the task, only available in the MoviSim */
															
 
																 	uint64_t stall_cycles;
															
 
																+	/** Energy consumed by the task, in Joules */
															
 
																 	double energy_consumed;
															
 
																 };
															
 
																+/**
															
 
																+   Profiling information associated to a worker. The timing is
															
 
																+   provided since the previous call to
															
 
																+   starpu_profiling_worker_get_info()
															
 
																+*/
															
 
																 struct starpu_profiling_worker_info
															
 
																 {
															
 
																+	/** Starting date for the reported profiling measurements. */
															
 
																 	struct timespec start_time;
															
 
																+	/** Duration of the profiling measurement interval. */
															
 
																 	struct timespec total_time;
															
 
																+	/** Time spent by the worker to execute tasks during the profiling measurement interval. */
															
 
																 	struct timespec executing_time;
															
 
																+	/** Time spent idling by the worker during the profiling measurement interval. */
															
 
																 	struct timespec sleeping_time;
															
 
																+	/** Number of tasks executed by the worker during the profiling measurement interval. */
															
 
																 	int executed_tasks;
															
 
																+	/** Number of cycles used by the worker, only available in the MoviSim */
															
 
																 	uint64_t used_cycles;
															
 
																+	/** Number of cycles stalled within the worker, only available in the MoviSim */
															
 
																 	uint64_t stall_cycles;
															
 
																+	/** Energy consumed by the worker, in Joules */
															
 
																 	double energy_consumed;
															
 
																 	double flops;
															
@@ -82,15 +123,43 @@ struct starpu_profiling_worker_info
 
																 struct starpu_profiling_bus_info
															
 
																 {
															
 
																+	/** Time of bus profiling startup. */
															
 
																 	struct timespec start_time;
															
 
																+	/** Total time of bus profiling. */
															
 
																 	struct timespec total_time;
															
 
																+	/** Number of bytes transferred during profiling. */
															
 
																 	int long long transferred_bytes;
															
 
																+	/** Number of transfers during profiling. */
															
 
																 	int transfer_count;
															
 
																 };
															
 
																+/**
															
 
																+   Reset performance counters and enable profiling if the
															
 
																+   environment variable \ref STARPU_PROFILING is set to a positive value.
															
 
																+*/
															
 
																 void starpu_profiling_init(void);
															
 
																+
															
 
																+/**
															
 
																+   Set the ID used for profiling trace filename. Has to be called before starpu_init().
															
 
																+*/
															
 
																 void starpu_profiling_set_id(int new_id);
															
 
																+
															
 
																+/**
															
 
																+   Set the profiling status. Profiling is activated
															
 
																+   by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
															
 
																+   \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
															
 
																+   resets all profiling measurements. When profiling is enabled, the
															
 
																+   field starpu_task::profiling_info points to a valid structure
															
 
																+   starpu_profiling_task_info containing information about the execution
															
 
																+   of the task. Negative return values indicate an error, otherwise the
															
 
																+   previous status is returned.
															
 
																+*/
															
 
																 int starpu_profiling_status_set(int status);
															
 
																+
															
 
																+/**
															
 
																+   Return the current profiling status or a negative value in case
															
 
																+   there was an error.
															
 
																+*/
															
 
																 int starpu_profiling_status_get(void);
															
 
																 #ifdef BUILDING_STARPU
															
@@ -107,17 +176,43 @@ extern int _starpu_profiling;
 
																 #endif
															
 
																 #endif
															
 
																+/**
															
 
																+   Get the profiling info associated to the worker identified by
															
 
																+   \p workerid, and reset the profiling measurements. If the argument \p
															
 
																+   worker_info is <c>NULL</c>, only reset the counters associated to worker
															
 
																+   \p workerid. Upon successful completion, this function returns 0.
															
 
																+   Otherwise, a negative value is returned.
															
 
																+*/
															
 
																 int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info);
															
 
																+/**
															
 
																+   Return the number of buses in the machine
															
 
																+*/
															
 
																 int starpu_bus_get_count(void);
															
 
																+
															
 
																+/**
															
 
																+   Return the identifier of the bus between \p src and \p dst
															
 
																+*/
															
 
																 int starpu_bus_get_id(int src, int dst);
															
 
																+
															
 
																+/**
															
 
																+   Return the source point of bus \p busid
															
 
																+*/
															
 
																 int starpu_bus_get_src(int busid);
															
 
																+
															
 
																+/**
															
 
																+   Return the destination point of bus \p busid
															
 
																+*/
															
 
																 int starpu_bus_get_dst(int busid);
															
 
																 void starpu_bus_set_direct(int busid, int direct);
															
 
																 int starpu_bus_get_direct(int busid);
															
 
																 void starpu_bus_set_ngpus(int busid, int ngpus);
															
 
																 int starpu_bus_get_ngpus(int busid);
															
 
																+/**
															
 
																+   See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example.
															
 
																+   Note that calling starpu_bus_get_profiling_info() resets the counters to zero.
															
 
																+*/
															
 
																 int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info);
															
 
																 /* Some helper functions to manipulate profiling API output */
															
@@ -177,12 +272,38 @@ static __starpu_inline void starpu_timespec_sub(const struct timespec *a,
 
																 #define starpu_timespec_cmp(a, b, CMP)                          \
															
 
																 	(((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP (b)->tv_nsec) : ((a)->tv_sec CMP (b)->tv_sec))
															
 
																+/**
															
 
																+   Return the time elapsed between \p start and \p end in microseconds.
															
 
																+*/
															
 
																 double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end);
															
 
																+
															
 
																+/**
															
 
																+   Convert the given timespec \p ts into microseconds
															
 
																+*/
															
 
																 double starpu_timing_timespec_to_us(struct timespec *ts);
															
 
																+/**
															
 
																+   Display statistics about the bus on \c stderr if the environment
															
 
																+   variable \ref STARPU_BUS_STATS is defined. The function is called
															
 
																+   automatically by starpu_shutdown().
															
 
																+*/
															
 
																 void starpu_profiling_bus_helper_display_summary(void);
															
 
																+
															
 
																+/**
															
 
																+   Display statistics about the workers on \c stderr if the
															
 
																+   environment variable \ref STARPU_WORKER_STATS is defined. The function is
															
 
																+   called automatically by starpu_shutdown().
															
 
																+*/
															
 
																 void starpu_profiling_worker_helper_display_summary(void);
															
 
																+/**
															
 
																+   Display statistics about the current data handles registered
															
 
																+   within StarPU. StarPU must have been configured with the configure
															
 
																+   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
															
 
																+   MemoryFeedback).
															
 
																+*/
															
 
																+void starpu_data_display_memory_stats();
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
--- a/include/starpu_rand.h
+++ b/include/starpu_rand.h
@@ -19,9 +19,9 @@
 
																 #ifndef __STARPU_RAND_H__
															
 
																 #define __STARPU_RAND_H__
															
 
																-/** @defgroup
															
 
																- *
															
 
																- * @{
															
 
																+/**
															
 
																+   @defgroup API_Random_Functions Random Functions
															
 
																+   @{
															
 
																  */
															
 
																 #include <stdlib.h>
															
--- a/include/starpu_scc.h
+++ b/include/starpu_scc.h
@@ -19,19 +19,32 @@
 
																 #ifndef __STARPU_SCC_H__
															
 
																 #define __STARPU_SCC_H__
															
 
																-/** @defgroup
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_SCC_Extensions SCC Extensions
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu_config.h>
															
 
																 #ifdef STARPU_USE_SCC
															
 
																+/**
															
 
																+   Type for SCC function symbols
															
 
																+*/
															
 
																 typedef void *starpu_scc_func_symbol_t;
															
 
																+/**
															
 
																+   Initiate a lookup on each SCC device to find the address of the
															
 
																+   function named \p func_name, store them in the global array kernels
															
 
																+   and return the index in the array through \p symbol.
															
 
																+*/
															
 
																 int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name);
															
 
																+/**
															
 
																+   If success, return the pointer to the function defined by \p symbol on
															
 
																+   the device linked to the called device. This can for instance be used
															
 
																+   in a starpu_scc_func_symbol_t implementation.
															
 
																+*/
															
 
																 starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
															
 
																 #endif /* STARPU_USE_SCC */
															
--- a/include/starpu_sched_component.h
+++ b/include/starpu_sched_component.h
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2017                                     Arthur Chevalier
															
 
																  * Copyright (C) 2013,2014,2017                           Inria
															
 
																- * Copyright (C) 2014,2015,2017,2019                           CNRS
															
 
																+ * Copyright (C) 2014,2015,2017,2019                      CNRS
															
 
																  * Copyright (C) 2014-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2013                                     Simon Archipoff
															
 
																  *
															
@@ -21,10 +21,10 @@
 
																 #ifndef __STARPU_SCHED_COMPONENT_H__
															
 
																 #define __STARPU_SCHED_COMPONENT_H__
															
 
																-/** @defgroup
															
 
																- *
															
 
																- * @{
															
 
																- */
															
 
																+/**
															
 
																+   @defgroup API_Modularized_Scheduler Modularized Scheduler Interface
															
 
																+   @{
															
 
																+*/
															
 
																 #include <starpu.h>
															
@@ -37,106 +37,379 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																+/**
															
 
																+   flags for starpu_sched_component::properties
															
 
																+*/
															
 
																 enum starpu_sched_component_properties
															
 
																 {
															
 
																+	/** indicate that all workers have the same starpu_worker_archtype */
															
 
																 	STARPU_SCHED_COMPONENT_HOMOGENEOUS = (1<<0),
															
 
																+	/** indicate that all workers have the same memory component */
															
 
																 	STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE = (1<<1)
															
 
																 };
															
 
																+/**
															
 
																+   indicate if component is homogeneous
															
 
																+*/
															
 
																 #define STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component) ((component)->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS)
															
 
																+
															
 
																+/**
															
 
																+   indicate if all workers have the same memory component
															
 
																+*/
															
 
																 #define STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component) ((component)->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
															
 
																+/**
															
 
																+   Structure for a scheduler module.  A scheduler is a
															
 
																+   tree-like structure of them, some parts of scheduler can be shared by
															
 
																+   several contexts to perform some local optimisations, so, for all
															
 
																+   components, a list of parent is defined by \c sched_ctx_id. They
															
 
																+   embed their specialised methods in a pseudo object-style, so calls are
															
 
																+   like <c>component->push_task(component,task)</c>
															
 
																+*/
															
 
																 struct starpu_sched_component
															
 
																 {
															
 
																+	/** The tree containing the component*/
															
 
																 	struct starpu_sched_tree *tree;
															
 
																+	/** set of underlying workers */
															
 
																 	struct starpu_bitmap *workers;
															
 
																+	/**
															
 
																+	   subset of starpu_sched_component::workers that is currently available in the context
															
 
																+	   The push method should take this value into account, it is set with:
															
 
																+	   component->workers UNION tree->workers UNION
															
 
																+	   component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component
															
 
																+	*/
															
 
																 	struct starpu_bitmap *workers_in_ctx;
															
 
																+	/** private data */
															
 
																 	void *data;
															
 
																 	char *name;
															
 
																+	/** number of component's children */
															
 
																 	unsigned nchildren;
															
 
																+	/** vector of component's children */
															
 
																 	struct starpu_sched_component **children;
															
 
																+	/** number of component's parents */
															
 
																 	unsigned nparents;
															
 
																+	/** vector of component's parents */
															
 
																 	struct starpu_sched_component **parents;
															
 
																+	/** add a child to component */
															
 
																 	void (*add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
															
 
																+	/** remove a child from component */
															
 
																 	void (*remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
															
 
																 	void (*add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
															
 
																 	void (*remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
															
 
																+	/**
															
 
																+	   push a task in the scheduler module. this function is called to
															
 
																+	   push a task on component subtree, this can either perform a
															
 
																+	   recursive call on a child or store the task in the component,
															
 
																+	   then it will be returned by a further pull_task call.
															
 
																+	   the caller must ensure that component is able to execute task.
															
 
																+	   This method must either return 0 if the task was properly stored or
															
 
																+	   passed over to a child component, or return a value different from 0 if the
															
 
																+	   task could not be consumed (e.g. the queue is full).
															
 
																+	*/
															
 
																 	int (*push_task)(struct starpu_sched_component *, struct starpu_task *);
															
 
																+
															
 
																+	/**
															
 
																+	   pop a task from the scheduler module. this function is called by workers to get a task from their
															
 
																+	   parents. this function should first return a locally stored task
															
 
																+	   or perform a recursive call on the parents.
															
 
																+	   the task returned by this function should be executable by the caller
															
 
																+	*/
															
 
																 	struct starpu_task *(*pull_task)(struct starpu_sched_component *from, struct starpu_sched_component *to);
															
 
																+	/**
															
 
																+	   This function is called by a component which implements a queue,
															
 
																+	   allowing it to signify to its parents that an empty slot is
															
 
																+	   available in its queue. This should return 1 if some tasks could be pushed
															
 
																+	   The basic implementation of this function
															
 
																+	   is a recursive call to its parents, the user has to specify a
															
 
																+	   personally-made function to catch those calls.
															
 
																+	*/
															
 
																 	int (*can_push)(struct starpu_sched_component *from, struct starpu_sched_component *to);
															
 
																+
															
 
																+	/**
															
 
																+	   This function allows a component to wake up a worker. It is
															
 
																+	   currently called by component which implements a queue, to
															
 
																+	   signify to its children that a task has been pushed in its local
															
 
																+	   queue, and is available to be popped by a worker, for example.
															
 
																+	   This should return 1 if some container or worker could (or will) pull
															
 
																+	   some tasks.
															
 
																+	   The basic implementation of this function is a recursive call to
															
 
																+	   its children, until at least one worker has been woken up.
															
 
																+	*/
															
 
																 	int (*can_pull)(struct starpu_sched_component *component);
															
 
																 	int (*notify)(struct starpu_sched_component* component, int message_ID, void* arg);
															
 
																+	/**
															
 
																+	   heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum
															
 
																+	   of relative speedups of workers available in context.
															
 
																+	   estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker))
															
 
																+	*/
															
 
																 	double (*estimated_load)(struct starpu_sched_component *component);
															
 
																+	/**
															
 
																+	   return the time when a worker will enter in starvation. This function is relevant only if the task->predicted
															
 
																+	   member has been set.
															
 
																+	*/
															
 
																 	double (*estimated_end)(struct starpu_sched_component *component);
															
 
																+	/**
															
 
																+	   called by starpu_sched_component_destroy. Should free data allocated during creation
															
 
																+	*/
															
 
																 	void (*deinit_data)(struct starpu_sched_component *component);
															
 
																+
															
 
																+	/**
															
 
																+	   this function is called for each component when workers are added or removed from a context
															
 
																+	*/
															
 
																 	void (*notify_change_workers)(struct starpu_sched_component *component);
															
 
																 	int properties;
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																+	/**
															
 
																+	   the hwloc object associated to scheduler module. points to the
															
 
																+	   part of topology that is bound to this component, e.g. a NUMA
															
 
																+	   node for a ws component that would balance load between
															
 
																+	   underlying sockets
															
 
																+	*/
															
 
																 	hwloc_obj_t obj;
															
 
																 #else
															
 
																 	void *obj;
															
 
																 #endif
															
 
																 };
															
 
																+/**
															
 
																+   The actual scheduler
															
 
																+*/
															
 
																 struct starpu_sched_tree
															
 
																 {
															
 
																+	/**
															
 
																+	   entry module of the scheduler
															
 
																+	*/
															
 
																 	struct starpu_sched_component *root;
															
 
																+	/**
															
 
																+	   set of workers available in this context, this value is used to mask workers in modules
															
 
																+	*/
															
 
																 	struct starpu_bitmap *workers;
															
 
																+	/**
															
 
																+	   context id of the scheduler
															
 
																+	*/
															
 
																 	unsigned sched_ctx_id;
															
 
																+	/**
															
 
																+	   lock used to protect the scheduler, it is taken in read mode when pushing a task and in write mode for adding or
															
 
																+	   removing workers
															
 
																+	*/
															
 
																 	starpu_pthread_mutex_t lock;
															
 
																 };
															
 
																+void starpu_initialize_prio_center_policy(unsigned sched_ctx_id);
															
 
																+
															
 
																+/**
															
 
																+   @name Scheduling Tree API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   create an empty initialized starpu_sched_tree
															
 
																+*/
															
 
																 struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id) STARPU_ATTRIBUTE_MALLOC;
															
 
																+/**
															
 
																+   destroy tree and free all non shared component in it.
															
 
																+*/
															
 
																 void starpu_sched_tree_destroy(struct starpu_sched_tree *tree);
															
 
																 struct starpu_sched_tree *starpu_sched_tree_get(unsigned sched_ctx_id);
															
 
																+/**
															
 
																+   recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers).
															
 
																+*/
															
 
																 void starpu_sched_tree_update_workers(struct starpu_sched_tree *t);
															
 
																+/**
															
 
																+   recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers)
															
 
																+*/
															
 
																 void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t);
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+*/
															
 
																 int starpu_sched_tree_push_task(struct starpu_task *task);
															
 
																-int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+*/
															
 
																 struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx);
															
 
																+
															
 
																+/**
															
 
																+   Push a task to a component. This is a helper for <c>component->push_task(component, task)</c> plus tracing.
															
 
																+*/
															
 
																+int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
															
 
																+
															
 
																+/**
															
 
																+   Pull a task from a component. This is a helper for <c>from->pull_task(from, to)</c> plus tracing.
															
 
																+*/
															
 
																 struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to);
															
 
																+
															
 
																 struct starpu_task* starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *to, int* success);
															
 
																 struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success);
															
 
																 int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component);
															
 
																-
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+*/
															
 
																 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+*/
															
 
																 void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
															
 
																+/**
															
 
																+   Attach component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. MCT)
															
 
																+*/
															
 
																+void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
															
 
																+
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Generic Scheduling Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 typedef struct starpu_sched_component * (*starpu_sched_component_create_t)(struct starpu_sched_tree *tree, void *data);
															
 
																+
															
 
																+/**
															
 
																+   allocate and initialize component fields with default values:
															
 
																+   .pull_task makes a recursive call on the parents
															
 
																+   .estimated_load compute relative speedup and tasks in sub tree
															
 
																+   .estimated_end return the minimum of recursive call on children
															
 
																+   .add_child is starpu_sched_component_add_child
															
 
																+   .remove_child is starpu_sched_component_remove_child
															
 
																+   .notify_change_workers does nothing
															
 
																+   .deinit_data does nothing
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) STARPU_ATTRIBUTE_MALLOC;
															
 
																-void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child);
															
 
																+
															
 
																+/**
															
 
																+   free data allocated by starpu_sched_component_create and call component->deinit_data(component)
															
 
																+   set to <c>NULL</c> the member starpu_sched_component::fathers[sched_ctx_id] of all children if it is equal to \p component
															
 
																+*/
															
 
																+
															
 
																 void starpu_sched_component_destroy(struct starpu_sched_component *component);
															
 
																+/**
															
 
																+   recursively destroy non shared parts of a \p component 's tree
															
 
																+*/
															
 
																 void starpu_sched_component_destroy_rec(struct starpu_sched_component *component);
															
 
																+
															
 
																+void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child);
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component can execute \p task, this function takes into account the workers available in the scheduling context
															
 
																+*/
															
 
																 int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task);
															
 
																+
															
 
																+/**
															
 
																+   return a non <c>NULL</c> value if \p component can execute \p task.
															
 
																+   write the execution prediction length for the best implementation of the best worker available and write this at \p length address.
															
 
																+   this result is more relevant if starpu_sched_component::is_homogeneous is non <c>NULL</c>.
															
 
																+   if a worker needs to be calibrated for an implementation, nan is set to \p length.
															
 
																+*/
															
 
																 int STARPU_WARN_UNUSED_RESULT starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length);
															
 
																+
															
 
																+/**
															
 
																+   return the average time to transfer \p task data to underlying \p component workers.
															
 
																+*/
															
 
																 double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task);
															
 
																+
															
 
																 void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *component, struct starpu_task *task);
															
 
																-void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Worker Component API
															
 
																+   @{
															
 
																+*/
															
 
																+/**
															
 
																+   return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid);
															
 
																 struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid);
															
 
																+
															
 
																+/**
															
 
																+   Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers).
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers);
															
 
																+
															
 
																+/**
															
 
																+   return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0
															
 
																+*/
															
 
																 int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component);
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a worker component
															
 
																+*/
															
 
																 int starpu_sched_component_is_worker(struct starpu_sched_component *component);
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a simple worker component
															
 
																+*/
															
 
																 int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component);
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a combined worker component
															
 
																+*/
															
 
																 int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component);
															
 
																+
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+   update predictions for workers
															
 
																+*/
															
 
																 void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
															
 
																+
															
 
																+/**
															
 
																+   compatibility with starpu_sched_policy interface
															
 
																+*/
															
 
																 void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Flow-control Fifo Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   default function for the can_push component method, just call can_push of parents until one of them returns non-zero
															
 
																+*/
															
 
																 int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to);
															
 
																+
															
 
																+/**
															
 
																+default function for the can_pull component method, just call can_pull of children until one of them returns non-zero
															
 
																+*/
															
 
																 int starpu_sched_component_can_pull(struct starpu_sched_component * component);
															
 
																+
															
 
																+/**
															
 
																+   function for the can_pull component method, call can_pull of all children
															
 
																+*/
															
 
																 int starpu_sched_component_can_pull_all(struct starpu_sched_component * component);
															
 
																+
															
 
																+/**
															
 
																+   default function for the estimated_load component method, just sum up the loads
															
 
																+   of the children of the component.
															
 
																+*/
															
 
																 double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
															
 
																+
															
 
																+/**
															
 
																+   function that can be used for the estimated_end component method, compute the minimum completion time of the children.
															
 
																+*/
															
 
																 double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
															
 
																+
															
 
																+/**
															
 
																+   function that can be used for the estimated_end component method, compute
															
 
																+   the minimum completion time of the children, and add to it an estimation of how
															
 
																+   existing queued work, plus the exp_len work, can be completed. This is typically
															
 
																+   used instead of starpu_sched_component_estimated_end_min when the component
															
 
																+   contains a queue of tasks, which thus needs to be added to the estimations.
															
 
																+*/
															
 
																 double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
															
 
																+
															
 
																+/**
															
 
																+   default function for the estimated_end component method, compute the average completion time of the children.
															
 
																+*/
															
 
																 double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
															
 
																 struct starpu_sched_component_fifo_data
															
@@ -145,9 +418,25 @@ struct starpu_sched_component_fifo_data
 
																 	double exp_len_threshold;
															
 
																 };
															
 
																+/**
															
 
																+   Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities.
															
 
																+   A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it.
															
 
																+   starpu_sched_component::estimated_end function computes the estimated length by dividing the sequential length by the number of underlying workers.
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a fifo component
															
 
																+*/
															
 
																 int starpu_sched_component_is_fifo(struct starpu_sched_component *component);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Flow-control Prio Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component_prio_data
															
 
																 {
															
 
																 	unsigned ntasks_threshold;
															
@@ -156,19 +445,70 @@ struct starpu_sched_component_prio_data
 
																 struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data) STARPU_ATTRIBUTE_MALLOC;
															
 
																 int starpu_sched_component_is_prio(struct starpu_sched_component *component);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Resource-mapping Work-Stealing Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   Return a component that performs work-stealing scheduling. Tasks are pushed in a round-robin way. estimated_end returns the average of the expected lengths of the fifos, starting at the average of the expected_end of its children. When a worker has to steal a task, it steals one in a round-robin way, and gets the last pushed task of the highest priority.
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a work stealing component
															
 
																+ */
															
 
																 int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component);
															
 
																+
															
 
																+/**
															
 
																+   Behavior is undefined if there is no work stealing component in the scheduler. If there is one, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if it is a worker.
															
 
																+*/
															
 
																 int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Resource-mapping Random Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   Create a component that performs random scheduling.
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																+/**
															
 
																+   return true iff \p component is a random component
															
 
																+*/
															
 
																 int starpu_sched_component_is_random(struct starpu_sched_component *);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Resource-mapping Eager Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																 int starpu_sched_component_is_eager(struct starpu_sched_component *);
															
 
																+/**
															
 
																+   @name Resource-mapping Eager-Calibration Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																 int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Resource-mapping MCT Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component_mct_data
															
 
																 {
															
 
																 	double alpha;
															
@@ -176,14 +516,48 @@ struct starpu_sched_component_mct_data
 
																 	double _gamma;
															
 
																 	double idle_power;
															
 
																 };
															
 
																+
															
 
																+/**
															
 
																+   Create a component with mct_data parameters. The mct component doesn't
															
 
																+   do anything but pushing tasks on no_perf_model_component and
															
 
																+   calibrating_component
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																 int starpu_sched_component_is_mct(struct starpu_sched_component *component);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Resource-mapping Heft Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
															
 
																 int starpu_sched_component_is_heft(struct starpu_sched_component *component);
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Special-purpose Best_Implementation Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   Select the implementation that offers the shortest computation length for the first worker that can execute the task.
															
 
																+   Or an implementation that needs to be calibrated.
															
 
																+   Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid.
															
 
																+   If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined.
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																+/** @} */
															
 
																+
															
 
																+/**
															
 
																+   @name Special-purpose Perfmodel_Select Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 struct starpu_sched_component_perfmodel_select_data
															
 
																 {
															
 
																 	struct starpu_sched_component *calibrator_component;
															
@@ -193,46 +567,168 @@ struct starpu_sched_component_perfmodel_select_data
 
																 struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data) STARPU_ATTRIBUTE_MALLOC;
															
 
																 int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component);
															
 
																-void starpu_initialize_prio_center_policy(unsigned sched_ctx_id);
															
 
																+/** @} */
															
 
																+/**
															
 
																+   @name Recipe Component API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																+/**
															
 
																+   parameters for starpu_sched_component_composed_component_create
															
 
																+*/
															
 
																 struct starpu_sched_component_composed_recipe;
															
 
																+
															
 
																+/**
															
 
																+   return an empty recipe for a composed component, it should not be used without modification
															
 
																+*/
															
 
																 struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																+/**
															
 
																+   return a recipe to build a composed component with a \p create_component
															
 
																+*/
															
 
																 struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg) STARPU_ATTRIBUTE_MALLOC;
															
 
																+
															
 
																+/**
															
 
																+   add \p create_component under all previous components in recipe
															
 
																+*/
															
 
																 void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg);
															
 
																+
															
 
																+/**
															
 
																+   destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called
															
 
																+*/
															
 
																 void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *);
															
 
																+
															
 
																+/**
															
 
																+   Create a component that behaves as if all components of the recipe were linked, except that you cannot use the starpu_sched_component_is_foo functions on it.
															
 
																+   If the recipe contains a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component.
															
 
																+*/
															
 
																 struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe) STARPU_ATTRIBUTE_MALLOC;
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																+/**
															
 
																+   Define how to build a scheduler according to the topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
															
 
																+   the level is just skipped. Bugs everywhere, do not rely on.
															
 
																+*/
															
 
																 struct starpu_sched_component_specs
															
 
																 {
															
 
																+	/**
															
 
																+	   the composed component to put on the top of the scheduler
															
 
																+	   this member must not be <c>NULL</c> as it is the root of the topology
															
 
																+	*/
															
 
																 	struct starpu_sched_component_composed_recipe *hwloc_machine_composed_sched_component;
															
 
																+	/**
															
 
																+	   the composed component to put for each memory component
															
 
																+	*/
															
 
																 	struct starpu_sched_component_composed_recipe *hwloc_component_composed_sched_component;
															
 
																+	/**
															
 
																+	   the composed component to put for each socket
															
 
																+	*/
															
 
																 	struct starpu_sched_component_composed_recipe *hwloc_socket_composed_sched_component;
															
 
																+	/**
															
 
																+	   the composed component to put for each cache
															
 
																+	*/
															
 
																 	struct starpu_sched_component_composed_recipe *hwloc_cache_composed_sched_component;
															
 
																+	/**
															
 
																+	   a function that returns a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
															
 
																+	   <c>NULL</c> is a valid return value, then no component will be added on top
															
 
																+	*/
															
 
																 	struct starpu_sched_component_composed_recipe *(*worker_composed_sched_component)(enum starpu_worker_archtype archtype);
															
 
																+	/**
															
 
																+	   this flag is a dirty hack because of the poor expressivity of this interface. As an example, if you want to build
															
 
																+	   a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.
															
 
																+	   If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same
															
 
																+	   numa component it will be shared). It indicates whether heterogeneous workers should be brothers or cousins, for example whether a GPU and a CPU should share their numa node or not.
															
 
																+	*/
															
 
																 	int mix_heterogeneous_workers;
															
 
																 };
															
 
																+
															
 
																+/**
															
 
																+   build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine.
															
 
																+*/
															
 
																 struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s);
															
 
																 #endif /* STARPU_HAVE_HWLOC */
															
 
																+/**
															
 
																+   @name Basic API
															
 
																+   @{
															
 
																+*/
															
 
																+
															
 
																 #define STARPU_SCHED_SIMPLE_DECIDE_MASK		(3<<0)
															
 
																+
															
 
																+/**
															
 
																+   Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_DECIDE_WORKERS	(1<<0)
															
 
																+
															
 
																+/**
															
 
																+   Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_DECIDE_MEMNODES	(2<<0)
															
 
																+
															
 
																+/**
															
 
																+   Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_DECIDE_ARCHS	(3<<0)
															
 
																+/**
															
 
																+   Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated.
															
 
																+   In other words, this is needed when using a component which needs performance models for tasks.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_PERFMODEL		(1<<4)
															
 
																+
															
 
																+/**
															
 
																+   Request that a component be added just above workers, that chooses the best task implementation.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_IMPL		(1<<5)
															
 
																+
															
 
																+/**
															
 
																+   Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component.
															
 
																+
															
 
																+   This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_FIFO_ABOVE		(1<<6)
															
 
																+
															
 
																+/**
															
 
																+   Request that the fifo above be sorted by priorities
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO	(1<<7)
															
 
																+
															
 
																+/**
															
 
																+   Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers.
															
 
																+
															
 
																+   This is useful to be able to schedule a (tunable) small number of tasks in advance only.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_FIFOS_BELOW		(1<<8)
															
 
																+
															
 
																+/**
															
 
																+   Request that the fifos below be sorted by priorities
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO	(1<<9)
															
 
																+
															
 
																+/**
															
 
																+   Request that work between workers using the same fifo below be distributed using a work stealing component.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_WS_BELOW		(1<<10)
															
 
																+
															
 
																+/**
															
 
																+   Request to not only choose between simple workers, but also choose between combined workers.
															
 
																+*/
															
 
																 #define STARPU_SCHED_SIMPLE_COMBINED_WORKERS	(1<<11)
															
 
																+/**
															
 
																+   Create a simple modular scheduler tree around a scheduling decision-making
															
 
																+   component \p component. The details of what should be built around \p component
															
 
																+   are described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are
															
 
																+   mutually exclusive. \p data is passed to the \p create_decision_component
															
 
																+   function when creating the decision component.
															
 
																+*/
															
 
																 void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id);
															
 
																+/** @} */
															
 
																+
															
 
																 #define STARPU_COMPONENT_MUTEX_LOCK(m) \
															
 
																 do \
															
 
																 { \