лет назад: 6 · 6862bbddbf
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -116,8 +116,6 @@ chapters =	\
 
				 	chapters/api/mic_extensions.doxy \
			
 
				 	chapters/api/scc_extensions.doxy \
			
 
				 	chapters/api/parallel_tasks.doxy \
			
 
				-	chapters/api/performance_model.doxy \
			
 
				-	chapters/api/profiling.doxy \
			
 
				 	chapters/api/scheduling_contexts.doxy \
			
 
				 	chapters/api/scheduling_policy.doxy \
			
 
				 	chapters/api/standard_memory_library.doxy \
			
@@ -131,7 +129,6 @@ chapters =	\
 
				 	chapters/api/toolbox.doxy \
			
 
				 	chapters/api/sc_hypervisor/sc_hypervisor.doxy \
			
 
				 	chapters/api/sc_hypervisor/sc_hypervisor_usage.doxy \
			
 
				-	chapters/api/modularized_scheduler.doxy \
			
 
				 	chapters/api/interoperability.doxy
			
 
				 
			
 
				 images = 	\
			
--- a/doc/doxygen/chapters/api/modularized_scheduler.doxy
+++ b/doc/doxygen/chapters/api/modularized_scheduler.doxy
@@ -1,560 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2013,2014                                Inria
			
 
				- * Copyright (C) 2013-2018                                CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2015,2017,2018-2019       Université de Bordeaux
			
 
				- * Copyright (C) 2013                                     Simon Archipoff
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Modularized_Scheduler Modularized Scheduler Interface
			
 
				-
			
 
				-\enum starpu_sched_component_properties
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-flags for starpu_sched_component::properties
			
 
				-\var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_HOMOGENEOUS
			
 
				-     indicate that all workers have the same starpu_worker_archtype
			
 
				-\var starpu_sched_component_properties::STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE
			
 
				-     indicate that all workers have the same memory component
			
 
				-
			
 
				-\def STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-indicate if component is homogeneous
			
 
				-\def STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-indicate if all workers have the same memory component
			
 
				-
			
 
				-\struct starpu_sched_component
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-This structure represent a scheduler module.  A scheduler is a
			
 
				-tree-like structure of them, some parts of scheduler can be shared by
			
 
				-several contexts to perform some local optimisations, so, for all
			
 
				-components, a list of parent is defined by \c sched_ctx_id. They
			
 
				-embed their specialised methods in a pseudo object-style, so calls are
			
 
				-like <c>component->push_task(component,task)</c>
			
 
				-
			
 
				-\var struct starpu_sched_tree *starpu_sched_component::tree
			
 
				-     The tree containing the component
			
 
				-\var struct starpu_bitmap *starpu_sched_component::workers
			
 
				-     this member contain the set of underlying workers
			
 
				-\var starpu_sched_component::workers_in_ctx
			
 
				-     this member contain the subset of starpu_sched_component::workers that is currently available in the context
			
 
				-     The push method should take this member into account.
			
 
				-     this member is set with :
			
 
				-     component->workers UNION tree->workers UNION
			
 
				-     component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component
			
 
				-\var void *starpu_sched_component::data
			
 
				-     private data
			
 
				-\var int starpu_sched_component::nchildren
			
 
				-     the number of component's children
			
 
				-\var struct starpu_sched_component **starpu_sched_component::children
			
 
				-     the vector of component's children
			
 
				-\var int starpu_sched_component::nparents
			
 
				-     the numbers of component's parents
			
 
				-\var struct starpu_sched_component **starpu_sched_component::parents
			
 
				-     the vector of component's parents
			
 
				-
			
 
				-\var void(*starpu_sched_component::add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child)
			
 
				-     add a child to component
			
 
				-\var void(*starpu_sched_component::remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child)
			
 
				-     remove a child from component
			
 
				-\var void(*starpu_sched_component::add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent)
			
 
				-     todo
			
 
				-\var void(*starpu_sched_component::remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent)
			
 
				-     todo
			
 
				-
			
 
				-\var int (*starpu_sched_component::push_task)(struct starpu_sched_component *, struct starpu_task *)
			
 
				-     push a task in the scheduler module. this function is called to
			
 
				-     push a task on component subtree, this can either perform a
			
 
				-     recursive call on a child or store the task in the component,
			
 
				-     then it will be returned by a further pull_task call.
			
 
				-     the caller must ensure that component is able to execute task.
			
 
				-     This method must either return 0 if the task was properly stored or
			
 
				-     passed over to a child component, or return a value different from 0 if the
			
 
				-     task could not be consumed (e.g. the queue is full).
			
 
				-\var struct starpu_task * (*starpu_sched_component::pull_task)(struct starpu_sched_component *component, struct starpu_sched_component *to)
			
 
				-     pop a task from the scheduler module. this function is called by workers to get a task from their
			
 
				-     parents. this function should first return a locally stored task
			
 
				-     or perform a recursive call on the parents.
			
 
				-     the task returned by this function should be executable by the caller
			
 
				-
			
 
				-\var int (*starpu_sched_component::can_push)(struct starpu_sched_component *component, struct starpu_sched_component *to)
			
 
				-     This function is called by a component which implements a queue,
			
 
				-     allowing it to signify to its parents that an empty slot is
			
 
				-     available in its queue. This should return 1 if some tasks could be pushed
			
 
				-     The basic implementation of this function
			
 
				-     is a recursive call to its parents, the user has to specify a
			
 
				-     personally-made function to catch those calls.
			
 
				-\var int (*starpu_sched_component::can_pull)(struct starpu_sched_component *component)
			
 
				-     This function allow a component to wake up a worker. It is
			
 
				-     currently called by component which implements a queue, to
			
 
				-     signify to its children that a task have been pushed in its local
			
 
				-     queue, and is available to be popped by a worker, for example.
			
 
				-     This should return 1 if some container or worker could (or will) pull
			
 
				-     some tasks.
			
 
				-     The basic implementation of this function is a recursive call to
			
 
				-     its children, until at least one worker have been woken up.
			
 
				-
			
 
				-\var double (*starpu_sched_component::estimated_load)(struct starpu_sched_component *component)
			
 
				-	is an heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum
			
 
				-	of relatives speedup of workers available in context.
			
 
				-	estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker))
			
 
				-\var starpu_sched_component::estimated_end
			
 
				-	return the time when a worker will enter in starvation. This function is relevant only if the task->predicted
			
 
				-	member has been set.
			
 
				-
			
 
				-\var void (*starpu_sched_component::deinit_data)(struct starpu_sched_component *component)
			
 
				-	called by starpu_sched_component_destroy. Should free data allocated during creation
			
 
				-\var void (*starpu_sched_component::notify_change_workers)(struct starpu_sched_component *component)
			
 
				-	this function is called for each component when workers are added or removed from a context
			
 
				-\var int starpu_sched_component::properties
			
 
				-	todo
			
 
				-\var hwloc_obj_t starpu_sched_component::obj
			
 
				-	the hwloc object associated to scheduler module. points to the
			
 
				-	part of topology that is binded to this component, eg: a numa
			
 
				-	node for a ws component that would balance load between
			
 
				-	underlying sockets
			
 
				-
			
 
				-\struct starpu_sched_tree
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-The actual scheduler
			
 
				-\var struct starpu_sched_component *starpu_sched_tree::root
			
 
				-	this is the entry module of the scheduler
			
 
				-\var struct starpu_bitmap *starpu_sched_tree::workers
			
 
				-	this is the set of workers available in this context, this value is used to mask workers in modules
			
 
				-\var unsigned starpu_sched_tree::sched_ctx_id
			
 
				-	the context id of the scheduler
			
 
				-\var starpu_pthread_mutex_t starpu_sched_tree::lock
			
 
				-	this lock is used to protect the scheduler, it is taken in
			
 
				-	read mode pushing a task and in write mode for adding or
			
 
				-	removing workers
			
 
				-
			
 
				-@name Scheduling Tree API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 create an empty initialized starpu_sched_tree
			
 
				-
			
 
				-\fn void starpu_sched_tree_destroy(struct starpu_sched_tree *tree)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 destroy tree and free all non shared component in it.
			
 
				-
			
 
				-\fn void starpu_sched_tree_update_workers(struct starpu_sched_tree *t)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers).
			
 
				-
			
 
				-\fn void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers)
			
 
				-
			
 
				-\fn int starpu_sched_tree_push_task(struct starpu_task *task)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-
			
 
				-\fn struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-
			
 
				-\fn void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-
			
 
				-\fn void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-
			
 
				-\fn void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 Attaches component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. MCT)
			
 
				-
			
 
				-@name Generic Scheduling Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 allocate and initialize component field with defaults values :
			
 
				-	.pop_task make recursive call on father
			
 
				-	.estimated_load compute relative speedup and tasks in sub tree
			
 
				-	.estimated_end return the minimum of recursive call on children
			
 
				-	.add_child is starpu_sched_component_add_child
			
 
				-	.remove_child is starpu_sched_component_remove_child
			
 
				-	.notify_change_workers does nothing
			
 
				-	.deinit_data does nothing
			
 
				-
			
 
				-\fn void starpu_sched_component_destroy(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 free data allocated by starpu_sched_component_create and call component->deinit_data(component)
			
 
				-	 set to <c>NULL</c> the member starpu_sched_component::fathers[sched_ctx_id] of all child if its equal to \p component
			
 
				-
			
 
				-\fn void starpu_sched_component_destroy_rec(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 recursively destroy non shared parts of a \p component 's tree
			
 
				-
			
 
				-\fn int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component can execute \p task, this function take into account the workers available in the scheduling context
			
 
				-
			
 
				-\fn int starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return a non <c>NULL</c> value if \p component can execute \p task.
			
 
				-	 write the execution prediction length for the best implementation of the best worker available and write this at \p length address.
			
 
				-	 this result is more relevant if starpu_sched_component::is_homogeneous is non <c>NULL</c>.
			
 
				-	 if a worker need to be calibrated for an implementation, nan is set to \p length.
			
 
				-
			
 
				-\fn double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return the average time to transfer \p task data to underlying \p component workers.
			
 
				-
			
 
				-@name Worker Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers).
			
 
				-
			
 
				-\fn int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0
			
 
				-
			
 
				-\fn int starpu_sched_component_is_worker(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a worker component
			
 
				-
			
 
				-\fn int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a simple worker component
			
 
				-
			
 
				-\fn int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a combined worker component
			
 
				-
			
 
				-\fn void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-	 update predictions for workers
			
 
				-
			
 
				-\fn void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 compatibility with starpu_sched_policy interface
			
 
				-
			
 
				-@name Flow-control Fifo Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-default function for the can_push component method, just calls can_push of parents until one of them returns non-zero
			
 
				-
			
 
				-\fn int starpu_sched_component_can_pull(struct starpu_sched_component * component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-default function for the can_pull component method, just calls can_pull of children until one of them returns non-zero
			
 
				-
			
 
				-\fn int starpu_sched_component_can_pull_all(struct starpu_sched_component * component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-function for the can_pull component method, calls can_pull of all children
			
 
				-
			
 
				-\fn double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-default function for the estimated_load component method, just sums up the loads
			
 
				-of the children of the component.
			
 
				-
			
 
				-\fn double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-function that can be used for the estimated_end component method, which just computes the minimum completion time of the children.
			
 
				-
			
 
				-\fn double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-function that can be used for the estimated_end component method, which computes
			
 
				-the minimum completion time of the children, and adds to it an estimation of how
			
 
				-existing queued work, plus the exp_len work, can be completed. This is typically
			
 
				-used instead of starpu_sched_component_estimated_end_min when the component
			
 
				-contains a queue of tasks, which thus needs to be added to the estimations.
			
 
				-
			
 
				-\fn double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-default function for the estimated_end component method, which just computes the average completion time of the children.
			
 
				-
			
 
				-
			
 
				-\struct starpu_sched_component_fifo_data
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-\var unsigned starpu_sched_component_fifo_data::ntasks_threshold
			
 
				-todo
			
 
				-\var double starpu_sched_component_fifo_data::exp_len_threshold
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities.
			
 
				-	 A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it.
			
 
				-	 starpu_sched_component::estimated_end function compute the estimated length by dividing the sequential length by the number of underlying workers.
			
 
				-
			
 
				-\fn int starpu_sched_component_is_fifo(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a fifo component
			
 
				-
			
 
				-@name Flow-control Prio Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\struct starpu_sched_component_prio_data
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-\var unsigned starpu_sched_component_prio_data::ntasks_threshold
			
 
				-todo
			
 
				-\var double starpu_sched_component_prio_data::exp_len_threshold
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_sched_component_is_prio(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-@name Resource-mapping Work-Stealing Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return a component that perform a work stealing scheduling. Tasks are pushed in a round robin way. estimated_end return the average of expected length of fifos, starting at the average of the expected_end of his children. When a worker have to steal a task, it steal a task in a round robin way, and get the last pushed task of the higher priority.
			
 
				-
			
 
				-\fn int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if its a worker.
			
 
				-
			
 
				-\fn int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a work stealing component
			
 
				-
			
 
				-@name Resource-mapping Random Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 create a component that perform a random scheduling
			
 
				-
			
 
				-\fn int starpu_sched_component_is_random(struct starpu_sched_component *)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a random component
			
 
				-
			
 
				-@name Resource-mapping Eager Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_sched_component_is_eager(struct starpu_sched_component *)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-@name Resource-mapping Eager-Calibration Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-@name Resource-mapping MCT Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\struct starpu_sched_component_mct_data
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-\var double starpu_sched_component_mct_data::alpha
			
 
				-todo
			
 
				-\var double starpu_sched_component_mct_data::beta
			
 
				-todo
			
 
				-\var double starpu_sched_component_mct_data::_gamma
			
 
				-todo
			
 
				-\var double starpu_sched_component_mct_data::idle_power
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-create a component with mct_data parameters. the mct component doesn't
			
 
				-do anything but pushing tasks on no_perf_model_component and
			
 
				-calibrating_component
			
 
				-
			
 
				-\fn int starpu_sched_component_is_mct(struct starpu_sched_component *component);
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-@name Resource-mapping Heft Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 this component perform a heft scheduling
			
 
				-
			
 
				-\fn int starpu_sched_component_is_heft(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return true iff \p component is a heft component
			
 
				-
			
 
				-@name Special-purpose Best_Implementation Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 Select the implementation that offer the shortest computation length for the first worker that can execute the task.
			
 
				-	 Or an implementation that need to be calibrated.
			
 
				-	 Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid.
			
 
				-	 If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined.
			
 
				-
			
 
				-@name Special-purpose Perfmodel_Select Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\struct starpu_sched_component_perfmodel_select_data
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::calibrator_component
			
 
				-todo
			
 
				-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::no_perfmodel_component
			
 
				-todo
			
 
				-\var struct starpu_sched_component *starpu_sched_component_perfmodel_select_data::perfmodel_component
			
 
				-todo
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-todo
			
 
				-
			
 
				-@name Recipe Component API
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-
			
 
				-\struct starpu_sched_component_composed_recipe
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	parameters for starpu_sched_component_composed_component_create
			
 
				-
			
 
				-\fn struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return an empty recipe for a composed component, it should not be used without modification
			
 
				-
			
 
				-\fn struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 return a recipe to build a composed component with a \p create_component
			
 
				-
			
 
				-\fn void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 add \p create_component under all previous components in recipe
			
 
				-
			
 
				-\fn void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called
			
 
				-
			
 
				-\fn struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 create a component that behaves as if all components of recipe were linked. Except that you can't use starpu_sched_component_is_foo function
			
 
				-	 if recipe contain a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component
			
 
				-
			
 
				-\struct starpu_sched_component_specs
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 Define how build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
			
 
				-	 the level is just skipped. Bugs everywhere, do not rely on.
			
 
				-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_machine_composed_sched_component
			
 
				-     the composed component to put on the top of the scheduler
			
 
				-     this member must not be <c>NULL</c> as it is the root of the topology
			
 
				-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_component_composed_sched_component
			
 
				-     the composed component to put for each memory component
			
 
				-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_socket_composed_sched_component
			
 
				-     the composed component to put for each socket
			
 
				-\var struct starpu_sched_component_composed_recipe *starpu_sched_specs::hwloc_cache_composed_sched_component
			
 
				-     the composed component to put for each cache
			
 
				-\var struct starpu_sched_component_composed_recipe *(*starpu_sched_specs::worker_composed_sched_component)(enum starpu_worker_archtype archtype)
			
 
				-     a function that return a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
			
 
				-     <c>NULL</c> is a valid return value, then no component will be added on top
			
 
				-\var starpu_sched_specs::mix_heterogeneous_workers
			
 
				-     this flag is a dirty hack because of the poor expressivity of this interface. As example, if you want to build
			
 
				-     a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.
			
 
				-     If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same
			
 
				-     numa component it will be shared). It indicates whether heterogeneous workers should be brothers or cousins, for example, whether a GPU and a CPU should share their NUMA node or not
			
 
				-
			
 
				-\fn struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-	 this function builds a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_DECIDE_WORKERS
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_COMBINED_WORKERS
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to not only choose between simple workers, but also choose between combined workers.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_DECIDE_MEMNODES
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_DECIDE_ARCHS
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_PERFMODEL
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated.
			
 
				-
			
 
				-In other words, this is needed when using a component which needs performance models for tasks.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_FIFO_ABOVE
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component.
			
 
				-
			
 
				-This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request that the fifo above be sorted by priorities
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_FIFOS_BELOW
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers.
			
 
				-
			
 
				-This is useful to be able to schedule a (tunable) small number of tasks in advance only.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request that the fifos below be sorted by priorities
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_WS_BELOW
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request that work between workers using the same fifo below be distributed using a work stealing component.
			
 
				-
			
 
				-\def STARPU_SCHED_SIMPLE_IMPL
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Request that a component be added just above workers, that chooses the best task implementation.
			
 
				-
			
 
				-\fn void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-This creates a simple modular scheduler tree around a scheduling decision-making
			
 
				-component \p component. The details of what should be built around \p component
			
 
				-is described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are
			
 
				-mutually exclusive. \p data is passed to the \p create_decision_component
			
 
				-function when creating the decision component.
			
 
				-
			
 
				-\fn int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Push a task to a component. This is a helper for <c>component->push_task(component, task)</c> plus tracing.
			
 
				-
			
 
				-\fn struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to)
			
 
				-\ingroup API_Modularized_Scheduler
			
 
				-Pull a task from a component. This is a helper for <c>component->pull_task(component)</c> plus tracing.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/performance_model.doxy
+++ b/doc/doxygen/chapters/api/performance_model.doxy
@@ -1,349 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2011-2013,2016                           Inria
			
 
				- * Copyright (C) 2010-2017                                CNRS
			
 
				- * Copyright (C) 2009-2011,2013-2017                      Université de Bordeaux
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Performance_Model Performance Model
			
 
				-
			
 
				-\enum starpu_perfmodel_type
			
 
				-\ingroup API_Performance_Model
			
 
				-TODO
			
 
				-\var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
			
 
				-    todo
			
 
				-\var starpu_perfmodel_type::STARPU_PER_ARCH
			
 
				-    Application-provided per-arch cost model function
			
 
				-\var starpu_perfmodel_type::STARPU_COMMON
			
 
				-    Application-provided common cost model function, with per-arch
			
 
				-    factor
			
 
				-\var starpu_perfmodel_type::STARPU_HISTORY_BASED
			
 
				-    Automatic history-based cost model
			
 
				-\var starpu_perfmodel_type::STARPU_REGRESSION_BASED
			
 
				-    Automatic linear regression-based cost model  (alpha * size ^
			
 
				-    beta)
			
 
				-\var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
			
 
				-    Automatic non-linear regression-based cost model (a * size ^ b +
			
 
				-    c)
			
 
				-\var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
			
 
				-    Automatic multiple linear regression-based cost model. Application
			
 
				-    provides parameters, their combinations and exponents.
			
 
				-
			
 
				-\struct starpu_perfmodel_device
			
 
				-todo
			
 
				-\ingroup API_Performance_Model
			
 
				-\var enum starpu_worker_archtype starpu_perfmodel_device::type
			
 
				-    type of the device
			
 
				-\var int starpu_perfmodel_device::devid
			
 
				-    identifier of the precise device
			
 
				-\var int starpu_perfmodel_device::ncore
			
 
				-    number of execution in parallel, minus 1
			
 
				-
			
 
				-\struct starpu_perfmodel_arch
			
 
				-todo
			
 
				-\ingroup API_Performance_Model
			
 
				-\var int starpu_perfmodel_arch::ndevices
			
 
				-    number of the devices for the given arch
			
 
				-\var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
			
 
				-    list of the devices for the given arch
			
 
				-
			
 
				-\struct starpu_perfmodel
			
 
				-Contain all information about a performance model. At least the
			
 
				-type and symbol fields have to be filled when defining a performance
			
 
				-model for a codelet. For compatibility, make sure to initialize the
			
 
				-whole structure to zero, either by using explicit memset, or by
			
 
				-letting the compiler implicitly do it in e.g. static storage case. If
			
 
				-not provided, other fields have to be zero.
			
 
				-\ingroup API_Performance_Model
			
 
				-\var enum starpu_perfmodel_type starpu_perfmodel::type
			
 
				-    type of performance model
			
 
				-    <ul>
			
 
				-    <li>
			
 
				-    ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
			
 
				-    ::STARPU_NL_REGRESSION_BASED: No other field needs to be
			
 
				-    provided, this is purely history-based.
			
 
				-    </li>
			
 
				-    <li>
			
 
				-    ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
			
 
				-    starpu_perfmodel::nparameters (number of different parameters),
			
 
				-    starpu_perfmodel::ncombinations (number of parameters
			
 
				-    combinations-tuples) and table starpu_perfmodel::combinations
			
 
				-    which defines exponents of the equation. Function cl_perf_func
			
 
				-    also needs to define how to extract parameters from the task. 
			
 
				-    </li>
			
 
				-    <li>
			
 
				-    ::STARPU_PER_ARCH: either field
			
 
				-    starpu_perfmodel::arch_cost_function has to be filled with a
			
 
				-    function that returns the cost in micro-seconds on the arch given
			
 
				-    as parameter, or field starpu_perfmodel::per_arch has to be filled
			
 
				-    with functions which return the cost in micro-seconds.
			
 
				-    </li>
			
 
				-    <li>
			
 
				-    ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
 
				-    filled with a function that returns the cost in micro-seconds on a
			
 
				-    CPU, timing on other archs will be determined by multiplying by an
			
 
				-    arch-specific factor.
			
 
				-    </li>
			
 
				-    </ul>
			
 
				-\var const char *starpu_perfmodel::symbol
			
 
				-    symbol name for the performance model, which will be used as file
			
 
				-    name to store the model. It must be set otherwise the model will
			
 
				-    be ignored.
			
 
				-\var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
			
 
				-    Used by ::STARPU_COMMON. Take a task and implementation number,
			
 
				-    and must return a task duration estimation in micro-seconds.
			
 
				-\var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
			
 
				-    Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
			
 
				-    number, and must return a task duration estimation in
			
 
				-    micro-seconds on that arch.
			
 
				-\var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
			
 
				-    Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				-    ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
			
 
				-    implementation number, and return the size to be used as index to
			
 
				-    distinguish histories and as a base for regressions.
			
 
				-\var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
			
 
				-    Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
			
 
				-    and return the footprint to be used as index to distinguish
			
 
				-    histories. The default is to use the starpu_task_data_footprint()
			
 
				-    function.
			
 
				-\var unsigned starpu_perfmodel::is_loaded
			
 
				-\private
			
 
				-    Whether the performance model is already loaded from the disk.
			
 
				-\var unsigned starpu_perfmodel::benchmarking
			
 
				-\private
			
 
				-    todo
			
 
				-\var unsigned starpu_perfmodel::is_init
			
 
				-    todo
			
 
				-\var starpu_perfmodel_state_t starpu_perfmodel::state
			
 
				-\private
			
 
				-    todo
			
 
				-\var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
			
 
				-    todo
			
 
				-\var const char ** starpu_perfmodel::parameters_names
			
 
				-\private
			
 
				-    Names of parameters used for multiple linear regression models (M,
			
 
				-    N, K)
			
 
				-\var unsigned starpu_perfmodel::nparameters
			
 
				-\private
			
 
				-    Number of parameters used for multiple linear regression models
			
 
				-\var unsigned ** starpu_perfmodel::combinations
			
 
				-\private
			
 
				-    Table of combinations of parameters (and the exponents) used for
			
 
				-    multiple linear regression models
			
 
				-\var unsigned starpu_perfmodel::ncombinations
			
 
				-\private
			
 
				-    Number of combination of parameters used for multiple linear
			
 
				-    regression models
			
 
				-
			
 
				-\struct starpu_perfmodel_regression_model
			
 
				-todo
			
 
				-\ingroup API_Performance_Model
			
 
				-\var double starpu_perfmodel_regression_model::sumlny
			
 
				-    sum of ln(measured)
			
 
				-\var double starpu_perfmodel_regression_model::sumlnx
			
 
				-    sum of ln(size)
			
 
				-\var double starpu_perfmodel_regression_model::sumlnx2
			
 
				-    sum of ln(size)^2
			
 
				-\var unsigned long starpu_perfmodel_regression_model::minx
			
 
				-    minimum size
			
 
				-\var unsigned long starpu_perfmodel_regression_model::maxx
			
 
				-    maximum size
			
 
				-\var double starpu_perfmodel_regression_model::sumlnxlny
			
 
				-    sum of ln(size)*ln(measured)
			
 
				-\var double starpu_perfmodel_regression_model::alpha
			
 
				-    estimated = alpha * size ^ beta
			
 
				-\var double starpu_perfmodel_regression_model::beta
			
 
				-    estimated = alpha * size ^ beta
			
 
				-\var unsigned starpu_perfmodel_regression_model::valid
			
 
				-    whether the linear regression model is valid (i.e. enough measures)
			
 
				-\var double starpu_perfmodel_regression_model::a
			
 
				-    estimated = a size ^b + c
			
 
				-\var double starpu_perfmodel_regression_model::b
			
 
				-    estimated = a size ^b + c
			
 
				-\var double starpu_perfmodel_regression_model::c
			
 
				-    estimated = a size ^b + c
			
 
				-\var unsigned starpu_perfmodel_regression_model::nl_valid
			
 
				-    whether the non-linear regression model is valid (i.e. enough measures)
			
 
				-\var unsigned starpu_perfmodel_regression_model::nsample
			
 
				-    number of sample values for non-linear regression
			
 
				-\var double starpu_perfmodel_regression_model::coeff[]
			
 
				-    list of computed coefficients for multiple linear regression model
			
 
				-\var double starpu_perfmodel_regression_model::ncoeff
			
 
				-    number of coefficients for multiple linear regression model
			
 
				-\var double starpu_perfmodel_regression_model::multi_valid
			
 
				-    whether the multiple linear regression model is valid
			
 
				-
			
 
				-\struct starpu_perfmodel_per_arch
			
 
				-contains information about the performance model of a given
			
 
				-arch.
			
 
				-\ingroup API_Performance_Model
			
 
				-\var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
			
 
				-    Used by ::STARPU_PER_ARCH, must point to functions which take a
			
 
				-    task, the target arch and implementation number (as mere
			
 
				-    conveniency, since the array is already indexed by these), and
			
 
				-    must return a task duration estimation in micro-seconds.
			
 
				-\var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
			
 
				-    Same as in structure starpu_perfmodel, but per-arch, in case it
			
 
				-    depends on the architecture-specific implementation.
			
 
				-\var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
			
 
				-\private
			
 
				-    The history of performance measurements.
			
 
				-\var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
			
 
				-\private
			
 
				-    Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
			
 
				-    ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
			
 
				-    measures.
			
 
				-\var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
			
 
				-\private
			
 
				-    Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
			
 
				-    and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
			
 
				-    factors of the regression.
			
 
				-
			
 
				-\struct starpu_perfmodel_history_list
			
 
				-todo
			
 
				-\ingroup API_Performance_Model
			
 
				-\var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
			
 
				-    todo
			
 
				-\var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
			
 
				-    todo
			
 
				-
			
 
				-\struct starpu_perfmodel_history_entry
			
 
				-todo
			
 
				-\ingroup API_Performance_Model
			
 
				-\var double starpu_perfmodel_history_entry::mean
			
 
				-    mean_n = 1/n sum
			
 
				-\var double starpu_perfmodel_history_entry::deviation
			
 
				-    n dev_n = sum2 - 1/n (sum)^2
			
 
				-\var double starpu_perfmodel_history_entry::sum
			
 
				-    sum of samples (in µs)
			
 
				-\var double starpu_perfmodel_history_entry::sum2
			
 
				-    sum of samples^2
			
 
				-\var unsigned starpu_perfmodel_history_entry::nsample
			
 
				-    number of samples
			
 
				-\var uint32_t starpu_perfmodel_history_entry::footprint
			
 
				-    data footprint
			
 
				-\var size_t starpu_perfmodel_history_entry::size
			
 
				-    in bytes
			
 
				-\var double starpu_perfmodel_history_entry::flops
			
 
				-    Provided by the application
			
 
				-
			
 
				-\fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
			
 
				-\ingroup API_Performance_Model
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_perfmodel_free_sampling_directories(void)
			
 
				-\ingroup API_Performance_Model
			
 
				-Free internal memory used for sampling directory
			
 
				-management. It should only be called by an application which is not
			
 
				-calling starpu_shutdown() as this function already calls it. See for
			
 
				-example <c>tools/starpu_perfmodel_display.c</c>.
			
 
				-
			
 
				-\fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
			
 
				-\ingroup API_Performance_Model
			
 
				-Load the performance model found in the file named \p filename. \p model has to be
			
 
				-completely zero, and will be filled with the information stored in the given file.
			
 
				-
			
 
				-\fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
			
 
				-\ingroup API_Performance_Model
			
 
				-Load a given performance model. \p model has to be
			
 
				-completely zero, and will be filled with the information stored in
			
 
				-<c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
			
 
				-external tools that want to read the performance model files.
			
 
				-
			
 
				-\fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
			
 
				-\ingroup API_Performance_Model
			
 
				-Unload \p model which has been previously loaded
			
 
				-through the function starpu_perfmodel_load_symbol()
			
 
				-
			
 
				-\fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the path to the debugging information for the performance model.
			
 
				-
			
 
				-\fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
			
 
				-\ingroup API_Performance_Model
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the architecture name for \p arch
			
 
				-
			
 
				-\fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the architecture type of the worker \p workerid.
			
 
				-
			
 
				-\fn void starpu_perfmodel_initialize(void)
			
 
				-\ingroup API_Performance_Model
			
 
				-If starpu_init is not used, starpu_perfmodel_initialize should be used before calling starpu_perfmodel_* functions.
			
 
				-
			
 
				-\fn int starpu_perfmodel_list(FILE *output)
			
 
				-\ingroup API_Performance_Model
			
 
				-Print a list of all performance models on \p output
			
 
				-
			
 
				-\fn void starpu_perfmodel_directory(FILE *output)
			
 
				-\ingroup API_Performance_Model
			
 
				-Print the directory name storing performance models on \p output
			
 
				-
			
 
				-\fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
			
 
				-\ingroup API_Performance_Model
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
			
 
				-\ingroup API_Performance_Model
			
 
				-todo
			
 
				-
			
 
				-\fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
			
 
				-\ingroup API_Performance_Model
			
 
				-todo
			
 
				-
			
 
				-\fn void starpu_bus_print_bandwidth(FILE *f)
			
 
				-\ingroup API_Performance_Model
			
 
				-Print a matrix of bus bandwidths on \p f.
			
 
				-
			
 
				-\fn void starpu_bus_print_affinity(FILE *f)
			
 
				-\ingroup API_Performance_Model
			
 
				-Print the affinity devices on \p f.
			
 
				-
			
 
				-\fn void starpu_bus_print_filenames(FILE *f)
			
 
				-\ingroup API_Performance_Model
			
 
				-Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
			
 
				-
			
 
				-\fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
			
 
				-\ingroup API_Performance_Model
			
 
				-Feed the performance model \p model with an explicit
			
 
				-measurement \p measured (in µs), in addition to measurements done by StarPU
			
 
				-itself. This can be useful when the application already has an
			
 
				-existing set of measurements done in good conditions, that StarPU
			
 
				-could benefit from instead of doing on-line measurements. An example
			
 
				-of use can be seen in \ref PerformanceModelExample.
			
 
				-
			
 
				-\fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the bandwidth of data transfer between two memory nodes
			
 
				-
			
 
				-\fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the latency of data transfer between two memory nodes
			
 
				-
			
 
				-\fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the estimated time to transfer a given size between two memory nodes.
			
 
				-
			
 
				-\fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
			
 
				-\ingroup API_Performance_Model
			
 
				-Return the estimated time of a task with the given model and the given footprint.
			
 
				-
			
 
				-\var starpu_perfmodel_nop
			
 
				-Performance model which just always returns 1µs.
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/profiling.doxy
+++ b/doc/doxygen/chapters/api/profiling.doxy
@@ -1,202 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010-2015,2017                           CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
			
 
				- * Copyright (C) 2011,2012                                Inria
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-/*! \defgroup API_Profiling Profiling
			
 
				-
			
 
				-\struct starpu_profiling_task_info
			
 
				-\ingroup API_Profiling
			
 
				-This structure contains information about the execution of a
			
 
				-task. It is accessible from the field starpu_task::profiling_info if
			
 
				-profiling was enabled.
			
 
				-\var struct timespec starpu_profiling_task_info::submit_time
			
 
				-    Date of task submission (relative to the initialization of StarPU).
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::push_start_time
			
 
				-    Time when the task was submitted to the scheduler.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::push_end_time
			
 
				-    Time when the scheduler finished with the task submission.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::pop_start_time
			
 
				-    Time when the scheduler started to be requested for a task, and eventually gave that task.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::pop_end_time
			
 
				-    Time when the scheduler finished providing the task for execution.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::acquire_data_start_time
			
 
				-    Time when the worker started fetching input data.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::acquire_data_end_time
			
 
				-    Time when the worker finished fetching input data.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::start_time
			
 
				-    Date of task execution beginning (relative to the initialization of StarPU).
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::end_time
			
 
				-    Date of task execution termination (relative to the initialization of StarPU).
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::release_data_start_time
			
 
				-    Time when the worker started releasing data.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::release_data_end_time
			
 
				-    Time when the worker finished releasing data.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::callback_start_time
			
 
				-    Time when the worker started the application callback for the task.
			
 
				-
			
 
				-\var struct timespec starpu_profiling_task_info::callback_end_time
			
 
				-    Time when the worker finished the application callback for the task.
			
 
				-
			
 
				-\var int starpu_profiling_task_info::workerid
			
 
				-    Identifier of the worker which has executed the task.
			
 
				-
			
 
				-\var uint64_t starpu_profiling_task_info::used_cycles
			
 
				-    Number of cycles used by the task, only available in the MoviSim
			
 
				-
			
 
				-\var uint64_t starpu_profiling_task_info::stall_cycles
			
 
				-    Number of cycles stalled within the task, only available in the MoviSim
			
 
				-
			
 
				-\var double starpu_profiling_task_info::energy_consumed
			
 
				-Energy consumed by the task, in Joules
			
 
				-
			
 
				-\struct starpu_profiling_worker_info
			
 
				-This structure contains the profiling information associated to
			
 
				-a worker. The timing is provided since the previous call to
			
 
				-starpu_profiling_worker_get_info()
			
 
				-\ingroup API_Profiling
			
 
				-\var struct timespec starpu_profiling_worker_info::start_time
			
 
				-        Starting date for the reported profiling measurements.
			
 
				-\var struct timespec starpu_profiling_worker_info::total_time
			
 
				-        Duration of the profiling measurement interval.
			
 
				-\var struct timespec starpu_profiling_worker_info::executing_time
			
 
				-        Time spent by the worker to execute tasks during the profiling measurement interval.
			
 
				-\var struct timespec starpu_profiling_worker_info::sleeping_time
			
 
				-        Time spent idling by the worker during the profiling measurement interval.
			
 
				-\var int starpu_profiling_worker_info::executed_tasks
			
 
				-        Number of tasks executed by the worker during the profiling measurement interval.
			
 
				-\var uint64_t starpu_profiling_worker_info::used_cycles
			
 
				-        Number of cycles used by the worker, only available in the MoviSim
			
 
				-\var uint64_t starpu_profiling_worker_info::stall_cycles
			
 
				-        Number of cycles stalled within the worker, only available in the MoviSim
			
 
				-\var double starpu_profiling_worker_info::energy_consumed
			
 
				-        Energy consumed by the worker, in Joules
			
 
				-
			
 
				-\struct starpu_profiling_bus_info
			
 
				-todo
			
 
				-\ingroup API_Profiling
			
 
				-\var struct timespec starpu_profiling_bus_info::start_time
			
 
				-        Time of bus profiling startup.
			
 
				-\var struct timespec starpu_profiling_bus_info::total_time
			
 
				-        Total time of bus profiling.
			
 
				-\var int long long starpu_profiling_bus_info::transferred_bytes
			
 
				-        Number of bytes transferred during profiling.
			
 
				-\var int starpu_profiling_bus_info::transfer_count
			
 
				-        Number of transfers during profiling.
			
 
				-
			
 
				-\typedef STARPU_PROFILING_DISABLE
			
 
				-\ingroup API_Profiling
			
 
				-Used when calling the function starpu_profiling_status_set() to disable profiling.
			
 
				-
			
 
				-\typedef STARPU_PROFILING_ENABLE
			
 
				-\ingroup API_Profiling
			
 
				-Used when calling the function starpu_profiling_status_set() to enable profiling.
			
 
				-
			
 
				-\fn int starpu_profiling_status_set(int status)
			
 
				-\ingroup API_Profiling
			
 
				-Set the profiling status. Profiling is activated
			
 
				-by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
			
 
				-\ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
			
 
				-resets all profiling measurements. When profiling is enabled, the
			
 
				-field starpu_task::profiling_info points to a valid structure
			
 
				-starpu_profiling_task_info containing information about the execution
			
 
				-of the task. Negative return values indicate an error, otherwise the
			
 
				-previous status is returned.
			
 
				-
			
 
				-\fn int starpu_profiling_status_get(void)
			
 
				-\ingroup API_Profiling
			
 
				-Return the current profiling status or a negative value in case
			
 
				-there was an error.
			
 
				-
			
 
				-\fn void starpu_profiling_init(void)
			
 
				-\ingroup API_Profiling
			
 
				-Reset performance counters and enable profiling if the
			
 
				-environment variable \ref STARPU_PROFILING is set to a positive value.
			
 
				-
			
 
				-\fn void starpu_profiling_set_id(int new_id)
			
 
				-\ingroup API_Profiling
			
 
				-Set the ID used for profiling trace filename. HAS to be called before starpu_init().
			
 
				-
			
 
				-\fn int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info)
			
 
				-\ingroup API_Profiling
			
 
				-Get the profiling info associated to the worker identified by
			
 
				-\p workerid, and reset the profiling measurements. If the argument \p
			
 
				-worker_info is <c>NULL</c>, only reset the counters associated to worker
			
 
				-\p workerid. Upon successful completion, this function returns 0.
			
 
				-Otherwise, a negative value is returned.
			
 
				-
			
 
				-\fn int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info)
			
 
				-\ingroup API_Profiling
			
 
				-
			
 
				-todo
			
 
				-
			
 
				-See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example.
			
 
				-Note that calling starpu_bus_get_profiling_info resets the counters to zero.
			
 
				-
			
 
				-\fn int starpu_bus_get_count(void)
			
 
				-\ingroup API_Profiling
			
 
				-Return the number of buses in the machine
			
 
				-
			
 
				-\fn int starpu_bus_get_id(int src, int dst)
			
 
				-\ingroup API_Profiling
			
 
				-Return the identifier of the bus between \p src and \p dst
			
 
				-
			
 
				-\fn int starpu_bus_get_src(int busid)
			
 
				-\ingroup API_Profiling
			
 
				-Return the source point of bus \p busid
			
 
				-
			
 
				-\fn int starpu_bus_get_dst(int busid)
			
 
				-\ingroup API_Profiling
			
 
				-Return the destination point of bus \p busid
			
 
				-
			
 
				-\fn double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
			
 
				-\ingroup API_Profiling
			
 
				-Return the time elapsed between \p start and \p end in microseconds.
			
 
				-
			
 
				-\fn double starpu_timing_timespec_to_us(struct timespec *ts)
			
 
				-\ingroup API_Profiling
			
 
				-Convert the given timespec \p ts into microseconds
			
 
				-
			
 
				-\fn void starpu_profiling_bus_helper_display_summary(void)
			
 
				-\ingroup API_Profiling
			
 
				-Display statistics about the bus on \c stderr if the environment
			
 
				-variable \ref STARPU_BUS_STATS is defined. The function is called
			
 
				-automatically by starpu_shutdown().
			
 
				-
			
 
				-\fn void starpu_profiling_worker_helper_display_summary(void)
			
 
				-\ingroup API_Profiling
			
 
				-Display statistics about the workers on \c stderr if the
			
 
				-environment variable \ref STARPU_WORKER_STATS is defined. The function is
			
 
				-called automatically by starpu_shutdown().
			
 
				-
			
 
				-\fn void starpu_data_display_memory_stats()
			
 
				-\ingroup API_Profiling
			
 
				-Display statistics about the current data handles registered
			
 
				-within StarPU. StarPU must have been configured with the configure
			
 
				-option \ref enable-memory-stats "--enable-memory-stats" (see \ref MemoryFeedback).
			
 
				-
			
 
				-*/
			
--- a/doc/doxygen/chapters/api/scc_extensions.doxy
+++ b/doc/doxygen/chapters/api/scc_extensions.doxy
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2013,2015,2017                      CNRS
			
 
				+ * Copyright (C) 2010-2013,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2009-2011,2014                           Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  *
			
@@ -16,7 +16,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				-/*! \defgroup API_SCC_Extensions SCC Extensions
			
 
				+/*! \ingroup API_SCC_Extensions
			
 
				 
			
 
				 \def STARPU_USE_SCC
			
 
				 \ingroup API_SCC_Extensions
			
@@ -28,20 +28,4 @@ It should be used in your code to detect the availability of SCC.
 
				 Define the maximum number of SCC devices that are
			
 
				 supported by StarPU.
			
 
				 
			
 
				-\typedef starpu_scc_func_symbol_t
			
 
				-\ingroup API_SCC_Extensions
			
 
				-Type for SCC function symbols
			
 
				-
			
 
				-\fn int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name)
			
 
				-\ingroup API_SCC_Extensions
			
 
				-Initiate a lookup on each SCC device to find the adress of the
			
 
				-function named \p func_name, store them in the global array kernels
			
 
				-and return the index in the array through \p symbol.
			
 
				-
			
 
				-\fn starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol)
			
 
				-\ingroup API_SCC_Extensions
			
 
				-If success, return the pointer to the function defined by \p symbol on
			
 
				-the device linked to the called device. This can for instance be used
			
 
				-in a starpu_scc_func_symbol_t implementation.
			
 
				-
			
 
				 */
			
--- a/include/starpu_bitmap.h
+++ b/include/starpu_bitmap.h
@@ -19,11 +19,10 @@
 
				 #ifndef __STARPU_BITMAP_H__
			
 
				 #define __STARPU_BITMAP_H__
			
 
				 
			
 
				-/** @defgroup API_Bitmap Bitmap
			
 
				-
			
 
				-    @brief This is the interface for the bitmap utilities provided by StarPU.
			
 
				-
			
 
				-    @{
			
 
				+/**
			
 
				+   @defgroup API_Bitmap Bitmap
			
 
				+   @brief This is the interface for the bitmap utilities provided by StarPU.
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -33,7 +32,7 @@ extern "C"
 
				 
			
 
				 /** create a empty starpu_bitmap */
			
 
				 struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				-/** free \b */
			
 
				+/** free \p b */
			
 
				 void starpu_bitmap_destroy(struct starpu_bitmap *b);
			
 
				 
			
 
				 /** set bit \p e in \p b */
			
--- a/include/starpu_bound.h
+++ b/include/starpu_bound.h
@@ -18,12 +18,11 @@
 
				 #ifndef __STARPU_BOUND_H__
			
 
				 #define __STARPU_BOUND_H__
			
 
				 
			
 
				-/** @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
			
 
				-
			
 
				-    @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
			
 
				-
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
			
 
				+   @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <stdio.h>
			
 
				 
			
@@ -32,34 +31,46 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-/** Start recording tasks (resets stats). \p deps tells whether dependencies should be recorded too (this is quite expensive) */
			
 
				+/**
			
 
				+   Start recording tasks (resets stats). \p deps tells whether
			
 
				+   dependencies should be recorded too (this is quite expensive)
			
 
				+*/
			
 
				 void starpu_bound_start(int deps, int prio);
			
 
				-/** Stop recording tasks */
			
 
				+
			
 
				+/**
			
 
				+   Stop recording tasks
			
 
				+*/
			
 
				 void starpu_bound_stop(void);
			
 
				 
			
 
				-/** Emit the DAG that was recorded on \p output. */
			
 
				+/**
			
 
				+   Emit the DAG that was recorded on \p output.
			
 
				+*/
			
 
				 void starpu_bound_print_dot(FILE *output);
			
 
				 
			
 
				-/** Get theoretical upper bound (in ms) (needs glpk support
			
 
				-    detected by configure script). It returns 0 if some performance models
			
 
				-    are not calibrated.
			
 
				+/**
			
 
				+   Get theoretical upper bound (in ms) (needs glpk support detected by
			
 
				+   configure script). It returns 0 if some performance models are not
			
 
				+   calibrated.
			
 
				 */
			
 
				 void starpu_bound_compute(double *res, double *integer_res, int integer);
			
 
				 
			
 
				-/** Emit the Linear Programming system on \p output for the recorded
			
 
				-    tasks, in the lp format
			
 
				+/**
			
 
				+   Emit the Linear Programming system on \p output for the recorded
			
 
				+   tasks, in the lp format
			
 
				 */
			
 
				 void starpu_bound_print_lp(FILE *output);
			
 
				 
			
 
				-/** Emit the Linear Programming system on \p output for the recorded
			
 
				-    tasks, in the mps format
			
 
				+/**
			
 
				+   Emit the Linear Programming system on \p output for the recorded
			
 
				+   tasks, in the mps format
			
 
				 */
			
 
				 void starpu_bound_print_mps(FILE *output);
			
 
				 
			
 
				-/** Emit on \p output the statistics of actual execution vs theoretical upper bound.
			
 
				-    \p integer permits to choose between integer solving (which takes a
			
 
				-    long time but is correct), and relaxed solving (which provides an
			
 
				-    approximate solution).
			
 
				+/**
			
 
				+   Emit on \p output the statistics of actual execution vs theoretical
			
 
				+   upper bound. \p integer selects between integer solving
			
 
				+   (which takes a long time but is correct), and relaxed solving
			
 
				+   (which provides an approximate solution).
			
 
				 */
			
 
				 void starpu_bound_print(FILE *output, int integer);
			
 
				 
			
--- a/include/starpu_clusters.h
+++ b/include/starpu_clusters.h
@@ -19,9 +19,9 @@
 
				 #ifndef __STARPU_CLUSTERS_UTIL_H__
			
 
				 #define __STARPU_CLUSTERS_UTIL_H__
			
 
				 
			
 
				-/** @defgroup API_Clustering_Machine Clustering Machine
			
 
				-
			
 
				-    @{
			
 
				+/**
			
 
				+   @defgroup API_Clustering_Machine Clustering Machine
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
--- a/include/starpu_cublas.h
+++ b/include/starpu_cublas.h
@@ -18,9 +18,9 @@
 
				 #ifndef __STARPU_CUBLAS_H__
			
 
				 #define __STARPU_CUBLAS_H__
			
 
				 
			
 
				-/** @ingroup API_CUDA_Extensions
			
 
				-
			
 
				-    @{
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #ifdef __cplusplus
			
@@ -38,10 +38,11 @@ extern "C"
 
				 void starpu_cublas_init(void);
			
 
				 
			
 
				 /**
			
 
				-   Set the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA
			
 
				-   codelet before calling CUBLAS v1 kernels, so that they are queued on the proper
			
 
				-   CUDA stream. When using one thread per CUDA worker, this function does not
			
 
				-   do anything since the CUBLAS stream does not change, and is set once by
			
 
				+   Set the proper CUBLAS stream for CUBLAS v1. This must be called
			
 
				+   from the CUDA codelet before calling CUBLAS v1 kernels, so that
			
 
				+   they are queued on the proper CUDA stream. When using one thread
			
 
				+   per CUDA worker, this function does not do anything since the
			
 
				+   CUBLAS stream does not change, and is set once by
			
 
				    starpu_cublas_init().
			
 
				 */
			
 
				 void starpu_cublas_set_stream(void);
			
--- a/include/starpu_cublas_v2.h
+++ b/include/starpu_cublas_v2.h
@@ -18,9 +18,9 @@
 
				 #ifndef __STARPU_CUBLAS_V2_H__
			
 
				 #define __STARPU_CUBLAS_V2_H__
			
 
				 
			
 
				-/** @ingroup API_CUDA_Extensions
			
 
				-
			
 
				-    @{
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
@@ -33,8 +33,8 @@ extern "C"
 
				 #endif
			
 
				 
			
 
				 /**
			
 
				-   Return the CUSPARSE handle to be used to queue CUSPARSE
			
 
				-   kernels. It is properly initialized and configured for multistream by
			
 
				+   Return the CUBLAS handle to be used to queue CUBLAS kernels. It
			
 
				+   is properly initialized and configured for multistream by
			
 
				    starpu_cublas_init().
			
 
				 */
			
 
				 cublasHandle_t starpu_cublas_get_local_handle(void);
			
--- a/include/starpu_cuda.h
+++ b/include/starpu_cuda.h
@@ -19,9 +19,9 @@
 
				 #ifndef __STARPU_CUDA_H__
			
 
				 #define __STARPU_CUDA_H__
			
 
				 
			
 
				-/** @defgroup API_CUDA_Extensions CUDA Extensions
			
 
				-
			
 
				-    @{
			
 
				+/**
			
 
				+   @defgroup API_CUDA_Extensions CUDA Extensions
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #include <starpu_config.h>
			
@@ -36,49 +36,60 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-/** Report a CUBLAS error. */
			
 
				+/**
			
 
				+   Report a CUBLAS error.
			
 
				+*/
			
 
				 void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
			
 
				 
			
 
				-/** Calls starpu_cublas_report_error(), passing the current function, file and line position.*/
			
 
				+/**
			
 
				+   Call starpu_cublas_report_error(), passing the current function, file and line position.
			
 
				+*/
			
 
				 #define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				 
			
 
				-/** Report a CUDA error. */
			
 
				+/**
			
 
				+   Report a CUDA error.
			
 
				+*/
			
 
				 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
			
 
				 
			
 
				-/** Calls starpu_cuda_report_error(), passing the current function, file and line position.*/
			
 
				+/**
			
 
				+   Call starpu_cuda_report_error(), passing the current function, file and line position.
			
 
				+*/
			
 
				 #define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				 
			
 
				 /**
			
 
				-    Return the current worker’s CUDA stream. StarPU
			
 
				-    provides a stream for every CUDA device controlled by StarPU. This
			
 
				-    function is only provided for convenience so that programmers can
			
 
				-    easily use asynchronous operations within codelets without having to
			
 
				-    create a stream by hand. Note that the application is not forced to
			
 
				-    use the stream provided by starpu_cuda_get_local_stream() and may also
			
 
				-    create its own streams. Synchronizing with <c>cudaThreadSynchronize()</c> is
			
 
				-    allowed, but will reduce the likelihood of having all transfers
			
 
				-    overlapped.
			
 
				+   Return the current worker’s CUDA stream. StarPU provides a stream
			
 
				+   for every CUDA device controlled by StarPU. This function is only
			
 
				+   provided for convenience so that programmers can easily use
			
 
				+   asynchronous operations within codelets without having to create a
			
 
				+   stream by hand. Note that the application is not forced to use the
			
 
				+   stream provided by starpu_cuda_get_local_stream() and may also
			
 
				+   create its own streams. Synchronizing with
			
 
				+   <c>cudaThreadSynchronize()</c> is allowed, but will reduce the
			
 
				+   likelihood of having all transfers overlapped.
			
 
				 */
			
 
				 cudaStream_t starpu_cuda_get_local_stream(void);
			
 
				 
			
 
				-/** Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker). */
			
 
				+/**
			
 
				+   Return a pointer to device properties for worker \p workerid
			
 
				+   (assumed to be a CUDA worker).
			
 
				+*/
			
 
				 const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
			
 
				 
			
 
				 /**
			
 
				-    Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				-    to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				-    copy the data asynchronous (unless \p stream is <c>NULL</c>). If the
			
 
				-    asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				-    data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				-    asynchronous launch was successfull. It returns 0 if the synchronous
			
 
				-    copy was successful, or fails otherwise.
			
 
				+   Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				+   to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				+   copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
			
 
				+   asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				+   data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful. It returns 0 if the synchronous
			
 
				+   copy was successful, or fails otherwise.
			
 
				 */
			
 
				 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
			
 
				 
			
 
				 /**
			
 
				-    Calls <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
			
 
				-    according to whether \p devid is among the field
			
 
				-    starpu_conf::cuda_opengl_interoperability.
			
 
				+   Call <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
			
 
				+   according to whether \p devid is among the field
			
 
				+   starpu_conf::cuda_opengl_interoperability.
			
 
				 */
			
 
				 void starpu_cuda_set_device(unsigned devid);
			
 
				 
			
--- a/include/starpu_cusparse.h
+++ b/include/starpu_cusparse.h
@@ -18,10 +18,10 @@
 
				 #ifndef __STARPU_CUSPARSE_H__
			
 
				 #define __STARPU_CUSPARSE_H__
			
 
				 
			
 
				-/** @ingroup API_CUDA_Extensions
			
 
				-
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				 #include <cusparse.h>
			
@@ -40,14 +40,14 @@ extern "C"
 
				 void starpu_cusparse_init(void);
			
 
				 
			
 
				 /**
			
 
				-   Synchronously deinitialize the CUSPARSE library on
			
 
				+   @brief Synchronously deinitialize the CUSPARSE library on
			
 
				    every CUDA device.
			
 
				 */
			
 
				 void starpu_cusparse_shutdown(void);
			
 
				 
			
 
				 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				 /**
			
 
				-   Return the CUSPARSE handle to be used to queue CUSPARSE
			
 
				+   @brief Return the CUSPARSE handle to be used to queue CUSPARSE
			
 
				    kernels. It is properly initialized and configured for multistream by
			
 
				    starpu_cusparse_init().
			
 
				 */
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -19,14 +19,13 @@
 
				 #ifndef __STARPU_DATA_H__
			
 
				 #define __STARPU_DATA_H__
			
 
				 
			
 
				-/** @defgroup API_Data_Management Data Management
			
 
				-
			
 
				-    @brief Data management facilities provided by StarPU. We show how
			
 
				-    to use existing data interfaces in \ref API_Data_Interfaces, but
			
 
				-    developers can design their own data interfaces if required.
			
 
				-
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Data_Management Data Management
			
 
				+   @brief Data management facilities provided by StarPU. We show how
			
 
				+   to use existing data interfaces in \ref API_Data_Interfaces, but
			
 
				+   developers can design their own data interfaces if required.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 
			
@@ -52,12 +51,12 @@ struct _starpu_data_state;
 
				 typedef struct _starpu_data_state* starpu_data_handle_t;
			
 
				 
			
 
				 /**
			
 
				-    Describe a StarPU data access mode
			
 
				+   Describe a StarPU data access mode
			
 
				 
			
 
				-    Note: when adding a flag here, update
			
 
				-    _starpu_detect_implicit_data_deps_with_handle
			
 
				+   Note: when adding a flag here, update
			
 
				+   _starpu_detect_implicit_data_deps_with_handle
			
 
				 
			
 
				-    Note: other STARPU_* values in include/starpu_task_util.h
			
 
				+   Note: other STARPU_* values in include/starpu_task_util.h
			
 
				  */
			
 
				 enum starpu_data_access_mode
			
 
				 {
			
--- a/include/starpu_data_filters.h
+++ b/include/starpu_data_filters.h
@@ -21,10 +21,10 @@
 
				 #ifndef __STARPU_DATA_FILTERS_H__
			
 
				 #define __STARPU_DATA_FILTERS_H__
			
 
				 
			
 
				-/** @defgroup API_Data_Partition Data Partition
			
 
				-
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Data_Partition Data Partition
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 #include <stdarg.h>
			
@@ -36,7 +36,9 @@ extern "C"
 
				 
			
 
				 struct starpu_data_interface_ops;
			
 
				 
			
 
				-/** Describe a data partitioning operation, to be given to starpu_data_partition() */
			
 
				+/**
			
 
				+   Describe a data partitioning operation, to be given to starpu_data_partition()
			
 
				+*/
			
 
				 struct starpu_data_filter
			
 
				 {
			
 
				 	/**
			
@@ -99,37 +101,38 @@ struct starpu_data_filter
 
				 	void *filter_arg_ptr;
			
 
				 };
			
 
				 
			
 
				-/** @name Basic API
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Basic API
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				-    Request the partitioning of \p initial_handle into several subdata
			
 
				-    according to the filter \p f.
			
 
				-    Here an example of how to use the function.
			
 
				-    \code{.c}
			
 
				-    struct starpu_data_filter f =
			
 
				-    {
			
 
				-      .filter_func = starpu_matrix_filter_block,
			
 
				-      .nchildren = nslicesx
			
 
				-    };
			
 
				-    starpu_data_partition(A_handle, &f);
			
 
				+   Request the partitioning of \p initial_handle into several subdata
			
 
				+   according to the filter \p f.
			
 
				+
			
 
				+   Here is an example of how to use the function.
			
 
				+   \code{.c}
			
 
				+   struct starpu_data_filter f =
			
 
				+   {
			
 
				+     .filter_func = starpu_matrix_filter_block,
			
 
				+     .nchildren = nslicesx
			
 
				+   };
			
 
				+   starpu_data_partition(A_handle, &f);
			
 
				     \endcode
			
 
				 */
			
 
				 void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
			
 
				 
			
 
				 /**
			
 
				-   Unapply the filter which has been applied to \p root_data, thus
			
 
				-   unpartitioning the data. The pieces of data are collected back into
			
 
				-   one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				-   Tasks working on the partitioned data will be waited for
			
 
				-   by starpu_data_unpartition().
			
 
				+  Unapply the filter which has been applied to \p root_data, thus
			
 
				+  unpartitioning the data. The pieces of data are collected back into
			
 
				+  one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				+  Tasks working on the partitioned data will be waited for
			
 
				+  by starpu_data_unpartition().
			
 
				 
			
 
				-   Here an example of how to use the function.
			
 
				-   \code{.c}
			
 
				-   starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				-   \endcode
			
 
				+  Here is an example of how to use the function.
			
 
				+  \code{.c}
			
 
				+  starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				+  \endcode
			
 
				 */
			
 
				 void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
			
 
				 
			
@@ -182,10 +185,10 @@ void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters,
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Asynchronous API
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Asynchronous API
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Plan to partition \p initial_handle into several subdata according to
			
@@ -303,44 +306,47 @@ void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Predefined BCSR Filter Functions
			
 
				- * Predefined partitioning functions for BCSR data. Examples on how to
			
 
				- * use them are shown in \ref PartitioningData.
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Predefined BCSR Filter Functions
			
 
				+   Predefined partitioning functions for BCSR data. Examples on how to
			
 
				+   use them are shown in \ref PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Partition a block-sparse matrix into dense matrices.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Predefined CSR Filter Functions
			
 
				- * Predefined partitioning functions for CSR data. Examples on how to
			
 
				- * use them are shown in \ref PartitioningData.
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Predefined CSR Filter Functions
			
 
				+   Predefined partitioning functions for CSR data. Examples on how to
			
 
				+   use them are shown in \ref PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Partition a block-sparse matrix into vertical block-sparse matrices.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Predefined Matrix Filter Functions
			
 
				- * Predefined partitioning functions for matrix
			
 
				- * data. Examples on how to use them are shown in \ref
			
 
				- * PartitioningData.
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Predefined Matrix Filter Functions
			
 
				+   Predefined partitioning functions for matrix
			
 
				+   data. Examples on how to use them are shown in \ref
			
 
				+   PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Partition a dense Matrix along the x dimension, thus getting (x/\p
			
 
				    nparts ,y) matrices. If \p nparts does not divide x, the last
			
 
				    submatrix contains the remainder.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
@@ -353,14 +359,14 @@ void starpu_matrix_filter_block(void *father_interface, void *child_interface, s
 
				    only be used for read-only access, as no coherency is enforced for the
			
 
				    shadowed parts. A usage example is available in
			
 
				    examples/filters/shadow2d.c
			
 
				- */
			
 
				+*/
			
 
				 void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
 
				    Partition a dense Matrix along the y dimension, thus getting
			
 
				    (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
			
 
				    submatrix contains the remainder.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
@@ -377,18 +383,19 @@ void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *ch
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Predefined Vector Filter Functions
			
 
				- * Predefined partitioning functions for vector
			
 
				- * data. Examples on how to use them are shown in \ref
			
 
				- * PartitioningData.
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Predefined Vector Filter Functions
			
 
				+   Predefined partitioning functions for vector
			
 
				+   data. Examples on how to use them are shown in \ref
			
 
				+   PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Return in \p child_interface the \p id th element of the vector
			
 
				    represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				    equal size.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
@@ -411,39 +418,40 @@ void starpu_vector_filter_block_shadow(void *father_interface, void *child_inter
 
				    <c>filter_arg_ptr</c> field must point to an array of \p nparts long
			
 
				    elements, each of which specifies the number of elements in each chunk
			
 
				    of the partition.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
 
				-   Return in \p child_interface the \p id th element of the vector
			
 
				-   represented by \p father_interface once partitioned into \p nparts chunks
			
 
				-   according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				-   <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
			
 
				-   elements, each of which specifies the number of elements in each chunk
			
 
				-   of the partition.
			
 
				- */
			
 
				+  Return in \p child_interface the \p id th element of the vector
			
 
				+  represented by \p father_interface once partitioned into \p nparts chunks
			
 
				+  according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				+  <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
			
 
				+  elements, each of which specifies the number of elements in each chunk
			
 
				+  of the partition.
			
 
				+*/
			
 
				 void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
 
				    Return in \p child_interface the \p id th element of the vector
			
 
				    represented by \p father_interface once partitioned in <c>2</c> chunks of
			
 
				    equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Predefined Block Filter Functions
			
 
				- * Predefined partitioning functions for block data. Examples on how
			
 
				- * to use them are shown in \ref PartitioningData. An example is
			
 
				- * available in \c examples/filters/shadow3d.c
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Predefined Block Filter Functions
			
 
				+   Predefined partitioning functions for block data. Examples on how
			
 
				+   to use them are shown in \ref PartitioningData. An example is
			
 
				+   available in \c examples/filters/shadow3d.c
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				-   Partition a block along the X dimension, thus getting
			
 
				-   (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				-   submatrix contains the remainder.
			
 
				+  Partition a block along the X dimension, thus getting
			
 
				+  (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				+  submatrix contains the remainder.
			
 
				  */
			
 
				 void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
@@ -475,14 +483,14 @@ void starpu_block_filter_vertical_block(void *father_interface, void *child_inte
 
				    <b>IMPORTANT</b>:
			
 
				    This can only be used for read-only access, as no coherency is
			
 
				    enforced for the shadowed parts.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
 
				    Partition a block along the Z dimension, thus getting
			
 
				    (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
			
 
				    submatrix contains the remainder.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /**
			
@@ -494,7 +502,7 @@ void starpu_block_filter_depth_block(void *father_interface, void *child_interfa
 
				    <b>IMPORTANT</b>:
			
 
				    This can only be used for read-only access, as no coherency is
			
 
				    enforced for the shadowed parts.
			
 
				- */
			
 
				+*/
			
 
				 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				 
			
 
				 /** @} */
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -19,53 +19,53 @@
 
				 #ifndef __STARPU_DATA_INTERFACES_H__
			
 
				 #define __STARPU_DATA_INTERFACES_H__
			
 
				 
			
 
				-/** @defgroup API_Data_Interfaces Data Interfaces
			
 
				-
			
 
				-    @brief Data management is done at a high-level in StarPU: rather than
			
 
				-    accessing a mere list of contiguous buffers, the tasks may manipulate
			
 
				-    data that are described by a high-level construct which we call data
			
 
				-    interface.
			
 
				-
			
 
				-    An example of data interface is the "vector" interface which describes
			
 
				-    a contiguous data array on a spefic memory node. This interface is a
			
 
				-    simple structure containing the number of elements in the array, the
			
 
				-    size of the elements, and the address of the array in the appropriate
			
 
				-    address space (this address may be invalid if there is no valid copy
			
 
				-    of the array in the memory node). More informations on the data
			
 
				-    interfaces provided by StarPU are given in \ref API_Data_Interfaces.
			
 
				-
			
 
				-    When a piece of data managed by StarPU is used by a task, the task
			
 
				-    implementation is given a pointer to an interface describing a valid
			
 
				-    copy of the data that is accessible from the current processing unit.
			
 
				-
			
 
				-    Every worker is associated to a memory node which is a logical
			
 
				-    abstraction of the address space from which the processing unit gets
			
 
				-    its data. For instance, the memory node associated to the different
			
 
				-    CPU workers represents main memory (RAM), the memory node associated
			
 
				-    to a GPU is DRAM embedded on the device. Every memory node is
			
 
				-    identified by a logical index which is accessible from the
			
 
				-    function starpu_worker_get_memory_node(). When registering a piece of
			
 
				-    data to StarPU, the specified memory node indicates where the piece of
			
 
				-    data initially resides (we also call this memory node the home node of
			
 
				-    a piece of data).
			
 
				-
			
 
				-    In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
			
 
				-    and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
			
 
				-    numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
			
 
				-
			
 
				-    There are several ways to register a memory region so that it can be
			
 
				-    managed by StarPU. StarPU provides data interfaces for vectors, 2D
			
 
				-    matrices, 3D matrices as well as BCSR and CSR sparse matrices.
			
 
				-
			
 
				-    Each data interface is provided with a set of field access functions.
			
 
				-    The ones using a <c>void *</c> parameter aimed to be used in codelet
			
 
				-    implementations (see for example the code in
			
 
				-    \ref VectorScalingUsingStarPUAPI).
			
 
				-
			
 
				-    Applications can provide their own interface as shown in \ref DefiningANewDataInterface.
			
 
				-
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Data_Interfaces Data Interfaces
			
 
				+   @brief Data management is done at a high-level in StarPU: rather than
			
 
				+   accessing a mere list of contiguous buffers, the tasks may manipulate
			
 
				+   data that are described by a high-level construct which we call data
			
 
				+   interface.
			
 
				+
			
 
				+   An example of data interface is the "vector" interface which describes
			
 
				+   a contiguous data array on a specific memory node. This interface is a
			
 
				+   simple structure containing the number of elements in the array, the
			
 
				+   size of the elements, and the address of the array in the appropriate
			
 
				+   address space (this address may be invalid if there is no valid copy
			
 
				+   of the array in the memory node). More information on the data
			
 
				+   interfaces provided by StarPU are given in \ref API_Data_Interfaces.
			
 
				+
			
 
				+   When a piece of data managed by StarPU is used by a task, the task
			
 
				+   implementation is given a pointer to an interface describing a valid
			
 
				+   copy of the data that is accessible from the current processing unit.
			
 
				+
			
 
				+   Every worker is associated to a memory node which is a logical
			
 
				+   abstraction of the address space from which the processing unit gets
			
 
				+   its data. For instance, the memory node associated to the different
			
 
				+   CPU workers represents main memory (RAM), the memory node associated
			
 
				+   to a GPU is DRAM embedded on the device. Every memory node is
			
 
				+   identified by a logical index which is accessible from the
			
 
				+   function starpu_worker_get_memory_node(). When registering a piece of
			
 
				+   data to StarPU, the specified memory node indicates where the piece of
			
 
				+   data initially resides (we also call this memory node the home node of
			
 
				+   a piece of data).
			
 
				+
			
 
				+   In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
			
 
				+   and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
			
 
				+   numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
			
 
				+
			
 
				+   There are several ways to register a memory region so that it can be
			
 
				+   managed by StarPU. StarPU provides data interfaces for vectors, 2D
			
 
				+   matrices, 3D matrices as well as BCSR and CSR sparse matrices.
			
 
				+
			
 
				+   Each data interface is provided with a set of field access functions.
			
 
				+   The ones using a <c>void *</c> parameter are aimed to be used in codelet
			
 
				+   implementations (see for example the code in
			
 
				+   \ref VectorScalingUsingStarPUAPI).
			
 
				+
			
 
				+   Applications can provide their own interface as shown in \ref DefiningANewDataInterface.
			
 
				+
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 
			
@@ -538,9 +538,11 @@ struct starpu_data_interface_ops
 
				 	char *name;
			
 
				 };
			
 
				 
			
 
				-/** @name Basic API
			
 
				-    @{
			
 
				-    */
			
 
				+/**
			
 
				+   @name Basic API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 /**
			
 
				    Register a piece of data into the handle located at the
			
 
				    \p handleptr address. The \p data_interface buffer contains the initial
			
@@ -706,13 +708,16 @@ void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Accessing Matrix Data Interfaces
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Accessing Matrix Data Interfaces
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
			
 
				 
			
 
				-/** Matrix interface for dense matrices */
			
 
				+/**
			
 
				+   Matrix interface for dense matrices
			
 
				+*/
			
 
				 struct starpu_matrix_interface
			
 
				 {
			
 
				 	enum starpu_data_interface_id id; /**< Identifier of the interface */
			
@@ -875,13 +880,16 @@ size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Accessing COO Data Interfaces
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Accessing COO Data Interfaces
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_coo_ops;
			
 
				 
			
 
				-/** COO Matrices */
			
 
				+/**
			
 
				+   COO Matrices
			
 
				+*/
			
 
				 struct starpu_coo_interface
			
 
				 {
			
 
				 	enum starpu_data_interface_id id; /**< identifier of the interface */
			
@@ -964,15 +972,18 @@ void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, ui
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Block Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Block Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_block_ops;
			
 
				 
			
 
				 /* TODO: rename to 3dmatrix? */
			
 
				 /* TODO: add allocsize support */
			
 
				-/** Block interface for 3D dense blocks */
			
 
				+/**
			
 
				+   Block interface for 3D dense blocks
			
 
				+*/
			
 
				 struct starpu_block_interface
			
 
				 {
			
 
				 	enum starpu_data_interface_id id; /**< identifier of the interface */
			
@@ -1115,9 +1126,10 @@ designated by \p interface.
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Vector Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Vector Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_vector_ops;
			
 
				 
			
@@ -1241,9 +1253,10 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Variable Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Variable Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_variable_ops;
			
 
				 
			
@@ -1322,9 +1335,10 @@ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Void Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Void Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_void_ops;
			
 
				 
			
@@ -1340,8 +1354,9 @@ void starpu_void_data_register(starpu_data_handle_t *handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name CSR Data Interface
			
 
				-    @{
			
 
				+/**
			
 
				+   @name CSR Data Interface
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_csr_ops;
			
@@ -1473,9 +1488,10 @@ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name BCSR Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name BCSR Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
			
 
				 
			
@@ -1677,9 +1693,10 @@ size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Multiformat Data Interface
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Multiformat Data Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Multiformat operations
			
--- a/include/starpu_disk.h
+++ b/include/starpu_disk.h
@@ -20,14 +20,17 @@
 
				 #ifndef __STARPU_DISK_H__
			
 
				 #define __STARPU_DISK_H__
			
 
				 
			
 
				-/** @defgroup API_Out_Of_Core Out Of Core
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Out_Of_Core Out Of Core
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <sys/types.h>
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				-/** Set of functions to manipulate datas on disk. */
			
 
				+/**
			
 
				+   Set of functions to manipulate data on disk.
			
 
				+*/
			
 
				 struct starpu_disk_ops
			
 
				 {
			
 
				 	/**
			
--- a/include/starpu_driver.h
+++ b/include/starpu_driver.h
@@ -18,10 +18,10 @@
 
				 #ifndef __STARPU_DRIVER_H__
			
 
				 #define __STARPU_DRIVER_H__
			
 
				 
			
 
				-/** @defgroup API_Running_Drivers Running Drivers
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Running_Drivers Running Drivers
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
@@ -33,7 +33,9 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-/** structure for a driver */
			
 
				+/**
			
 
				+   structure for a driver
			
 
				+*/
			
 
				 struct starpu_driver
			
 
				 {
			
 
				 	/**
			
--- a/include/starpu_expert.h
+++ b/include/starpu_expert.h
@@ -18,10 +18,10 @@
 
				 #ifndef __STARPU_EXPERT_H__
			
 
				 #define __STARPU_EXPERT_H__
			
 
				 
			
 
				-/** @defgroup API_Expert_Mode Expert Mode
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Expert_Mode Expert Mode
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 extern "C"
			
--- a/include/starpu_fxt.h
+++ b/include/starpu_fxt.h
@@ -21,10 +21,10 @@
 
				 #ifndef __STARPU_FXT_H__
			
 
				 #define __STARPU_FXT_H__
			
 
				 
			
 
				-/** @defgroup API_FxT_Support FxT Support
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_FxT_Support FxT Support
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_perfmodel.h>
			
 
				 
			
--- a/include/starpu_hash.h
+++ b/include/starpu_hash.h
@@ -19,10 +19,10 @@
 
				 #ifndef __STARPU_HASH_H__
			
 
				 #define __STARPU_HASH_H__
			
 
				 
			
 
				-/** @ingroup API_Data_Interfaces
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @ingroup API_Data_Interfaces
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <stdint.h>
			
 
				 #include <stddef.h>
			
--- a/include/starpu_mic.h
+++ b/include/starpu_mic.h
@@ -19,10 +19,10 @@
 
				 #ifndef __STARPU_MIC_H__
			
 
				 #define __STARPU_MIC_H__
			
 
				 
			
 
				-/** @defgroup API_MIC_Extensions MIC Extensions
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_MIC_Extensions MIC Extensions
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 
			
--- a/include/starpu_mpi_ms.h
+++ b/include/starpu_mpi_ms.h
@@ -18,10 +18,10 @@
 
				 #ifndef __STARPU_MPI_MS_H__
			
 
				 #define __STARPU_MPI_MS_H__
			
 
				 
			
 
				-/** @defgroup API_Master_Slave Master Slave Extension
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Master_Slave Master Slave Extension
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 
			
--- a/include/starpu_opencl.h
+++ b/include/starpu_opencl.h
@@ -19,10 +19,10 @@
 
				 #ifndef __STARPU_OPENCL_H__
			
 
				 #define __STARPU_OPENCL_H__
			
 
				 
			
 
				-/** @defgroup API_OpenCL_Extensions OpenCL Extensions
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_OpenCL_Extensions OpenCL Extensions
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 #ifdef STARPU_USE_OPENCL
			
@@ -51,9 +51,10 @@ struct starpu_opencl_program
 
				 	cl_program programs[STARPU_MAXOPENCLDEVS];
			
 
				 };
			
 
				 
			
 
				-/** @name Writing OpenCL kernels
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Writing OpenCL kernels
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Return the OpenCL context of the device designated by \p devid
			
@@ -105,17 +106,18 @@ int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Compiling OpenCL kernels
			
 
				-    Source codes for OpenCL kernels can be stored in a file or in a
			
 
				-    string. StarPU provides functions to build the program executable for
			
 
				-    each available OpenCL device as a cl_program object. This program
			
 
				-    executable can then be loaded within a specific queue as explained in
			
 
				-    the next section. These are only helpers, Applications can also fill a
			
 
				-    starpu_opencl_program array by hand for more advanced use (e.g.
			
 
				-    different programs on the different OpenCL devices, for relocation
			
 
				-    purpose for instance).
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Compiling OpenCL kernels
			
 
				+   Source codes for OpenCL kernels can be stored in a file or in a
			
 
				+   string. StarPU provides functions to build the program executable for
			
 
				+   each available OpenCL device as a cl_program object. This program
			
 
				+   executable can then be loaded within a specific queue as explained in
			
 
				+   the next section. These are only helpers, Applications can also fill a
			
 
				+   starpu_opencl_program array by hand for more advanced use (e.g.
			
 
				+   different programs on the different OpenCL devices, for relocation
			
 
				+   purpose for instance).
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Store the contents of the file \p source_file_name in the buffer
			
@@ -182,9 +184,10 @@ int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Loading OpenCL kernels
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name Loading OpenCL kernels
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Create a kernel \p kernel for device \p devid, on its computation
			
@@ -200,9 +203,10 @@ int starpu_opencl_release_kernel(cl_kernel kernel);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name OpenCL Statistics
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name OpenCL Statistics
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Collect statistics on a kernel execution.
			
@@ -215,9 +219,10 @@ int starpu_opencl_collect_stats(cl_event event);
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name OpenCL Utilities
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @name OpenCL Utilities
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 /**
			
 
				    Return the error message in English corresponding to \p status, an OpenCL
			
--- a/include/starpu_openmp.h
+++ b/include/starpu_openmp.h
@@ -18,10 +18,11 @@
 
				 #ifndef __STARPU_OPENMP_H__
			
 
				 #define __STARPU_OPENMP_H__
			
 
				 
			
 
				-/** @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
			
 
				-    @brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
			
 
				-    @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
			
 
				+   @brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 
			
@@ -213,8 +214,9 @@ extern "C"
 
				 #define __STARPU_OMP_NOTHROW __attribute__((__nothrow__))
			
 
				 #endif
			
 
				 
			
 
				-/** @name Initialisation
			
 
				-    @{
			
 
				+/**
			
 
				+   @name Initialisation
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
@@ -228,9 +230,10 @@ extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Parallel
			
 
				-    \anchor ORS_Parallel
			
 
				-    @{
			
 
				+/**
			
 
				+   @name Parallel
			
 
				+   \anchor ORS_Parallel
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
@@ -266,9 +269,10 @@ extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW;
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Synchronization
			
 
				-    \anchor ORS_Synchronization
			
 
				-    @{
			
 
				+/**
			
 
				+   @name Synchronization
			
 
				+   \anchor ORS_Synchronization
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
@@ -317,9 +321,10 @@ extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHRO
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Worksharing
			
 
				-    \anchor ORS_Worksharing
			
 
				-    @{
			
 
				+/**
			
 
				+   @name Worksharing
			
 
				+   \anchor ORS_Worksharing
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
@@ -542,9 +547,10 @@ extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name Task
			
 
				-    \anchor ORS_Task
			
 
				-    @{
			
 
				+/**
			
 
				+   @name Task
			
 
				+   \anchor ORS_Task
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
@@ -604,9 +610,10 @@ extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_a
 
				 
			
 
				 /** @} */
			
 
				 
			
 
				-/** @name API
			
 
				-    \anchor ORS_API
			
 
				-    @{
			
 
				+/**
			
 
				+   @name API
			
 
				+   \anchor ORS_API
			
 
				+   @{
			
 
				 */
			
 
				 
			
 
				 /**
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -21,10 +21,10 @@
 
				 #ifndef __STARPU_PERFMODEL_H__
			
 
				 #define __STARPU_PERFMODEL_H__
			
 
				 
			
 
				-/** @defgroup
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Performance_Model Performance Model
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 #include <stdio.h>
			
@@ -39,31 +39,37 @@ struct starpu_data_descr;
 
				 
			
 
				 #define STARPU_NARCH STARPU_ANY_WORKER
			
 
				 
			
 
				+/**
			
 
				+   todo
			
 
				+*/
			
 
				 struct starpu_perfmodel_device
			
 
				 {
			
 
				-	enum starpu_worker_archtype type;
			
 
				-	int devid;
			
 
				-	int ncores;
			
 
				+	enum starpu_worker_archtype type; /**< type of the device */
			
 
				+	int devid;                        /**< identifier of the precise device */
			
 
				+	int ncores;                       /**< number of executions in parallel, minus 1 */
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   todo
			
 
				+*/
			
 
				 struct starpu_perfmodel_arch
			
 
				 {
			
 
				-	int ndevices;
			
 
				-	struct starpu_perfmodel_device *devices;
			
 
				+	int ndevices;                            /**< number of the devices for the given arch */
			
 
				+	struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */
			
 
				 };
			
 
				 
			
 
				 
			
 
				 struct starpu_perfmodel_history_entry
			
 
				 {
			
 
				-	double mean;
			
 
				-	double deviation;
			
 
				-	double sum;
			
 
				-	double sum2;
			
 
				-	unsigned nsample;
			
 
				+	double mean;        /**< mean_n = 1/n sum */
			
 
				+	double deviation;   /**< n dev_n = sum2 - 1/n (sum)^2 */
			
 
				+	double sum;         /**< sum of samples (in µs) */
			
 
				+	double sum2;        /**< sum of samples^2 */
			
 
				+	unsigned nsample;   /**< number of samples */
			
 
				 	unsigned nerror;
			
 
				-	uint32_t footprint;
			
 
				-	size_t size;
			
 
				-	double flops;
			
 
				+	uint32_t footprint; /**< data footprint */
			
 
				+	size_t size;        /**< in bytes */
			
 
				+	double flops;       /**< Provided by the application */
			
 
				 
			
 
				 	double duration;
			
 
				 	starpu_tag_t tag;
			
@@ -76,30 +82,35 @@ struct starpu_perfmodel_history_list
 
				 	struct starpu_perfmodel_history_entry *entry;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   todo
			
 
				+*/
			
 
				 struct starpu_perfmodel_regression_model
			
 
				 {
			
 
				-	double sumlny;
			
 
				+	double sumlny;          /**< sum of ln(measured) */
			
 
				 
			
 
				-	double sumlnx;
			
 
				-	double sumlnx2;
			
 
				+	double sumlnx;          /**< sum of ln(size) */
			
 
				+	double sumlnx2;         /**< sum of ln(size)^2 */
			
 
				 
			
 
				-	unsigned long minx;
			
 
				-	unsigned long maxx;
			
 
				+	unsigned long minx;     /**< minimum size */
			
 
				+	unsigned long maxx;     /**< maximum size */
			
 
				 
			
 
				-	double sumlnxlny;
			
 
				+	double sumlnxlny;       /**< sum of ln(size)*ln(measured) */
			
 
				 
			
 
				-	double alpha;
			
 
				-	double beta;
			
 
				-	unsigned valid;
			
 
				+	double alpha;           /**< estimated = alpha * size ^ beta */
			
 
				+	double beta;            /**< estimated = alpha * size ^ beta */
			
 
				+	unsigned valid;         /**< whether the linear regression model is valid (i.e. enough measures) */
			
 
				 
			
 
				-	double a, b, c;
			
 
				-	unsigned nl_valid;
			
 
				+	double a;               /**< estimated = a size ^b + c */
			
 
				+	double b;               /**< estimated = a size ^b + c */
			
 
				+	double c;               /**< estimated = a size ^b + c */
			
 
				+	unsigned nl_valid;      /**< whether the non-linear regression model is valid (i.e. enough measures) */
			
 
				 
			
 
				-	unsigned nsample;
			
 
				+	unsigned nsample;       /**< number of sample values for non-linear regression */
			
 
				 
			
 
				-	double *coeff;
			
 
				-	unsigned ncoeff;
			
 
				-	unsigned multi_valid;
			
 
				+	double *coeff;          /**< list of computed coefficients for multiple linear regression model */
			
 
				+	unsigned ncoeff;        /**< number of coefficients for multiple linear regression model */
			
 
				+	unsigned multi_valid;   /**< whether the multiple linear regression model is valid */
			
 
				 };
			
 
				 
			
 
				 struct starpu_perfmodel_history_table;
			
@@ -109,66 +120,224 @@ struct starpu_perfmodel_history_table;
 
				 typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				 typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				 
			
 
				+/**
			
 
				+   information about the performance model of a given arch.
			
 
				+*/
			
 
				 struct starpu_perfmodel_per_arch
			
 
				 {
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_ARCH, must point to functions which take a
			
 
				+	   task, the target arch and implementation number (as mere
			
 
				+	   convenience, since the array is already indexed by these), and
			
 
				+	   must return a task duration estimation in micro-seconds.
			
 
				+	*/
			
 
				 	starpu_perfmodel_per_arch_cost_function cost_function;
			
 
				+	/**
			
 
				+	   Same as in structure starpu_perfmodel, but per-arch, in case it
			
 
				+	   depends on the architecture-specific implementation.
			
 
				+	*/
			
 
				 	starpu_perfmodel_per_arch_size_base size_base;
			
 
				 
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   The history of performance measurements.
			
 
				+	*/
			
 
				 	struct starpu_perfmodel_history_table *history;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
			
 
				+	   ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
			
 
				+	   measures.
			
 
				+	*/
			
 
				 	struct starpu_perfmodel_history_list *list;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
			
 
				+	   and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
			
 
				+	   factors of the regression.
			
 
				+	*/
			
 
				 	struct starpu_perfmodel_regression_model regression;
			
 
				 
			
 
				 	char debug_path[256];
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   todo
			
 
				+*/
			
 
				 enum starpu_perfmodel_type
			
 
				 {
			
 
				         STARPU_PERFMODEL_INVALID=0,
			
 
				-	STARPU_PER_ARCH,
			
 
				-	STARPU_COMMON,
			
 
				-	STARPU_HISTORY_BASED,
			
 
				-	STARPU_REGRESSION_BASED,
			
 
				-	STARPU_NL_REGRESSION_BASED,
			
 
				-	STARPU_MULTIPLE_REGRESSION_BASED
			
 
				+	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
			
 
				+	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
			
 
				+	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
			
 
				+	STARPU_REGRESSION_BASED,          /**< Automatic linear regression-based cost model  (alpha * size ^ beta) */
			
 
				+	STARPU_NL_REGRESSION_BASED,       /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */
			
 
				+	STARPU_MULTIPLE_REGRESSION_BASED  /**< Automatic multiple linear regression-based cost model. Application
			
 
				+					     provides parameters, their combinations and exponents. */
			
 
				 };
			
 
				 
			
 
				 struct _starpu_perfmodel_state;
			
 
				 typedef struct _starpu_perfmodel_state* starpu_perfmodel_state_t;
			
 
				 
			
 
				+/**
			
 
				+   Contain all information about a performance model. At least the
			
 
				+   type and symbol fields have to be filled when defining a performance
			
 
				+   model for a codelet. For compatibility, make sure to initialize the
			
 
				+   whole structure to zero, either by using explicit memset, or by
			
 
				+   letting the compiler implicitly do it in e.g. static storage case. If
			
 
				+   not provided, other fields have to be zero.
			
 
				+*/
			
 
				 struct starpu_perfmodel
			
 
				 {
			
 
				+	/**
			
 
				+	   type of performance model
			
 
				+	   <ul>
			
 
				+	   <li>
			
 
				+	   ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
			
 
				+	   ::STARPU_NL_REGRESSION_BASED: No other field needs to be
			
 
				+	   provided, this is purely history-based.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
			
 
				+	   starpu_perfmodel::nparameters (number of different parameters),
			
 
				+	   starpu_perfmodel::ncombinations (number of parameters
			
 
				+	   combinations-tuples) and table starpu_perfmodel::combinations
			
 
				+	   which defines exponents of the equation. Function cl_perf_func
			
 
				+	   also needs to define how to extract parameters from the task.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_PER_ARCH: either field
			
 
				+	   starpu_perfmodel::arch_cost_function has to be filled with a
			
 
				+	   function that returns the cost in micro-seconds on the arch given
			
 
				+	   as parameter, or field starpu_perfmodel::per_arch has to be filled
			
 
				+	   with functions which return the cost in micro-seconds.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
 
				+	   filled with a function that returns the cost in micro-seconds on a
			
 
				+	   CPU, timing on other archs will be determined by multiplying by an
			
 
				+	   arch-specific factor.
			
 
				+	   </li>
			
 
				+	   </ul>
			
 
				+	*/
			
 
				 	enum starpu_perfmodel_type type;
			
 
				 
			
 
				+	/**
			
 
				+	   Used by ::STARPU_COMMON. Take a task and implementation number,
			
 
				+	   and must return a task duration estimation in micro-seconds.
			
 
				+	*/
			
 
				 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
			
 
				+	   number, and must return a task duration estimation in
			
 
				+	   micro-seconds on that arch.
			
 
				+	*/
			
 
				 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
			
 
				 
			
 
				+	/**
			
 
				+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				+	   ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
			
 
				+	   implementation number, and return the size to be used as index to
			
 
				+	   distinguish histories and as a base for regressions.
			
 
				+	*/
			
 
				 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
			
 
				+	   and return the footprint to be used as index to distinguish
			
 
				+	   histories. The default is to use the starpu_task_data_footprint()
			
 
				+	   function.
			
 
				+	*/
			
 
				 	uint32_t (*footprint)(struct starpu_task *);
			
 
				 
			
 
				+	/**
			
 
				+	   symbol name for the performance model, which will be used as file
			
 
				+	   name to store the model. It must be set otherwise the model will
			
 
				+	   be ignored.
			
 
				+	*/
			
 
				 	const char *symbol;
			
 
				 
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Whether the performance model is already loaded from the disk.
			
 
				+	*/
			
 
				 	unsigned is_loaded;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				 	unsigned benchmarking;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				 	unsigned is_init;
			
 
				 
			
 
				 	void (*parameters)(struct starpu_task * task, double *parameters);
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Names of parameters used for multiple linear regression models (M,
			
 
				+	   N, K)
			
 
				+	*/
			
 
				 	const char **parameters_names;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Number of parameters used for multiple linear regression models
			
 
				+	*/
			
 
				 	unsigned nparameters;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Table of combinations of parameters (and the exponents) used for
			
 
				+	   multiple linear regression models
			
 
				+	*/
			
 
				 	unsigned **combinations;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Number of combinations of parameters used for multiple linear
			
 
				+	   regression models
			
 
				+	*/
			
 
				 	unsigned ncombinations;
			
 
				-
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				 	starpu_perfmodel_state_t state;
			
 
				 };
			
 
				 
			
 
				 void starpu_perfmodel_init(struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Load the performance model found in the file named \p filename. \p model has to be
			
 
				+   completely zero, and will be filled with the information stored in the given file.
			
 
				+*/
			
 
				 int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Load a given performance model. \p model has to be
			
 
				+   completely zero, and will be filled with the information stored in
			
 
				+   <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
			
 
				+   external tools that want to read the performance model files.
			
 
				+*/
			
 
				+
			
 
				 int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Unload \p model which has been previously loaded
			
 
				+   through the function starpu_perfmodel_load_symbol()
			
 
				+*/
			
 
				 int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
			
 
				+
			
 
				 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
			
 
				 
			
 
				+/**
			
 
				+   Free internal memory used for sampling directory
			
 
				+   management. It should only be called by an application which is not
			
 
				+   calling starpu_shutdown() as this function already calls it. See for
			
 
				+   example <c>tools/starpu_perfmodel_display.c</c>.
			
 
				+*/
			
 
				 void starpu_perfmodel_free_sampling_directories(void);
			
 
				 
			
 
				+/**
			
 
				+   Return the architecture type of the worker \p workerid.
			
 
				+*/
			
 
				 struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				 int starpu_perfmodel_get_narch_combs();
			
 
				 int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
			
 
				 int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
			
@@ -180,39 +349,88 @@ struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct
 
				 int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
			
 
				 int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
			
 
				 
			
 
				+/**
			
 
				+   Return the path to the debugging information for the performance model.
			
 
				+*/
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
			
 
				+
			
 
				 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
			
 
				+
			
 
				+/**
			
 
				+   Return the architecture name for \p arch
			
 
				+*/
			
 
				 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
			
 
				 
			
 
				+/**
			
 
				+   Return the estimated time of a task with the given model and the given footprint.
			
 
				+*/
			
 
				 double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint);
			
 
				+
			
 
				+/**
			
 
				+   If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
			
 
				+*/
			
 
				 void starpu_perfmodel_initialize(void);
			
 
				+
			
 
				+/**
			
 
				+   Print a list of all performance models on \p output
			
 
				+*/
			
 
				 int starpu_perfmodel_list(FILE *output);
			
 
				+
			
 
				 void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output);
			
 
				 int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
			
 
				 int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output);
			
 
				 
			
 
				 int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model);
			
 
				 
			
 
				+/**
			
 
				+   Feed the performance model \p model with an explicit
			
 
				+   measurement \p measured (in µs), in addition to measurements done by StarPU
			
 
				+   itself. This can be useful when the application already has an
			
 
				+   existing set of measurements done in good conditions, that StarPU
			
 
				+   could benefit from instead of doing on-line measurements. An example
			
 
				+   of use can be seen in \ref PerformanceModelExample.
			
 
				+*/
			
 
				 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
			
 
				+
			
 
				+/**
			
 
				+   Print the directory name storing performance models on \p output
			
 
				+*/
			
 
				 void starpu_perfmodel_directory(FILE *output);
			
 
				 
			
 
				+/**
			
 
				+   Print a matrix of bus bandwidths on \p f.
			
 
				+*/
			
 
				 void starpu_bus_print_bandwidth(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Print the affinity devices on \p f.
			
 
				+*/
			
 
				 void starpu_bus_print_affinity(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
			
 
				+*/
			
 
				 void starpu_bus_print_filenames(FILE *f);
			
 
				 
			
 
				+/**
			
 
				+   Return the bandwidth of data transfer between two memory nodes
			
 
				+*/
			
 
				 double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node);
			
 
				+
			
 
				+/**
			
 
				+   Return the latency of data transfer between two memory nodes
			
 
				+*/
			
 
				 double starpu_transfer_latency(unsigned src_node, unsigned dst_node);
			
 
				-double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
			
 
				 
			
 
				-extern struct starpu_perfmodel starpu_perfmodel_nop;
			
 
				+/**
			
 
				+   Return the estimated time to transfer a given size between two memory nodes.
			
 
				+*/
			
 
				+double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
			
 
				 
			
 
				 /**
			
 
				-   Display statistics about the current data handles registered
			
 
				-   within StarPU. StarPU must have been configured with the configure
			
 
				-   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
			
 
				-   MemoryFeedback).
			
 
				+   Performance model which just always returns 1µs.
			
 
				 */
			
 
				-void starpu_data_display_memory_stats();
			
 
				+extern struct starpu_perfmodel starpu_perfmodel_nop;
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_profiling.h
+++ b/include/starpu_profiling.h
@@ -19,10 +19,10 @@
 
				 #ifndef __STARPU_PROFILING_H__
			
 
				 #define __STARPU_PROFILING_H__
			
 
				 
			
 
				-/** @defgroup
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Profiling Profiling
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 #include <errno.h>
			
@@ -33,48 +33,89 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Used when calling the function starpu_profiling_status_set() to disable profiling.
			
 
				+*/
			
 
				 #define STARPU_PROFILING_DISABLE	0
			
 
				+/**
			
 
				+   Used when calling the function starpu_profiling_status_set() to enable profiling.
			
 
				+*/
			
 
				 #define STARPU_PROFILING_ENABLE		1
			
 
				 
			
 
				+/**
			
 
				+   Information about the execution of a task. It is accessible from
			
 
				+   the field starpu_task::profiling_info if profiling was enabled.
			
 
				+ */
			
 
				 struct starpu_profiling_task_info
			
 
				 {
			
 
				+	/** Date of task submission (relative to the initialization of StarPU). */
			
 
				 	struct timespec submit_time;
			
 
				 
			
 
				+	/** Time when the task was submitted to the scheduler. */
			
 
				 	struct timespec push_start_time;
			
 
				+	/** Time when the scheduler finished with the task submission. */
			
 
				 	struct timespec push_end_time;
			
 
				+	/** Time when the scheduler started to be requested for a task, and eventually gave that task. */
			
 
				 	struct timespec pop_start_time;
			
 
				+	/** Time when the scheduler finished providing the task for execution. */
			
 
				 	struct timespec pop_end_time;
			
 
				 
			
 
				+	/** Time when the worker started fetching input data. */
			
 
				 	struct timespec acquire_data_start_time;
			
 
				+	/** Time when the worker finished fetching input data. */
			
 
				 	struct timespec acquire_data_end_time;
			
 
				 
			
 
				+	/** Date of task execution beginning (relative to the initialization of StarPU). */
			
 
				 	struct timespec start_time;
			
 
				+	/** Date of task execution termination (relative to the initialization of StarPU). */
			
 
				 	struct timespec end_time;
			
 
				 
			
 
				+	/** Time when the worker started releasing data. */
			
 
				 	struct timespec release_data_start_time;
			
 
				+	/** Time when the worker finished releasing data. */
			
 
				 	struct timespec release_data_end_time;
			
 
				 
			
 
				+	/** Time when the worker started the application callback for the task. */
			
 
				 	struct timespec callback_start_time;
			
 
				+	/** Time when the worker finished the application callback for the task. */
			
 
				 	struct timespec callback_end_time;
			
 
				 
			
 
				 	/* TODO add expected length, expected start/end ? */
			
 
				+
			
 
				+	/** Identifier of the worker which has executed the task. */
			
 
				 	int workerid;
			
 
				 
			
 
				+	/** Number of cycles used by the task, only available in the MoviSim */
			
 
				 	uint64_t used_cycles;
			
 
				+	/** Number of cycles stalled within the task, only available in the MoviSim */
			
 
				 	uint64_t stall_cycles;
			
 
				+	/** Energy consumed by the task, in Joules */
			
 
				 	double energy_consumed;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   Profiling information associated to a worker. The timing is
			
 
				+   provided since the previous call to
			
 
				+   starpu_profiling_worker_get_info()
			
 
				+*/
			
 
				 struct starpu_profiling_worker_info
			
 
				 {
			
 
				+	/** Starting date for the reported profiling measurements. */
			
 
				 	struct timespec start_time;
			
 
				+	/** Duration of the profiling measurement interval. */
			
 
				 	struct timespec total_time;
			
 
				+	/** Time spent by the worker to execute tasks during the profiling measurement interval. */
			
 
				 	struct timespec executing_time;
			
 
				+	/** Time spent idling by the worker during the profiling measurement interval. */
			
 
				 	struct timespec sleeping_time;
			
 
				+	/** Number of tasks executed by the worker during the profiling measurement interval. */
			
 
				 	int executed_tasks;
			
 
				 
			
 
				+	/** Number of cycles used by the worker, only available in the MoviSim */
			
 
				 	uint64_t used_cycles;
			
 
				+	/** Number of cycles stalled within the worker, only available in the MoviSim */
			
 
				 	uint64_t stall_cycles;
			
 
				+	/** Energy consumed by the worker, in Joules */
			
 
				 	double energy_consumed;
			
 
				 
			
 
				 	double flops;
			
@@ -82,15 +123,43 @@ struct starpu_profiling_worker_info
 
				 
			
 
				 struct starpu_profiling_bus_info
			
 
				 {
			
 
				+	/** Time of bus profiling startup. */
			
 
				 	struct timespec start_time;
			
 
				+	/** Total time of bus profiling. */
			
 
				 	struct timespec total_time;
			
 
				+	/** Number of bytes transferred during profiling. */
			
 
				 	int long long transferred_bytes;
			
 
				+	/** Number of transfers during profiling. */
			
 
				 	int transfer_count;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   Reset performance counters and enable profiling if the
			
 
				+   environment variable \ref STARPU_PROFILING is set to a positive value.
			
 
				+*/
			
 
				 void starpu_profiling_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the ID used for profiling trace filename. Has to be called before starpu_init().
			
 
				+*/
			
 
				 void starpu_profiling_set_id(int new_id);
			
 
				+
			
 
				+/**
			
 
				+   Set the profiling status. Profiling is activated
			
 
				+   by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
			
 
				+   \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
			
 
				+   resets all profiling measurements. When profiling is enabled, the
			
 
				+   field starpu_task::profiling_info points to a valid structure
			
 
				+   starpu_profiling_task_info containing information about the execution
			
 
				+   of the task. Negative return values indicate an error, otherwise the
			
 
				+   previous status is returned.
			
 
				+*/
			
 
				 int starpu_profiling_status_set(int status);
			
 
				+
			
 
				+/**
			
 
				+   Return the current profiling status or a negative value in case
			
 
				+   there was an error.
			
 
				+*/
			
 
				 int starpu_profiling_status_get(void);
			
 
				 
			
 
				 #ifdef BUILDING_STARPU
			
@@ -107,17 +176,43 @@ extern int _starpu_profiling;
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   Get the profiling info associated to the worker identified by
			
 
				+   \p workerid, and reset the profiling measurements. If the argument \p
			
 
				+   worker_info is <c>NULL</c>, only reset the counters associated to worker
			
 
				+   \p workerid. Upon successful completion, this function returns 0.
			
 
				+   Otherwise, a negative value is returned.
			
 
				+*/
			
 
				 int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info);
			
 
				 
			
 
				+/**
			
 
				+   Return the number of buses in the machine
			
 
				+*/
			
 
				 int starpu_bus_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the bus between \p src and \p dst
			
 
				+*/
			
 
				 int starpu_bus_get_id(int src, int dst);
			
 
				+
			
 
				+/**
			
 
				+   Return the source point of bus \p busid
			
 
				+*/
			
 
				 int starpu_bus_get_src(int busid);
			
 
				+
			
 
				+/**
			
 
				+   Return the destination point of bus \p busid
			
 
				+*/
			
 
				 int starpu_bus_get_dst(int busid);
			
 
				 void starpu_bus_set_direct(int busid, int direct);
			
 
				 int starpu_bus_get_direct(int busid);
			
 
				 void starpu_bus_set_ngpus(int busid, int ngpus);
			
 
				 int starpu_bus_get_ngpus(int busid);
			
 
				 
			
 
				+/**
			
 
				+   See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example.
			
 
				+   Note that calling starpu_bus_get_profiling_info() resets the counters to zero.
			
 
				+*/
			
 
				 int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info);
			
 
				 
			
 
				 /* Some helper functions to manipulate profiling API output */
			
@@ -177,12 +272,38 @@ static __starpu_inline void starpu_timespec_sub(const struct timespec *a,
 
				 #define starpu_timespec_cmp(a, b, CMP)                          \
			
 
				 	(((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP (b)->tv_nsec) : ((a)->tv_sec CMP (b)->tv_sec))
			
 
				 
			
 
				+/**
			
 
				+   Return the time elapsed between \p start and \p end in microseconds.
			
 
				+*/
			
 
				 double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end);
			
 
				+
			
 
				+/**
			
 
				+   Convert the given timespec \p ts into microseconds
			
 
				+*/
			
 
				 double starpu_timing_timespec_to_us(struct timespec *ts);
			
 
				 
			
 
				+/**
			
 
				+   Display statistics about the bus on \c stderr if the environment
			
 
				+   variable \ref STARPU_BUS_STATS is defined. The function is called
			
 
				+   automatically by starpu_shutdown().
			
 
				+*/
			
 
				 void starpu_profiling_bus_helper_display_summary(void);
			
 
				+
			
 
				+/**
			
 
				+   Display statistics about the workers on \c stderr if the
			
 
				+   environment variable \ref STARPU_WORKER_STATS is defined. The function is
			
 
				+   called automatically by starpu_shutdown().
			
 
				+*/
			
 
				 void starpu_profiling_worker_helper_display_summary(void);
			
 
				 
			
 
				+/**
			
 
				+   Display statistics about the current data handles registered
			
 
				+   within StarPU. StarPU must have been configured with the configure
			
 
				+   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
			
 
				+   MemoryFeedback).
			
 
				+*/
			
 
				+void starpu_data_display_memory_stats();
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/include/starpu_rand.h
+++ b/include/starpu_rand.h
@@ -19,9 +19,9 @@
 
				 #ifndef __STARPU_RAND_H__
			
 
				 #define __STARPU_RAND_H__
			
 
				 
			
 
				-/** @defgroup
			
 
				- *
			
 
				- * @{
			
 
				+/**
			
 
				+   @defgroup API_Random_Functions Random Functions
			
 
				+   @{
			
 
				  */
			
 
				 
			
 
				 #include <stdlib.h>
			
--- a/include/starpu_scc.h
+++ b/include/starpu_scc.h
@@ -19,19 +19,32 @@
 
				 #ifndef __STARPU_SCC_H__
			
 
				 #define __STARPU_SCC_H__
			
 
				 
			
 
				-/** @defgroup
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_SCC_Extensions SCC Extensions
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu_config.h>
			
 
				 
			
 
				 #ifdef STARPU_USE_SCC
			
 
				 
			
 
				+/**
			
 
				+   Type for SCC function symbols
			
 
				+*/
			
 
				 typedef void *starpu_scc_func_symbol_t;
			
 
				 
			
 
				+/**
			
 
				+   Initiate a lookup on each SCC device to find the address of the
			
 
				+   function named \p func_name, store them in the global array kernels
			
 
				+   and return the index in the array through \p symbol.
			
 
				+*/
			
 
				 int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name);
			
 
				 
			
 
				+/**
			
 
				+   If success, return the pointer to the function defined by \p symbol on
			
 
				+   the device linked to the called device. This can for instance be used
			
 
				+   in a starpu_scc_func_symbol_t implementation.
			
 
				+*/
			
 
				 starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
			
 
				 
			
 
				 #endif /* STARPU_USE_SCC */
			
--- a/include/starpu_sched_component.h
+++ b/include/starpu_sched_component.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2017                                     Arthur Chevalier
			
 
				  * Copyright (C) 2013,2014,2017                           Inria
			
 
				- * Copyright (C) 2014,2015,2017,2019                           CNRS
			
 
				+ * Copyright (C) 2014,2015,2017,2019                      CNRS
			
 
				  * Copyright (C) 2014-2019                                Université de Bordeaux
			
 
				  * Copyright (C) 2013                                     Simon Archipoff
			
 
				  *
			
@@ -21,10 +21,10 @@
 
				 #ifndef __STARPU_SCHED_COMPONENT_H__
			
 
				 #define __STARPU_SCHED_COMPONENT_H__
			
 
				 
			
 
				-/** @defgroup
			
 
				- *
			
 
				- * @{
			
 
				- */
			
 
				+/**
			
 
				+   @defgroup API_Modularized_Scheduler Modularized Scheduler Interface
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				 #include <starpu.h>
			
 
				 
			
@@ -37,106 +37,379 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+   flags for starpu_sched_component::properties
			
 
				+*/
			
 
				 enum starpu_sched_component_properties
			
 
				 {
			
 
				+	/** indicate that all workers have the same starpu_worker_archtype */
			
 
				 	STARPU_SCHED_COMPONENT_HOMOGENEOUS = (1<<0),
			
 
				+	/** indicate that all workers have the same memory component */
			
 
				 	STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE = (1<<1)
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   indicate if component is homogeneous
			
 
				+*/
			
 
				 #define STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component) ((component)->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS)
			
 
				+
			
 
				+/**
			
 
				+   indicate if all workers have the same memory component
			
 
				+*/
			
 
				 #define STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component) ((component)->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
			
 
				 
			
 
				+/**
			
 
				+   Structure for a scheduler module.  A scheduler is a
			
 
				+   tree-like structure of them, some parts of scheduler can be shared by
			
 
				+   several contexts to perform some local optimisations, so, for all
			
 
				+   components, a list of parent is defined by \c sched_ctx_id. They
			
 
				+   embed their specialised methods in a pseudo object-style, so calls are
			
 
				+   like <c>component->push_task(component,task)</c>
			
 
				+*/
			
 
				 struct starpu_sched_component
			
 
				 {
			
 
				+	/** The tree containing the component*/
			
 
				 	struct starpu_sched_tree *tree;
			
 
				+	/** set of underlying workers */
			
 
				 	struct starpu_bitmap *workers;
			
 
				+	/**
			
 
				+	   subset of starpu_sched_component::workers that is currently available in the context
			
 
				+	   The push method should take this value into account, it is set with:
			
 
				+	   component->workers UNION tree->workers UNION
			
 
				+	   component->children[i]->workers_in_ctx iff there exists x such that component->children[i]->parents[x] == component
			
 
				+	*/
			
 
				 	struct starpu_bitmap *workers_in_ctx;
			
 
				+	/** private data */
			
 
				 	void *data;
			
 
				 	char *name;
			
 
				+	/** number of component's children */
			
 
				 	unsigned nchildren;
			
 
				+	/** vector of component's children */
			
 
				 	struct starpu_sched_component **children;
			
 
				+	/** number of component's parents */
			
 
				 	unsigned nparents;
			
 
				+	/** vector of component's parents */
			
 
				 	struct starpu_sched_component **parents;
			
 
				 
			
 
				+	/** add a child to component */
			
 
				 	void (*add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
			
 
				+	/** remove a child from component */
			
 
				 	void (*remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
			
 
				 	void (*add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
			
 
				 	void (*remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
			
 
				 
			
 
				+	/**
			
 
				+	   push a task in the scheduler module. this function is called to
			
 
				+	   push a task on component subtree, this can either perform a
			
 
				+	   recursive call on a child or store the task in the component,
			
 
				+	   then it will be returned by a further pull_task call.
			
 
				+	   the caller must ensure that component is able to execute task.
			
 
				+	   This method must either return 0 if the task was properly stored or
			
 
				+	   passed over to a child component, or return a value different from 0 if the
			
 
				+	   task could not be consumed (e.g. the queue is full).
			
 
				+	*/
			
 
				 	int (*push_task)(struct starpu_sched_component *, struct starpu_task *);
			
 
				+
			
 
				+	/**
			
 
				+	   pop a task from the scheduler module. this function is called by workers to get a task from their
			
 
				+	   parents. this function should first return a locally stored task
			
 
				+	   or perform a recursive call on the parents.
			
 
				+	   the task returned by this function should be executable by the caller
			
 
				+	*/
			
 
				 	struct starpu_task *(*pull_task)(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				 
			
 
				+	/**
			
 
				+	   This function is called by a component which implements a queue,
			
 
				+	   allowing it to signify to its parents that an empty slot is
			
 
				+	   available in its queue. This should return 1 if some tasks could be pushed.
			
 
				+	   The basic implementation of this function
			
 
				+	   is a recursive call to its parents, the user has to specify a
			
 
				+	   personally-made function to catch those calls.
			
 
				+	*/
			
 
				 	int (*can_push)(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				+
			
 
				+	/**
			
 
				+	   This function allows a component to wake up a worker. It is
			
 
				+	   currently called by component which implements a queue, to
			
 
				+	   signify to its children that a task has been pushed in its local
			
 
				+	   queue, and is available to be popped by a worker, for example.
			
 
				+	   This should return 1 if some container or worker could (or will) pull
			
 
				+	   some tasks.
			
 
				+	   The basic implementation of this function is a recursive call to
			
 
				+	   its children, until at least one worker has been woken up.
			
 
				+	*/
			
 
				 	int (*can_pull)(struct starpu_sched_component *component);
			
 
				 
			
 
				 	int (*notify)(struct starpu_sched_component* component, int message_ID, void* arg);
			
 
				 
			
 
				+	/**
			
 
				+	   heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum
			
 
				+	   of relative speedups of workers available in context.
			
 
				+	   estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker))
			
 
				+	*/
			
 
				 	double (*estimated_load)(struct starpu_sched_component *component);
			
 
				+	/**
			
 
				+	   return the time when a worker will enter in starvation. This function is relevant only if the task->predicted
			
 
				+	   member has been set.
			
 
				+	*/
			
 
				 	double (*estimated_end)(struct starpu_sched_component *component);
			
 
				 
			
 
				+	/**
			
 
				+	   called by starpu_sched_component_destroy. Should free data allocated during creation
			
 
				+	*/
			
 
				 	void (*deinit_data)(struct starpu_sched_component *component);
			
 
				+
			
 
				+	/**
			
 
				+	   this function is called for each component when workers are added or removed from a context
			
 
				+	*/
			
 
				 	void (*notify_change_workers)(struct starpu_sched_component *component);
			
 
				 	int properties;
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				+	/**
			
 
				+	   the hwloc object associated to scheduler module. points to the
			
 
				+	   part of topology that is bound to this component, e.g. a numa
			
 
				+	   node for a ws component that would balance load between
			
 
				+	   underlying sockets
			
 
				+	*/
			
 
				 	hwloc_obj_t obj;
			
 
				 #else
			
 
				 	void *obj;
			
 
				 #endif
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   The actual scheduler
			
 
				+*/
			
 
				 struct starpu_sched_tree
			
 
				 {
			
 
				+	/**
			
 
				+	   entry module of the scheduler
			
 
				+	*/
			
 
				 	struct starpu_sched_component *root;
			
 
				+	/**
			
 
				+	   set of workers available in this context, this value is used to mask workers in modules
			
 
				+	*/
			
 
				 	struct starpu_bitmap *workers;
			
 
				+	/**
			
 
				+	   context id of the scheduler
			
 
				+	*/
			
 
				 	unsigned sched_ctx_id;
			
 
				+	/**
			
 
				+	   lock used to protect the scheduler, it is taken in read mode when pushing a task and in write mode for adding or
			
 
				+	   removing workers
			
 
				+	*/
			
 
				 	starpu_pthread_mutex_t lock;
			
 
				 };
			
 
				 
			
 
				+void starpu_initialize_prio_center_policy(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Tree API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   create an empty initialized starpu_sched_tree
			
 
				+*/
			
 
				 struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id) STARPU_ATTRIBUTE_MALLOC;
			
 
				+/**
			
 
				+   destroy tree and free all non-shared components in it.
			
 
				+*/
			
 
				 void starpu_sched_tree_destroy(struct starpu_sched_tree *tree);
			
 
				 struct starpu_sched_tree *starpu_sched_tree_get(unsigned sched_ctx_id);
			
 
				+/**
			
 
				+   recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers).
			
 
				+*/
			
 
				 void starpu_sched_tree_update_workers(struct starpu_sched_tree *t);
			
 
				+/**
			
 
				+   recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers)
			
 
				+*/
			
 
				 void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				 int starpu_sched_tree_push_task(struct starpu_task *task);
			
 
				-int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				 struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx);
			
 
				+
			
 
				+/**
			
 
				+   Push a task to a component. This is a helper for <c>component->push_task(component, task)</c> plus tracing.
			
 
				+*/
			
 
				+int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Pull a task from a component. This is a helper for <c>component->pull_task(component)</c> plus tracing.
			
 
				+*/
			
 
				 struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				+
			
 
				 struct starpu_task* starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *to, int* success);
			
 
				 struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success);
			
 
				 int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component);
			
 
				-
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				 void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				 void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				 
			
 
				+/**
			
 
				+   Attach component \p child to parent \p parent. Some components may accept only one child, others accept several (e.g. MCT).
			
 
				+*/
			
 
				+void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Generic Scheduling Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 typedef struct starpu_sched_component * (*starpu_sched_component_create_t)(struct starpu_sched_tree *tree, void *data);
			
 
				+
			
 
				+/**
			
 
				+   allocate and initialize the component fields with default values:
			
 
				+   .pop_task make recursive call on father
			
 
				+   .estimated_load compute relative speedup and tasks in sub tree
			
 
				+   .estimated_end return the minimum of recursive call on children
			
 
				+   .add_child is starpu_sched_component_add_child
			
 
				+   .remove_child is starpu_sched_component_remove_child
			
 
				+   .notify_change_workers does nothing
			
 
				+   .deinit_data does nothing
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) STARPU_ATTRIBUTE_MALLOC;
			
 
				-void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child);
			
 
				+
			
 
				+/**
			
 
				+   free data allocated by starpu_sched_component_create and call component->deinit_data(component)
			
 
				+   set to <c>NULL</c> the member starpu_sched_component::fathers[sched_ctx_id] of every child if it is equal to \p component
			
 
				+*/
			
 
				+
			
 
				 void starpu_sched_component_destroy(struct starpu_sched_component *component);
			
 
				+/**
			
 
				+   recursively destroy non shared parts of a \p component 's tree
			
 
				+*/
			
 
				 void starpu_sched_component_destroy_rec(struct starpu_sched_component *component);
			
 
				+
			
 
				+void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component can execute \p task; this function takes into account the workers available in the scheduling context
			
 
				+*/
			
 
				 int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   return a non-zero value if \p component can execute \p task.
			
 
				+   compute the predicted execution length for the best implementation on the best available worker and write it at the \p length address.
			
 
				+   this result is more relevant if starpu_sched_component::is_homogeneous is non <c>NULL</c>.
			
 
				+   if a worker needs to be calibrated for an implementation, \p length is set to NaN.
			
 
				+*/
			
 
				 int STARPU_WARN_UNUSED_RESULT starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length);
			
 
				+
			
 
				+/**
			
 
				+   return the average time to transfer \p task data to underlying \p component workers.
			
 
				+*/
			
 
				 double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				+
			
 
				 void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				 
			
 
				-void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Worker Component API
			
 
				+   @{
			
 
				+*/
			
 
				 
			
 
				+/**
			
 
				+   return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid);
			
 
				 struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers).
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers);
			
 
				+
			
 
				+/**
			
 
				+   return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0
			
 
				+*/
			
 
				 int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a worker component
			
 
				+*/
			
 
				 int starpu_sched_component_is_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a simple worker component
			
 
				+*/
			
 
				 int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a combined worker component
			
 
				+*/
			
 
				 int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+   update predictions for workers
			
 
				+*/
			
 
				 void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				 void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Flow-control Fifo Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   default function for the can_push component method, just call can_push of parents until one of them returns non-zero
			
 
				+*/
			
 
				 int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to);
			
 
				+
			
 
				+/**
			
 
				+default function for the can_pull component method, just call can_pull of children until one of them returns non-zero
			
 
				+*/
			
 
				 int starpu_sched_component_can_pull(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function for the can_pull component method, call can_pull of all children
			
 
				+*/
			
 
				 int starpu_sched_component_can_pull_all(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   default function for the estimated_load component method, just sum up the loads
			
 
				+   of the children of the component.
			
 
				+*/
			
 
				 double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function that can be used for the estimated_end component method, compute the minimum completion time of the children.
			
 
				+*/
			
 
				 double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function that can be used for the estimated_end component method, compute
			
 
				+   the minimum completion time of the children, and add to it an estimation of how
			
 
				+   existing queued work, plus the exp_len work, can be completed. This is typically
			
 
				+   used instead of starpu_sched_component_estimated_end_min when the component
			
 
				+   contains a queue of tasks, which thus needs to be added to the estimations.
			
 
				+*/
			
 
				 double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
			
 
				+
			
 
				+/**
			
 
				+   default function for the estimated_end component method, compute the average completion time of the children.
			
 
				+*/
			
 
				 double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
			
 
				 
			
 
				 struct starpu_sched_component_fifo_data
			
@@ -145,9 +418,25 @@ struct starpu_sched_component_fifo_data
 
				 	double exp_len_threshold;
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+   Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities.
			
 
				+   A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it.
			
 
				+   starpu_sched_component::estimated_end function computes the estimated length by dividing the sequential length by the number of underlying workers.
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a fifo component
			
 
				+*/
			
 
				 int starpu_sched_component_is_fifo(struct starpu_sched_component *component);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Flow-control Prio Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component_prio_data
			
 
				 {
			
 
				 	unsigned ntasks_threshold;
			
@@ -156,19 +445,70 @@ struct starpu_sched_component_prio_data
 
				 struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				 int starpu_sched_component_is_prio(struct starpu_sched_component *component);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Work-Stealing Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   return a component that performs work-stealing scheduling. Tasks are pushed in a round-robin way. estimated_end returns the average of the expected lengths of the fifos, starting at the average of the expected_end of its children. When a worker has to steal a task, it steals one in a round-robin way, and gets the last pushed task of the highest priority.
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a work stealing component
			
 
				+ */
			
 
				 int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if it is a worker.
			
 
				+*/
			
 
				 int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Random Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   create a component that performs random scheduling
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a random component
			
 
				+*/
			
 
				 int starpu_sched_component_is_random(struct starpu_sched_component *);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Eager Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				 int starpu_sched_component_is_eager(struct starpu_sched_component *);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Eager-Calibration Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				 int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping MCT Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component_mct_data
			
 
				 {
			
 
				 	double alpha;
			
@@ -176,14 +516,48 @@ struct starpu_sched_component_mct_data
 
				 	double _gamma;
			
 
				 	double idle_power;
			
 
				 };
			
 
				+
			
 
				+/**
			
 
				+   create a component with mct_data parameters. The mct component doesn't
			
 
				+   do anything but pushing tasks on no_perf_model_component and
			
 
				+   calibrating_component
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				 int starpu_sched_component_is_mct(struct starpu_sched_component *component);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Heft Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				 int starpu_sched_component_is_heft(struct starpu_sched_component *component);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Special-purpose Best_Implementation Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Select the implementation that offers the shortest computation length for the first worker that can execute the task.
			
 
				+   Or an implementation that needs to be calibrated.
			
 
				+   Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid.
			
 
				+   If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined.
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Special-purpose Perfmodel_Select Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 struct starpu_sched_component_perfmodel_select_data
			
 
				 {
			
 
				 	struct starpu_sched_component *calibrator_component;
			
@@ -193,46 +567,168 @@ struct starpu_sched_component_perfmodel_select_data
 
				 struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				 int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component);
			
 
				 
			
 
				-void starpu_initialize_prio_center_policy(unsigned sched_ctx_id);
			
 
				+/** @} */
			
 
				 
			
 
				+/**
			
 
				+   @name Recipe Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   parameters for starpu_sched_component_composed_component_create
			
 
				+*/
			
 
				 struct starpu_sched_component_composed_recipe;
			
 
				+
			
 
				+/**
			
 
				+   return an empty recipe for a composed component, it should not be used without modification
			
 
				+*/
			
 
				 struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return a recipe to build a composed component with a \p create_component
			
 
				+*/
			
 
				 struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   add \p create_component under all previous components in recipe
			
 
				+*/
			
 
				 void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called
			
 
				+*/
			
 
				 void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *);
			
 
				+
			
 
				+/**
			
 
				+   create a component that behaves as if all components of the recipe were linked, except that you can't use the starpu_sched_component_is_foo functions on it.
			
 
				+   if the recipe contains a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component
			
 
				+*/
			
 
				 struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe) STARPU_ATTRIBUTE_MALLOC;
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				+/**
			
 
				+   Define how to build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
			
 
				+   the level is just skipped. Bugs everywhere, do not rely on.
			
 
				+*/
			
 
				 struct starpu_sched_component_specs
			
 
				 {
			
 
				+	/**
			
 
				+	   the composed component to put on the top of the scheduler
			
 
				+	   this member must not be <c>NULL</c> as it is the root of the topology
			
 
				+	*/
			
 
				 	struct starpu_sched_component_composed_recipe *hwloc_machine_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each memory component
			
 
				+	*/
			
 
				 	struct starpu_sched_component_composed_recipe *hwloc_component_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each socket
			
 
				+	*/
			
 
				 	struct starpu_sched_component_composed_recipe *hwloc_socket_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each cache
			
 
				+	*/
			
 
				 	struct starpu_sched_component_composed_recipe *hwloc_cache_composed_sched_component;
			
 
				 
			
 
				+	/**
			
 
				+	   a function that returns a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
			
 
				+	   <c>NULL</c> is a valid return value, then no component will be added on top
			
 
				+	*/
			
 
				 	struct starpu_sched_component_composed_recipe *(*worker_composed_sched_component)(enum starpu_worker_archtype archtype);
			
 
				+	/**
			
 
				+	   this flag is a dirty hack because of the poor expressivity of this interface. As an example, if you want to build
			
 
				+	   a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.
			
 
				+	   If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same
			
 
				+	   numa component it will be shared). It indicates if heterogeneous workers should be brothers or cousins, for example, whether a GPU and a CPU should share their NUMA node or not
			
 
				+	*/
			
 
				 	int mix_heterogeneous_workers;
			
 
				 };
			
 
				 
			
 
				+
			
 
				+/**
			
 
				+   build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine.
			
 
				+*/
			
 
				 struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s);
			
 
				 #endif /* STARPU_HAVE_HWLOC */
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Basic API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				 #define STARPU_SCHED_SIMPLE_DECIDE_MASK		(3<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_DECIDE_WORKERS	(1<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_DECIDE_MEMNODES	(2<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_DECIDE_ARCHS	(3<<0)
			
 
				 
			
 
				+/**
			
 
				+   Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated.
			
 
				+   In other words, this is needed when using a component which needs performance models for tasks.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_PERFMODEL		(1<<4)
			
 
				+
			
 
				+/**
			
 
				+   Request that a component be added just above workers, that chooses the best task implementation.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_IMPL		(1<<5)
			
 
				+
			
 
				+/**
			
 
				+   Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component.
			
 
				+
			
 
				+   This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_FIFO_ABOVE		(1<<6)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifo above be sorted by priorities
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO	(1<<7)
			
 
				+
			
 
				+/**
			
 
				+   Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers.
			
 
				+
			
 
				+   This is useful to be able to schedule a (tunable) small number of tasks in advance only.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_FIFOS_BELOW		(1<<8)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifos below be sorted by priorities
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO	(1<<9)
			
 
				+
			
 
				+/**
			
 
				+   Request that work between workers using the same fifo below be distributed using a work stealing component.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_WS_BELOW		(1<<10)
			
 
				+
			
 
				+/**
			
 
				+   Request to not only choose between simple workers, but also choose between combined workers.
			
 
				+*/
			
 
				 #define STARPU_SCHED_SIMPLE_COMBINED_WORKERS	(1<<11)
			
 
				 
			
 
				+/**
			
 
				+   Create a simple modular scheduler tree around a scheduling decision-making
			
 
				+   component \p component. The details of what should be built around \p component
			
 
				+   are described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are
			
 
				+   mutually exclusive. \p data is passed to the \p create_decision_component
			
 
				+   function when creating the decision component.
			
 
				+*/
			
 
				 void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id);
			
 
				 
			
 
				+/** @} */
			
 
				+
			
 
				 #define STARPU_COMPONENT_MUTEX_LOCK(m) \
			
 
				 do \
			
 
				 { \