| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
7771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841 | 
							- \input texinfo @c -*-texinfo-*-
 
- @c %**start of header
 
- @setfilename starpu.info
 
- @settitle StarPU
 
- @c %**end of header
 
- @setchapternewpage odd
 
- @titlepage
 
- @title StarPU
 
- @page
 
- @vskip 0pt plus 1filll
 
- @comment For the @value{version-GCC} Version*
 
- @end titlepage
 
- @summarycontents
 
- @contents
 
- @page
 
- @node Top
 
- @top Preface
 
- @cindex Preface
 
- This manual documents the usage of StarPU.
 
- @comment
 
- @comment  When you add a new menu item, please keep the right hand
 
- @comment  aligned to the same column.  Do not use tabs.  This provides
 
- @comment  better formatting.
 
- @comment
 
- @menu
 
- * Introduction::          A basic introduction to using StarPU
 
- * Installing StarPU::     How to configure, build and install StarPU
 
- * Configuration options:: Configuration options
 
- * Environment variables:: Environment variables used by StarPU
 
- * StarPU API::            The API to use StarPU
 
- * Basic Examples::        Basic examples of the use of StarPU
 
- * Advanced Topics::       Advanced use of StarPU
 
- @end menu
 
- @c ---------------------------------------------------------------------
 
- @c Introduction to StarPU
 
- @c ---------------------------------------------------------------------
 
- @node Introduction
 
- @chapter Introduction to StarPU
 
- @menu
 
- * Motivation::             Why StarPU ?
 
- * StarPU in a Nutshell::   The Fundamentals of StarPU
 
- @end menu
 
- @node Motivation
 
- @section Motivation
 
- @c complex machines with heterogeneous cores/devices
 
- The use of specialized hardware such as accelerators or coprocessors offers an
 
- interesting approach to overcome the physical limits encountered by processor
 
- architects. As a result, many machines are now equipped with one or several
 
- accelerators (e.g. a GPU), in addition to the usual processor(s). While a lot of
 
- efforts have been devoted to offload computation onto such accelerators, very
 
- little attention has been paid to portability concerns on the one hand, and to the
 
- possibility of having heterogeneous accelerators and processors to interact on the other hand.
 
- StarPU is a runtime system that offers support for heterogeneous multicore
 
- architectures. It not only offers a unified view of the computational resources
 
- (i.e. CPUs and accelerators at the same time), but it also takes care of
 
- efficiently mapping and executing tasks onto a heterogeneous machine while
 
- transparently handling low-level issues in a portable fashion.
 
- @c this leads to a complicated distributed memory design
 
- @c which is not (easily) manageable by hand
 
- @c added value/benefits of StarPU
 
- @c   - portability
 
- @c   - scheduling, perf. portability
 
- @node StarPU in a Nutshell
 
- @section StarPU in a Nutshell
 
- From a programming point of view, StarPU is not a new language but a library
 
- that executes tasks explicitly submitted by the application.  The data that a
 
- task manipulates are automatically transferred onto the accelerator so that the
 
- programmer does not have to take care of complex data movements.  StarPU also
 
- takes particular care of scheduling those tasks efficiently and allows
 
- scheduling experts to implement custom scheduling policies in a portable
 
- fashion.
 
- @c explain the notion of codelet and task (i.e. g(A, B)
 
- @subsection Codelet and Tasks
 
- One of StarPU's primary data structures is the @b{codelet}. A codelet describes a
 
- computational kernel that can possibly be implemented on multiple architectures
 
- such as a CPU, a CUDA device or a Cell's SPU.
 
- @c TODO insert illustration f : f_spu, f_cpu, ...
 
- Another important data structure is the @b{task}. Executing a StarPU task
 
- consists in applying a codelet on a data set, on one of the architectures on
 
- which the codelet is implemented. In addition to the codelet that a task
 
- implements, it also describes which data are accessed, and how they are
 
- accessed during the computation (read and/or write).
 
- StarPU tasks are asynchronous: submitting a task to StarPU is a non-blocking
 
- operation. The task structure can also specify a @b{callback} function that is
 
- called once StarPU has properly executed the task. It also contains optional
 
- fields that the application may use to give hints to the scheduler (such as
 
- priority levels).
 
- A task may be identified by a unique 64-bit number which we refer to as a @b{tag}.
 
- Task dependencies can be enforced either by the means of callback functions, or
 
- by expressing dependencies between tags.
 
- @c TODO insert illustration f(Ar, Brw, Cr) + ..
 
- @c DSM
 
- @subsection StarPU Data Management Library
 
- Because StarPU schedules tasks at runtime, data transfers have to be
 
- done automatically and ``just-in-time'' between processing units,
 
- relieving the application programmer from explicit data transfers.
 
- Moreover, to avoid unnecessary transfers, StarPU keeps data
 
- where it was last needed, even if it was modified there, and it
 
- allows multiple copies of the same data to reside at the same time on
 
- several processing units as long as it is not modified.
 
- @c ---------------------------------------------------------------------
 
- @c Installing StarPU
 
- @c ---------------------------------------------------------------------
 
- @node Installing StarPU
 
- @chapter Installing StarPU
 
- @menu
 
- * Configuration of StarPU::
 
- * Building and Installing StarPU::
 
- @end menu
 
- StarPU can be built and installed by the standard means of the GNU
 
- autotools. The following chapter is intended to briefly remind how these tools
 
- can be used to install StarPU.
 
- @node Configuration of StarPU
 
- @section Configuration of StarPU
 
- @menu
 
- * Generating Makefiles and configuration scripts::
 
- * Configuring StarPU::
 
- @end menu
 
- @node Generating Makefiles and configuration scripts
 
- @subsection Generating Makefiles and configuration scripts
 
- This step is not necessary when using the tarball releases of StarPU.  If you
 
- are using the source code from the svn repository, you first need to generate
 
- the configure scripts and the Makefiles.
 
- @example
 
- $ autoreconf -vfi
 
- @end example
 
- @node Configuring StarPU
 
- @subsection Configuring StarPU
 
- @example
 
- $ ./configure
 
- @end example
 
- Details about options that are useful to give to @code{./configure} are given in
 
- @ref{Configuration options}.
 
- @node Building and Installing StarPU
 
- @section Building and Installing StarPU
 
- @menu
 
- * Building::
 
- * Sanity Checks::
 
- * Installing::
 
- * pkg-config configuration::
 
- @end menu
 
- @node Building
 
- @subsection Building
 
- @example
 
- $ make
 
- @end example
 
- @node Sanity Checks
 
- @subsection Sanity Checks
 
- In order to make sure that StarPU is working properly on the system, it is also
 
- possible to run a test suite.
 
- @example
 
- $ make check
 
- @end example
 
- @node Installing
 
- @subsection Installing
 
- In order to install StarPU at the location that was specified during
 
- configuration:
 
- @example
 
- $ make install
 
- @end example
 
- @node pkg-config configuration
 
- @subsection pkg-config configuration
 
- It is possible that compiling and linking an application against StarPU
 
- requires specific flags or libraries (for instance @code{CUDA} or
 
- @code{libspe2}). To this end, it is possible to use the @code{pkg-config} tool.
 
- If StarPU was not installed at some standard location, the path of StarPU's
 
- library must be specified in the @code{PKG_CONFIG_PATH} environment variable so
 
- that @code{pkg-config} can find it. For example if StarPU was installed in
 
- @code{$prefix_dir}:
 
- @example
 
- $ export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$prefix_dir/lib/pkgconfig
 
- @end example
 
- The flags required to compile or link against StarPU are then
 
- accessible with the following commands:
 
- @example
 
- $ pkg-config --cflags libstarpu  # options for the compiler
 
- $ pkg-config --libs libstarpu    # options for the linker
 
- @end example
 
- @c ---------------------------------------------------------------------
 
- @c Configuration options
 
- @c ---------------------------------------------------------------------
 
- @node Configuration options
 
- @chapter Configuration options
 
- @table @asis
 
- @item @code{--disable-cpu}
 
- Disable the use of CPUs of the machine. Only GPUs etc. will be used.
 
- @item @code{--enable-maxcudadev=<number>}
 
- Defines the maximum number of CUDA devices that StarPU will support, then
 
- available as the STARPU_MAXCUDADEVS macro.
 
- @item @code{--disable-cuda}
 
- Disable the use of CUDA, even if the SDK is detected.
 
- @item @code{--enable-maxopencldev=<number>}
 
- Defines the maximum number of OpenCL devices that StarPU will support, then
 
- available as the STARPU_MAXOPENCLDEVS macro.
 
- @item @code{--disable-opencl}
 
- Disable the use of OpenCL, even if the SDK is detected.
 
- @item @code{--enable-gordon}
 
- Enable the use of the Gordon runtime for Cell SPUs.
 
- @c TODO: rather default to enabled when detected
 
- @item @code{--enable-debug}
 
- Enable debugging messages.
 
- @item @code{--enable-fast}
 
- Do not enforce assertions, which saves a lot of time otherwise spent computing them.
 
- @item @code{--enable-verbose}
 
- Augment the verbosity of the debugging messages.
 
- @item @code{--enable-coverage}
 
- Enable flags for the coverage tool.
 
- @item @code{--enable-perf-debug}
 
- Enable performance debugging.
 
- @item @code{--enable-model-debug}
 
- Enable performance model debugging.
 
- @item @code{--enable-stats}
 
- Enable statistics.
 
- @item @code{--enable-maxbuffers=<nbuffers>}
 
- Define the maximum number of buffers that tasks will be able to take as parameters, then available as the STARPU_NMAXBUFS macro.
 
- @item @code{--disable-priority}
 
- Disable taking priorities into account in scheduling decisions. Mostly for
 
- comparison purposes.
 
- @item @code{--enable-allocation-cache}
 
- Enable the use of a data allocation cache to avoid the cost of it with
 
- CUDA. Still experimental.
 
- @item @code{--enable-opengl-render}
 
- Enable the use of OpenGL for the rendering of some examples.
 
- @c TODO: rather default to enabled when detected
 
- @item @code{--enable-blas-lib=<name>}
 
- Specify the blas library to be used by some of the examples. The
 
- library has to be 'atlas' or 'goto'.
 
- @item @code{--with-cuda-dir=<path>}
 
- Specify the directory where the CUDA SDK resides. This directory should notably contain
 
- @code{include/cuda.h}.
 
- @item @code{--with-magma=<path>}
 
- Specify where magma is installed.
 
- @item @code{--with-opencl-dir=<path>}
 
- Specify the location of the OpenCL SDK. This directory should notably contain
 
- @code{include/CL/cl.h}.
 
- @item @code{--with-gordon-dir=<path>}
 
- Specify the location of the Gordon SDK.
 
- @item @code{--with-fxt=<path>}
 
- Specify the location of FxT (for generating traces and rendering them
 
- using ViTE). This directory should notably contain
 
- @code{include/fxt/fxt.h}.
 
- @item @code{--with-perf-model-dir=<dir>}
 
- Specify where performance models should be stored (instead of defaulting to the
 
- current user's home).
 
- @item @code{--with-mpicc=<path to mpicc>}
 
- Specify the location of the @code{mpicc} compiler to be used for starpumpi.
 
- @c TODO: also just use AC_PROG
 
- @item @code{--with-mpi}
 
- Enable building libstarpumpi.
 
- @c TODO: rather just use the availability of mpicc instead of a second option
 
- @item @code{--with-goto-dir=<dir>}
 
- Specify the location of GotoBLAS.
 
- @item @code{--with-atlas-dir=<dir>}
 
- Specify the location of ATLAS. This directory should notably contain
 
- @code{include/cblas.h}.
 
- @end table
 
- @c ---------------------------------------------------------------------
 
- @c Environment variables
 
- @c ---------------------------------------------------------------------
 
- @node Environment variables
 
- @chapter Environment variables
 
- @menu
 
- * Workers::     Configuring workers
 
- * Scheduling::  Configuring the Scheduling engine
 
- * Misc::        Miscellaneous and debug
 
- @end menu
 
- Note: the values given in the @code{starpu_conf} structure passed when
 
- calling @code{starpu_init} will override the values of the environment
 
- variables.
 
- @node Workers
 
- @section Configuring workers
 
- @menu
 
- * STARPU_NCPUS     :: Number of CPU workers
 
- * STARPU_NCUDA     :: Number of CUDA workers
 
- * STARPU_NOPENCL   :: Number of OpenCL workers
 
- * STARPU_NGORDON   :: Number of SPU workers (Cell)
 
- * STARPU_WORKERS_CPUID    :: Bind workers to specific CPUs
 
- * STARPU_WORKERS_CUDAID   :: Select specific CUDA devices
 
- * STARPU_WORKERS_OPENCLID :: Select specific OpenCL devices
 
- @end menu
 
- @node STARPU_NCPUS
 
- @subsection @code{STARPU_NCPUS} -- Number of CPU workers
 
- @table @asis
 
- @item @emph{Description}:
 
- Specify the maximum number of CPU workers. Note that StarPU will not allocate
 
- more CPUs than there are physical CPUs, and that some CPUs are used to control
 
- the accelerators.
 
- @end table
 
- @node STARPU_NCUDA
 
- @subsection @code{STARPU_NCUDA} -- Number of CUDA workers
 
- @table @asis
 
- @item @emph{Description}:
 
- Specify the maximum number of CUDA devices that StarPU can use. If
 
- @code{STARPU_NCUDA} is lower than the number of physical devices, it is
 
- possible to select which CUDA devices should be used by the means of the
 
- @code{STARPU_WORKERS_CUDAID} environment variable.
 
- @end table
 
- @node STARPU_NOPENCL
 
- @subsection @code{STARPU_NOPENCL} -- Number of OpenCL workers
 
- @table @asis
 
- @item @emph{Description}:
 
- OpenCL equivalent of the @code{STARPU_NCUDA} environment variable.
 
- @end table
 
- @node STARPU_NGORDON
 
- @subsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
 
- @table @asis
 
- @item @emph{Description}:
 
- Specify the maximum number of SPUs that StarPU can use.
 
- @end table
 
- @node STARPU_WORKERS_CPUID
 
- @subsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
 
- @table @asis
 
- @item @emph{Description}:
 
- Passing an array of integers (starting from 0) in @code{STARPU_WORKERS_CPUID}
 
- specifies on which logical CPU the different workers should be
 
- bound. For instance, if @code{STARPU_WORKERS_CPUID = "1 3 0 2"}, the first
 
- worker will be bound to logical CPU #1, the second CPU worker will be bound to
 
- logical CPU #3 and so on.  Note that the logical ordering of the CPUs is either
 
- determined by the OS, or provided by the @code{hwloc} library in case it is
 
- available.
 
- Note that the first workers correspond to the CUDA workers, then come the
 
- OpenCL and the SPU, and finally the CPU workers. For example if
 
- we have @code{STARPU_NCUDA=1}, @code{STARPU_NOPENCL=1}, @code{STARPU_NCPUS=2}
 
- and @code{STARPU_WORKERS_CPUID = "0 2 1 3"}, the CUDA device will be controlled
 
- by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and
 
- the logical CPUs #1 and #3 will be used by the CPU workers.
 
- If the number of workers is larger than the array given in
 
- @code{STARPU_WORKERS_CPUID}, the workers are bound to the logical CPUs in a
 
- round-robin fashion: if @code{STARPU_WORKERS_CPUID = "0 1"}, the first and the
 
- third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1).
 
- @end table
 
- @node STARPU_WORKERS_CUDAID
 
- @subsection @code{STARPU_WORKERS_CUDAID} -- Select specific CUDA devices
 
- @table @asis
 
- @item @emph{Description}:
 
- Similarly to the @code{STARPU_WORKERS_CPUID} environment variable, it is
 
- possible to select which CUDA devices should be used by StarPU. On a machine
 
- equipped with 4 GPUs, setting @code{STARPU_WORKERS_CUDAID = "1 3"} and
 
- @code{STARPU_NCUDA=2} specifies that 2 CUDA workers should be created, and that
 
- they should use CUDA devices #1 and #3 (the logical ordering of the devices is
 
- the one reported by CUDA).
 
- @end table
 
- @node STARPU_WORKERS_OPENCLID
 
- @subsection @code{STARPU_WORKERS_OPENCLID} -- Select specific OpenCL devices
 
- @table @asis
 
- @item @emph{Description}:
 
- OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.
 
- @end table
 
- @node Scheduling
 
- @section Configuring the Scheduling engine
 
- @menu
 
- * STARPU_SCHED     :: Scheduling policy
 
- * STARPU_CALIBRATE :: Calibrate performance models
 
- * STARPU_PREFETCH  :: Use data prefetch
 
- * STARPU_SCHED_ALPHA  :: Computation factor
 
- * STARPU_SCHED_BETA  :: Communication factor
 
- @end menu
 
- @node STARPU_SCHED
 
- @subsection @code{STARPU_SCHED} -- Scheduling policy
 
- @table @asis
 
- @item @emph{Description}:
 
- This chooses between the different scheduling policies proposed by StarPU: work
 
- stealing, random, greedy, with performance models, etc.
 
- Use @code{STARPU_SCHED=help} to get the list of available schedulers.
 
- @end table
 
- @node STARPU_CALIBRATE
 
- @subsection @code{STARPU_CALIBRATE} -- Calibrate performance models
 
- @table @asis
 
- @item @emph{Description}:
 
- If this variable is set to 1, the performance models are calibrated during
 
- the execution. If it is set to 2, the previous values are dropped to restart
 
- calibration from scratch.
 
- Note: this currently only applies to dm and dmda scheduling policies.
 
- @end table
 
- @node STARPU_PREFETCH
 
- @subsection @code{STARPU_PREFETCH} -- Use data prefetch
 
- @table @asis
 
- @item @emph{Description}:
 
- If this variable is set, data prefetching will be enabled, that is when a task is
 
- scheduled to be executed e.g. on a GPU, StarPU will request an asynchronous
 
- transfer in advance, so that data is already present on the GPU when the task
 
- starts. As a result, computation and data transfers are overlapped.
 
- @end table
 
- @node STARPU_SCHED_ALPHA
 
- @subsection @code{STARPU_SCHED_ALPHA} -- Computation factor
 
- @table @asis
 
- @item @emph{Description}:
 
- To estimate the cost of a task StarPU takes into account the estimated
 
- computation time (obtained thanks to performance models). The alpha factor is
 
- the coefficient to be applied to it before adding it to the communication part.
 
- @end table
 
- @node STARPU_SCHED_BETA
 
- @subsection @code{STARPU_SCHED_BETA} -- Communication factor
 
- @table @asis
 
- @item @emph{Description}:
 
- To estimate the cost of a task StarPU takes into account the estimated
 
- data transfer time (obtained thanks to performance models). The beta factor is
 
- the coefficient to be applied to it before adding it to the computation part.
 
- @end table
 
- @node Misc
 
- @section Miscellaneous and debug
 
- @menu
 
- * STARPU_LOGFILENAME  :: Select debug file name
 
- @end menu
 
- @node STARPU_LOGFILENAME
 
- @subsection @code{STARPU_LOGFILENAME} -- Select debug file name
 
- @table @asis
 
- @item @emph{Description}:
 
- This variable specifies the file in which the debugging output should be saved.
 
- @end table
 
- @c ---------------------------------------------------------------------
 
- @c StarPU API
 
- @c ---------------------------------------------------------------------
 
- @node StarPU API
 
- @chapter StarPU API
 
- @menu
 
- * Initialization and Termination::   Initialization and Termination methods
 
- * Workers' Properties::              Methods to enumerate workers' properties
 
- * Data Library::                     Methods to manipulate data
 
- * Codelets and Tasks::               Methods to construct tasks
 
- * Tags::                             Task dependencies
 
- * CUDA extensions::                  CUDA extensions
 
- * OpenCL extensions::                OpenCL extensions
 
- * Cell extensions::                  Cell extensions
 
- * Miscellaneous::                    Miscellaneous helpers
 
- @end menu
 
- @node Initialization and Termination
 
- @section Initialization and Termination
 
- @menu
 
- * starpu_init::            Initialize StarPU
 
- * struct starpu_conf::     StarPU runtime configuration
 
- * starpu_shutdown::        Terminate StarPU
 
- @end menu
 
- @node starpu_init
 
- @subsection @code{starpu_init} -- Initialize StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This is the StarPU initialization method, which must be called prior to any other
 
- StarPU call.  It is possible to specify StarPU's configuration (e.g. scheduling
 
- policy, number of cores, ...) by passing a non-null argument. Default
 
- configuration is used if the passed argument is @code{NULL}.
 
- @item @emph{Return value}:
 
- Upon successful completion, this function returns 0. Otherwise, @code{-ENODEV}
 
- indicates that no worker was available (so that StarPU was not initialized).
 
- @item @emph{Prototype}:
 
- @code{int starpu_init(struct starpu_conf *conf);}
 
- @end table
 
- @node struct starpu_conf
 
- @subsection @code{struct starpu_conf} -- StarPU runtime configuration
 
- @table @asis
 
- @item @emph{Description}:
 
- This structure is passed to the @code{starpu_init} function in order
 
- to configure StarPU.
 
- When the default value is used, StarPU automatically selects the number
 
- of processing units and takes the default scheduling policy. This parameter
 
- overwrites the equivalent environment variables.
 
- @item @emph{Fields}:
 
- @table @asis 
 
- @item @code{sched_policy} (default = NULL):
 
- This is the name of the scheduling policy. This can also be specified with the
 
- @code{STARPU_SCHED} environment variable.
 
- @item @code{ncpus} (default = -1):
 
- This is the maximum number of CPU cores that StarPU can use. This can also be
 
- specified with the @code{STARPU_NCPUS} environment variable.
 
- @item @code{ncuda} (default = -1):
 
- This is the maximum number of CUDA devices that StarPU can use. This can also be
 
- specified with the @code{STARPU_NCUDA} environment variable.
 
- @item @code{nopencl} (default = -1):
 
- This is the maximum number of OpenCL devices that StarPU can use. This can also be
 
- specified with the @code{STARPU_NOPENCL} environment variable.
 
- @item @code{nspus} (default = -1):
 
- This is the maximum number of Cell SPUs that StarPU can use. This can also be
 
- specified with the @code{STARPU_NGORDON} environment variable.
 
- @item @code{calibrate} (default = 0):
 
- If this flag is set, StarPU will calibrate the performance models when
 
- executing tasks. This can also be specified with the @code{STARPU_CALIBRATE}
 
- environment variable.
 
- @end table
 
- @end table
 
- @node starpu_shutdown
 
- @subsection @code{starpu_shutdown} -- Terminate StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This is the StarPU termination method. It must be called at the end of the
 
- application: statistics and other post-mortem debugging information are not
 
- guaranteed to be available until this method has been called.
 
- @item @emph{Prototype}:
 
- @code{void starpu_shutdown(void);}
 
- @end table
 
- @node Workers' Properties
 
- @section Workers' Properties
 
- @menu
 
- * starpu_worker_get_count::        Get the number of processing units
 
- * starpu_cpu_worker_get_count::    Get the number of CPUs controlled by StarPU
 
- * starpu_cuda_worker_get_count::   Get the number of CUDA devices controlled by StarPU
 
- * starpu_opencl_worker_get_count:: Get the number of OpenCL devices controlled by StarPU
 
- * starpu_spu_worker_get_count::    Get the number of Cell SPUs controlled by StarPU
 
- * starpu_worker_get_id::           Get the identifier of the current worker
 
- * starpu_worker_get_type::         Get the type of processing unit associated to a worker
 
- * starpu_worker_get_name::         Get the name of a worker
 
- @end menu
 
- @node starpu_worker_get_count
 
- @subsection @code{starpu_worker_get_count} -- Get the number of processing units
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the number of workers (i.e. processing units executing
 
- StarPU tasks). The returned value should be at most @code{STARPU_NMAXWORKERS}.
 
- @item @emph{Prototype}:
 
- @code{unsigned starpu_worker_get_count(void);}
 
- @end table
 
- @node starpu_cpu_worker_get_count
 
- @subsection @code{starpu_cpu_worker_get_count} -- Get the number of CPUs controlled by StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the number of CPUs controlled by StarPU. The returned
 
- value should be at most @code{STARPU_NMAXCPUS}.
 
- @item @emph{Prototype}:
 
- @code{unsigned starpu_cpu_worker_get_count(void);}
 
- @end table
 
- @node starpu_cuda_worker_get_count
 
- @subsection @code{starpu_cuda_worker_get_count} -- Get the number of CUDA devices controlled by StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the number of CUDA devices controlled by StarPU. The returned
 
- value should be at most @code{STARPU_MAXCUDADEVS}.
 
- @item @emph{Prototype}:
 
- @code{unsigned starpu_cuda_worker_get_count(void);}
 
- @end table
 
- @node starpu_opencl_worker_get_count
 
- @subsection @code{starpu_opencl_worker_get_count} -- Get the number of OpenCL devices controlled by StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the number of OpenCL devices controlled by StarPU. The returned
 
- value should be at most @code{STARPU_MAXOPENCLDEVS}.
 
- @item @emph{Prototype}:
 
- @code{unsigned starpu_opencl_worker_get_count(void);}
 
- @end table
 
- @node starpu_spu_worker_get_count
 
- @subsection @code{starpu_spu_worker_get_count} -- Get the number of Cell SPUs controlled by StarPU
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the number of Cell SPUs controlled by StarPU.
 
- @item @emph{Prototype}:
 
- @code{unsigned starpu_spu_worker_get_count(void);}
 
- @end table
 
- @node starpu_worker_get_id
 
- @subsection @code{starpu_worker_get_id} -- Get the identifier of the current worker
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the identifier of the worker associated to the calling
 
- thread. The returned value is either -1 if the current context is not a StarPU
 
- worker (i.e. when called from the application outside a task or a callback), or
 
- an integer between 0 and @code{starpu_worker_get_count() - 1}.
 
- @item @emph{Prototype}:
 
- @code{int starpu_worker_get_id(void);}
 
- @end table
 
- @node starpu_worker_get_type
 
- @subsection @code{starpu_worker_get_type} -- Get the type of processing unit associated to a worker
 
- @table @asis
 
- @item @emph{Description}:
 
- This function returns the type of worker associated to an identifier (as
 
- returned by the @code{starpu_worker_get_id} function). The returned value
 
- indicates the architecture of the worker: @code{STARPU_CPU_WORKER} for a CPU
 
- core, @code{STARPU_CUDA_WORKER} for a CUDA device,
 
- @code{STARPU_OPENCL_WORKER} for an OpenCL device, and
 
- @code{STARPU_GORDON_WORKER} for a Cell SPU. The value returned for an invalid
 
- identifier is unspecified.
 
- @item @emph{Prototype}:
 
- @code{enum starpu_archtype starpu_worker_get_type(int id);}
 
- @end table
 
- @node starpu_worker_get_name
 
- @subsection @code{starpu_worker_get_name} -- Get the name of a worker
 
- @table @asis
 
- @item @emph{Description}:
 
- StarPU associates a unique human readable string to each processing unit. This
 
- function copies at most the @code{maxlen} first bytes of the unique string
 
- associated to a worker identified by its identifier @code{id} into the
 
- @code{dst} buffer. The caller is responsible for ensuring that the @code{dst}
 
- is a valid pointer to a buffer of @code{maxlen} bytes at least. Calling this
 
- function on an invalid identifier results in an unspecified behaviour.
 
- @item @emph{Prototype}:
 
- @code{void starpu_worker_get_name(int id, char *dst, size_t maxlen);}
 
- @end table
 
- @node Data Library
 
- @section Data Library
 
- This section describes the data management facilities provided by StarPU.
 
- TODO: We show how to use existing data interfaces in [ref], but developers can
 
- design their own data interfaces if required.
 
- @menu
 
- * starpu_data_handle::  StarPU opaque data handle
 
- * void *interface::     StarPU data interface
 
- @end menu
 
- @node starpu_data_handle
 
- @subsection @code{starpu_data_handle} -- StarPU opaque data handle
 
- @table @asis 
 
- @item @emph{Description}:
 
- StarPU uses @code{starpu_data_handle} as an opaque handle to manage a piece of
 
- data. Once a piece of data has been registered to StarPU, it is associated to a
 
- @code{starpu_data_handle} which keeps track of the state of the piece of data
 
- over the entire machine, so that we can maintain data consistency and locate
 
- data replicas for instance.
 
- @end table
 
- @node void *interface
 
- @subsection @code{void *interface} -- StarPU data interface
 
- @table @asis 
 
- @item @emph{Description}:
 
- Data management is done at a high-level in StarPU: rather than accessing a mere
 
- list of contiguous buffers, the tasks may manipulate data that are described by
 
- a high-level construct which we call data interface.
 
- TODO
 
- @end table
 
- @c void starpu_data_unregister(struct starpu_data_state_t *state);
 
- @c starpu_worker_get_memory_node TODO
 
- @c 
 
- @c user interaction with the DSM
 
- @c   void starpu_data_sync_with_mem(struct starpu_data_state_t *state);
 
- @c   void starpu_notify_data_modification(struct starpu_data_state_t *state, uint32_t modifying_node);
 
- @node Codelets and Tasks
 
- @section Codelets and Tasks
 
- @menu
 
- * struct starpu_codelet::         StarPU codelet structure
 
- * struct starpu_task::            StarPU task structure
 
- * starpu_task_init::              Initialize a Task
 
- * starpu_task_create::            Allocate and Initialize a Task
 
- * starpu_task_deinit::            Release all the resources used by a Task
 
- * starpu_task_destroy::           Destroy a dynamically allocated Task
 
- * starpu_task_submit::            Submit a Task
 
- * starpu_task_wait::              Wait for the termination of a Task
 
- * starpu_task_wait_for_all::	  Wait for the termination of all Tasks
 
- @end menu
 
- @node struct starpu_codelet
 
- @subsection @code{struct starpu_codelet} -- StarPU codelet structure
 
- @table @asis 
 
- @item @emph{Description}:
 
- The codelet structure describes a kernel that is possibly implemented on
 
- various targets.
 
- @item @emph{Fields}:
 
- @table @asis
 
- @item @code{where}: 
 
- Indicates which types of processing units are able to execute the codelet.
 
- @code{STARPU_CPU|STARPU_CUDA} for instance indicates that the codelet is
 
- implemented for both CPU cores and CUDA devices while @code{STARPU_GORDON}
 
- indicates that it is only available on Cell SPUs.
 
- @item @code{cpu_func} (optional):
 
- Is a function pointer to the CPU implementation of the codelet. Its prototype
 
- must be: @code{void cpu_func(void *buffers[], void *cl_arg)}. The first
 
- argument being the array of data managed by the data management library, and
 
- the second argument is a pointer to the argument passed from the @code{cl_arg}
 
- field of the @code{starpu_task} structure.
 
- The @code{cpu_func} field is ignored if @code{STARPU_CPU} does not appear in
 
- the @code{where} field, it must be non-null otherwise.
 
- @item @code{cuda_func} (optional):
 
- Is a function pointer to the CUDA implementation of the codelet. @emph{This
 
- must be a host-function written in the CUDA runtime API}. Its prototype must
 
- be: @code{void cuda_func(void *buffers[], void *cl_arg);}. The @code{cuda_func}
 
- field is ignored if @code{STARPU_CUDA} does not appear in the @code{where}
 
- field, it must be non-null otherwise.
 
- @item @code{opencl_func} (optional):
 
- Is a function pointer to the OpenCL implementation of the codelet. Its
 
- prototype must be:
 
- @code{void opencl_func(starpu_data_interface_t *descr, void *arg);}.
 
- This pointer is ignored if @code{STARPU_OPENCL} does not appear in the
 
- @code{where} field, it must be non-null otherwise.
 
- @item @code{gordon_func} (optional):
 
- This is the index of the Cell SPU implementation within the Gordon library.
 
- TODO
 
- @item @code{nbuffers}:
 
- Specifies the number of arguments taken by the codelet. These arguments are
 
- managed by the DSM and are accessed from the @code{void *buffers[]}
 
- array. The constant argument passed with the @code{cl_arg} field of the
 
- @code{starpu_task} structure is not counted in this number.  This value should
 
- not be above @code{STARPU_NMAXBUFS}.
 
- @item @code{model} (optional):
 
- This is a pointer to the performance model associated to this codelet. This
 
- optional field is ignored when null. TODO
 
- @end table
 
- @end table
 
- @node struct starpu_task
 
- @subsection @code{struct starpu_task} -- StarPU task structure
 
- @table @asis
 
- @item @emph{Description}:
 
- The @code{starpu_task} structure describes a task that can be offloaded on the various
 
- processing units managed by StarPU. It instantiates a codelet. It can either be
 
- allocated dynamically with the @code{starpu_task_create} method, or declared
 
- statically. In the latter case, the programmer has to zero the
 
- @code{starpu_task} structure and to fill the different fields properly. The
 
- indicated default values correspond to the configuration of a task allocated
 
- with @code{starpu_task_create}.
 
- @item @emph{Fields}:
 
- @table @asis
 
- @item @code{cl}:
 
- Is a pointer to the corresponding @code{starpu_codelet} data structure. This
 
- describes where the kernel should be executed, and supplies the appropriate
 
- implementations. When set to @code{NULL}, no code is executed during the task;
 
- such empty tasks can be useful for synchronization purposes. 
 
- @item @code{buffers}:
 
- TODO
 
- @item @code{cl_arg} (optional) (default = NULL):
 
- This pointer is passed to the codelet through the second argument
 
- of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
 
- In the specific case of the Cell processor, see the @code{cl_arg_size}
 
- argument.
 
- @item @code{cl_arg_size} (optional, Cell specific):
 
- In the case of the Cell processor, the @code{cl_arg} pointer is not directly
 
- given to the SPU function. A buffer of size @code{cl_arg_size} is allocated on
 
- the SPU. This buffer is then filled with the @code{cl_arg_size} bytes starting
 
- at address @code{cl_arg}. In this case, the argument given to the SPU codelet
 
- is therefore not the @code{cl_arg} pointer, but the address of the buffer in
 
- local store (LS) instead. This field is ignored for CPU, CUDA and OpenCL
 
- codelets. 
 
- @item @code{callback_func} (optional) (default = @code{NULL}):
 
- This is a function pointer of prototype @code{void (*f)(void *)} which
 
- specifies a possible callback. If this pointer is non-null, the callback
 
- function is executed @emph{on the host} after the execution of the task. The
 
- callback is passed the value contained in the @code{callback_arg} field. No
 
- callback is executed if the field is null.
 
- @item @code{callback_arg} (optional) (default = @code{NULL}):
 
- This is the pointer passed to the callback function. This field is ignored if
 
- the @code{callback_func} is null.
 
- @item @code{use_tag} (optional) (default = 0):
 
- If set, this flag indicates that the task should be associated with the tag
 
- contained in the @code{tag_id} field. Tags allow the application to synchronize
 
- with the task and to express task dependencies easily.
 
- @item @code{tag_id}:
 
- This field contains the tag associated to the task if the @code{use_tag} field
 
- was set, it is ignored otherwise.
 
- @item @code{synchronous}:
 
- If this flag is set, the @code{starpu_task_submit} function is blocking and
 
- returns only when the task has been executed (or if no worker is able to
 
- process the task). Otherwise, @code{starpu_task_submit} returns immediately.
 
- @item @code{priority} (optional) (default = @code{STARPU_DEFAULT_PRIO}):
 
- This field indicates a level of priority for the task. This is an integer value
 
- that must be set between @code{STARPU_MIN_PRIO} (for the least important
 
- tasks) and @code{STARPU_MAX_PRIO} (for the most important tasks) included.
 
- Default priority is @code{STARPU_DEFAULT_PRIO}.  Scheduling strategies that
 
- take priorities into account can use this parameter to take better scheduling
 
- decisions, but the scheduling policy may also ignore it.
 
- @item @code{execute_on_a_specific_worker} (default = 0):
 
- If this flag is set, StarPU will bypass the scheduler and directly assign this
 
- task to the worker specified by the @code{workerid} field.
 
- @item @code{workerid} (optional):
 
- If the @code{execute_on_a_specific_worker} field is set, this field indicates
 
- which is the identifier of the worker that should process this task (as
 
- returned by @code{starpu_worker_get_id}). This field is ignored if
 
- @code{execute_on_a_specific_worker} field is set to 0.
 
- @item @code{detach} (optional) (default = 1):
 
- If this flag is set, it is not possible to synchronize with the task
 
- by the means of @code{starpu_task_wait} later on. Internal data structures
 
- are only guaranteed to be freed once @code{starpu_task_wait} is called if the
 
- flag is not set.
 
- @item @code{destroy} (optional) (default = 1):
 
- If this flag is set, the task structure will automatically be freed, either
 
- after the execution of the callback if the task is detached, or during
 
- @code{starpu_task_wait} otherwise. If this flag is not set, dynamically
 
- allocated data structures will not be freed until @code{starpu_task_destroy} is
 
- called explicitly. Setting this flag for a statically allocated task structure
 
- will result in undefined behaviour.
 
- @end table
 
- @end table
 
- @node starpu_task_init
 
- @subsection @code{starpu_task_init} -- Initialize a Task
 
- @table @asis
 
- @item @emph{Description}:
 
- Initialize a task structure with default values. This function is implicitly
 
- called by @code{starpu_task_create}. By default, tasks initialized with
 
- @code{starpu_task_init} must be deinitialized explicitly with
 
- @code{starpu_task_deinit}. Tasks can also be initialized statically, using the
 
- constant @code{STARPU_TASK_INITIALIZER}.
 
- @item @emph{Prototype}:
 
- @code{void starpu_task_init(struct starpu_task *task);}
 
- @end table
 
- @node starpu_task_create
 
- @subsection @code{starpu_task_create} -- Allocate and Initialize a Task
 
- @table @asis
 
- @item @emph{Description}:
 
- Allocate a task structure and initialize it with default values. Tasks
 
- allocated dynamically with @code{starpu_task_create} are automatically freed when the
 
- task is terminated. If the destroy flag is explicitly unset, the resources used
 
- by the task are freed by calling
 
- @code{starpu_task_destroy}.
 
- @item @emph{Prototype}:
 
- @code{struct starpu_task *starpu_task_create(void);}
 
- @end table
 
- @node starpu_task_deinit
 
- @subsection @code{starpu_task_deinit} -- Release all the resources used by a Task
 
- @table @asis
 
- @item @emph{Description}:
 
- Release all the structures automatically allocated to execute the task. This is
 
- called automatically by @code{starpu_task_destroy}, but the task structure itself is not
 
- freed. This should be used for statically allocated tasks for instance.
 
- @item @emph{Prototype}:
 
- @code{void starpu_task_deinit(struct starpu_task *task);}
 
- @end table
 
- @node starpu_task_destroy
 
- @subsection @code{starpu_task_destroy} -- Destroy a dynamically allocated Task
 
- @table @asis
 
- @item @emph{Description}:
 
- Free the resources allocated during @code{starpu_task_create}. This function can be
 
- called automatically after the execution of a task by setting the
 
- @code{destroy} flag of the @code{starpu_task} structure (default behaviour).
 
- Calling this function on a statically allocated task results in undefined
 
- behaviour.
 
- @item @emph{Prototype}:
 
- @code{void starpu_task_destroy(struct starpu_task *task);}
 
- @end table
 
- @node starpu_task_wait
 
- @subsection @code{starpu_task_wait} -- Wait for the termination of a Task
 
- @table @asis
 
- @item @emph{Description}:
 
- This function blocks until the task has been executed. It is not possible to
 
- synchronize with a task more than once. It is not possible to wait for
 
- synchronous or detached tasks.
 
- @item @emph{Return value}:
 
- Upon successful completion, this function returns 0. Otherwise, @code{-EINVAL}
 
- indicates that the specified task was either synchronous or detached.
 
- @item @emph{Prototype}:
 
- @code{int starpu_task_wait(struct starpu_task *task);}
 
- @end table
 
- @node starpu_task_submit
 
- @subsection @code{starpu_task_submit} -- Submit a Task
 
- @table @asis
 
- @item @emph{Description}:
 
- This function submits a task to StarPU. Calling this function does
 
- not mean that the task will be executed immediately as there can be data or task
 
- (tag) dependencies that are not fulfilled yet: StarPU will take care of
 
- scheduling this task with respect to such dependencies.
 
- This function returns immediately if the @code{synchronous} field of the
 
- @code{starpu_task} structure was set to 0, and blocks until the termination of
 
- the task otherwise. It is also possible to synchronize the application with
 
- asynchronous tasks by the means of tags, using the @code{starpu_tag_wait}
 
- function for instance.
 
- @item @emph{Return value}:
 
- In case of success, this function returns 0, a return value of @code{-ENODEV}
 
- means that there is no worker able to process this task (e.g. there is no GPU
 
- available and this task is only implemented for CUDA devices).
 
- @item @emph{Prototype}:
 
- @code{int starpu_task_submit(struct starpu_task *task);}
 
- @end table
 
- @node starpu_task_wait_for_all
 
- @subsection @code{starpu_task_wait_for_all} -- Wait for the termination of all Tasks
 
- @table @asis
 
- @item @emph{Description}:
 
- This function blocks until all the tasks that were submitted are terminated.
 
- @item @emph{Prototype}:
 
- @code{void starpu_task_wait_for_all(void);}
 
- @end table
 
- @c Callbacks : what can we put in callbacks ?
 
- @node Tags
 
- @section Tags
 
- @menu
 
- * starpu_tag_t::                   Task identifier
 
- * starpu_tag_declare_deps::        Declare the Dependencies of a Tag
 
- * starpu_tag_declare_deps_array::  Declare the Dependencies of a Tag
 
- * starpu_tag_wait::                Block until a Tag is terminated
 
- * starpu_tag_wait_array::          Block until a set of Tags is terminated
 
- * starpu_tag_remove::              Destroy a Tag
 
- * starpu_tag_notify_from_apps::    Feed a tag explicitly
 
- @end menu
 
- @node starpu_tag_t
 
- @subsection @code{starpu_tag_t} -- Task identifier
 
- @table @asis
 
- @item @emph{Description}:
 
- It is possible to associate a task with a unique ``tag'' and to express
 
- dependencies between tasks by the means of those tags. To do so, fill the
 
- @code{tag_id} field of the @code{starpu_task} structure with a tag number (can
 
- be arbitrary) and set the @code{use_tag} field to 1.
 
- If @code{starpu_tag_declare_deps} is called with this tag number, the task will
 
- not be started until the tasks which hold the declared dependency tags are
 
- completed.
 
- @end table
 
- @node starpu_tag_declare_deps
 
- @subsection @code{starpu_tag_declare_deps} -- Declare the Dependencies of a Tag
 
- @table @asis
 
- @item @emph{Description}:
 
- Specify the dependencies of the task identified by tag @code{id}. The first
 
- argument specifies the tag which is configured, the second argument gives the
 
- number of tag(s) on which @code{id} depends. The following arguments are the
 
- tags which have to be terminated to unlock the task.
 
- This function must be called before the associated task is submitted to StarPU
 
- with @code{starpu_task_submit}.
 
- @item @emph{Remark}:
 
- Because of the variable arity of @code{starpu_tag_declare_deps}, note that the
 
- last arguments @emph{must} be of type @code{starpu_tag_t}: constant values
 
- typically need to be explicitly cast. Using the
 
- @code{starpu_tag_declare_deps_array} function avoids this hazard.
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);}
 
- @item @emph{Example}:
 
- @cartouche
 
- @example
 
- /*  Tag 0x1 depends on tags 0x32 and 0x52 */
 
- starpu_tag_declare_deps((starpu_tag_t)0x1,
 
-         2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
 
- @end example
 
- @end cartouche
 
- @end table
 
- @node starpu_tag_declare_deps_array
 
- @subsection @code{starpu_tag_declare_deps_array} -- Declare the Dependencies of a Tag
 
- @table @asis
 
- @item @emph{Description}:
 
- This function is similar to @code{starpu_tag_declare_deps}, except that it
 
- does not take a variable number of arguments but an array of tags of size
 
- @code{ndeps}.
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);}
 
- @item @emph{Example}:
 
- @cartouche
 
- @example
 
- /*  Tag 0x1 depends on tags 0x32 and 0x52 */
 
- starpu_tag_t tag_array[2] = @{0x32, 0x52@};
 
- starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
 
- @end example
 
- @end cartouche
 
- @end table
 
- @node starpu_tag_wait
 
- @subsection @code{starpu_tag_wait} -- Block until a Tag is terminated
 
- @table @asis
 
- @item @emph{Description}:
 
- This function blocks until the task associated to tag @code{id} has been
 
- executed. This is a blocking call which must therefore not be called within
 
- tasks or callbacks, but only from the application directly.  It is possible to
 
- synchronize with the same tag multiple times, as long as the
 
- @code{starpu_tag_remove} function is not called.  Note that it is still
 
- possible to synchronize with a tag associated to a task which @code{starpu_task}
 
- data structure was freed (e.g. if the @code{destroy} flag of the
 
- @code{starpu_task} was enabled).
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_wait(starpu_tag_t id);}
 
- @end table
 
- @node starpu_tag_wait_array
 
- @subsection @code{starpu_tag_wait_array} -- Block until a set of Tags is terminated
 
- @table @asis
 
- @item @emph{Description}:
 
- This function is similar to @code{starpu_tag_wait} except that it blocks until
 
- @emph{all} the @code{ntags} tags contained in the @code{id} array are
 
- terminated.
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id);}
 
- @end table
 
- @node starpu_tag_remove
 
- @subsection @code{starpu_tag_remove} -- Destroy a Tag
 
- @table @asis
 
- @item @emph{Description}:
 
- This function releases the resources associated to tag @code{id}. It can be
 
- called once the corresponding task has been executed and when there is
 
- no other tag that depends on this tag anymore.
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_remove(starpu_tag_t id);}
 
- @end table
 
- @node starpu_tag_notify_from_apps
 
- @subsection @code{starpu_tag_notify_from_apps} -- Feed a Tag explicitly
 
- @table @asis
 
- @item @emph{Description}:
 
- This function explicitly unlocks tag @code{id}. It may be useful in the
 
- case of applications which execute part of their computation outside StarPU
 
- tasks (e.g. third-party libraries).  It is also provided as a
 
- convenient tool for the programmer, for instance to entirely construct the task
 
- DAG before actually giving StarPU the opportunity to execute the tasks.
 
- @item @emph{Prototype}:
 
- @code{void starpu_tag_notify_from_apps(starpu_tag_t id);}
 
- @end table
 
- @node CUDA extensions
 
- @section CUDA extensions
 
- @c void starpu_data_malloc_pinned_if_possible(float **A, size_t dim);
 
- @c starpu_helper_cublas_init TODO
 
- @c starpu_helper_cublas_shutdown TODO
 
- @menu
 
- * starpu_cuda_get_local_stream::   Get current worker's CUDA stream
 
- * starpu_helper_cublas_init::      Initialize CUBLAS on every CUDA device
 
- * starpu_helper_cublas_shutdown::  Deinitialize CUBLAS on every CUDA device
 
- @end menu
 
- @node starpu_cuda_get_local_stream
 
- @subsection @code{starpu_cuda_get_local_stream} -- Get current worker's CUDA stream
 
- @table @asis
 
- @item @emph{Description}:
 
- StarPU provides a stream for every CUDA device controlled by StarPU. This
 
- function is only provided for convenience so that programmers can easily use
 
- asynchronous operations within codelets without having to create a stream by
 
- hand. Note that the application is not forced to use the stream provided by
 
- @code{starpu_cuda_get_local_stream} and may also create its own streams.
 
- @item @emph{Prototype}:
 
- @code{cudaStream_t *starpu_cuda_get_local_stream(void);}
 
- @end table
 
- @node starpu_helper_cublas_init
 
- @subsection @code{starpu_helper_cublas_init} -- Initialize CUBLAS on every CUDA device
 
- @table @asis
 
- @item @emph{Description}:
 
- The CUBLAS library must be initialized prior to any CUBLAS call. Calling
 
- @code{starpu_helper_cublas_init} will initialize CUBLAS on every CUDA device
 
- controlled by StarPU. This call blocks until CUBLAS has been properly
 
- initialized on every device.
 
- @item @emph{Prototype}:
 
- @code{void starpu_helper_cublas_init(void);}
 
- @end table
 
- @node starpu_helper_cublas_shutdown
 
- @subsection @code{starpu_helper_cublas_shutdown} -- Deinitialize CUBLAS on every CUDA device
 
- @table @asis
 
- @item @emph{Description}:
 
- This function synchronously deinitializes the CUBLAS library on every CUDA device.
 
- @item @emph{Prototype}:
 
- @code{void starpu_helper_cublas_shutdown(void);}
 
- @end table
 
- @node OpenCL extensions
 
- @section OpenCL extensions
 
- @menu
 
- * Enabling OpenCL::                 Enabling OpenCL
 
- * Compiling OpenCL codelets::       Compiling OpenCL codelets
 
- @end menu
 
- @node Enabling OpenCL
 
- @subsection Enabling OpenCL
 
- On GPU devices which can run both CUDA and OpenCL, CUDA will be
 
- enabled by default. To enable OpenCL, you need either to disable CUDA
 
- when configuring StarPU:
 
- @example
 
- $ ./configure --disable-cuda
 
- @end example
 
- or when running applications:
 
- @example
 
- $ STARPU_NCUDA=0 ./application
 
- @end example
 
- OpenCL will automatically be started on any device not yet used by
 
- CUDA. So on a machine running 4 GPUs, it is therefore possible to
 
- enable CUDA on 2 devices, and OpenCL on the 2 other devices by doing
 
- so:
 
- @example
 
- $ STARPU_NCUDA=2 ./application
 
- @end example
 
- @node Compiling OpenCL codelets
 
- @subsection Compiling OpenCL codelets
 
- TODO
 
- @node Cell extensions
 
- @section Cell extensions
 
- nothing yet.
 
- @node Miscellaneous
 
- @section Miscellaneous helpers
 
- @menu
 
- * starpu_execute_on_each_worker::   Execute a function on a subset of workers
 
- @end menu
 
- @node starpu_execute_on_each_worker
 
- @subsection @code{starpu_execute_on_each_worker} -- Execute a function on a subset of workers
 
- @table @asis
 
- @item @emph{Description}:
 
- When calling this method, the offloaded function specified by the first argument is
 
- executed by every StarPU worker that may execute the function.
 
- The second argument is passed to the offloaded function.
 
- The last argument specifies on which types of processing units the function
 
- should be executed. Similarly to the @code{where} field of the
 
- @code{starpu_codelet} structure, it is possible to specify that the function
 
- should be executed on every CUDA device and every CPU by passing
 
- @code{STARPU_CPU|STARPU_CUDA}.
 
- This function blocks until the function has been executed on every appropriate
 
- processing unit, so that it may not be called from a callback function for
 
- instance.
 
- @item @emph{Prototype}:
 
- @code{void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where);}
 
- @end table
 
- @c ---------------------------------------------------------------------
 
- @c Basic Examples
 
- @c ---------------------------------------------------------------------
 
- @node Basic Examples
 
- @chapter Basic Examples
 
- @menu
 
- * Compiling and linking::        Compiling and Linking Options
 
- * Hello World::                  Submitting Tasks
 
- * Scaling a Vector::             Manipulating Data
 
- * Scaling a Vector (hybrid)::    Handling Heterogeneous Architectures
 
- @end menu
 
- @node Compiling and linking
 
- @section Compiling and linking options
 
- The Makefile could for instance contain the following lines to define which
 
- options must be given to the compiler and to the linker:
 
- @cartouche
 
- @example
 
- CFLAGS+=$$(pkg-config --cflags libstarpu)
 
- LIBS+=$$(pkg-config --libs libstarpu)
 
- @end example
 
- @end cartouche
 
- @node Hello World
 
- @section Hello World
 
- In this section, we show how to implement a simple program that submits a task to StarPU.
 
- @subsection Required Headers
 
- The @code{starpu.h} header should be included in any code using StarPU.
 
- @cartouche
 
- @example
 
- #include <starpu.h>
 
- @end example
 
- @end cartouche
 
- @subsection Defining a Codelet
 
- @cartouche
 
- @example
 
- void cpu_func(void *buffers[], void *cl_arg)
 
- @{
 
-     float *array = cl_arg;
 
-     printf("Hello world (array = @{%f, %f@} )\n", array[0], array[1]);
 
- @}
 
- starpu_codelet cl =
 
- @{
 
-     .where = STARPU_CPU,
 
-     .cpu_func = cpu_func,
 
-     .nbuffers = 0
 
- @};
 
- @end example
 
- @end cartouche
 
- A codelet is a structure that represents a computational kernel. Such a codelet
 
- may contain an implementation of the same kernel on different architectures
 
- (e.g. CUDA, Cell's SPU, x86, ...).
 
- The @code{nbuffers} field specifies the number of data buffers that are
 
- manipulated by the codelet: here the codelet does not access or modify any data
 
- that is controlled by our data management library. Note that the argument
 
- passed to the codelet (the @code{cl_arg} field of the @code{starpu_task}
 
- structure) does not count as a buffer since it is not managed by our data
 
- management library. 
 
- @c TODO need a crossref to the proper description of "where" see bla for more ...
 
- We create a codelet which may only be executed on the CPUs. The @code{where}
 
- field is a bitmask that defines where the codelet may be executed. Here, the
 
- @code{STARPU_CPU} value means that only CPUs can execute this codelet
 
- (@pxref{Codelets and Tasks} for more details on this field).
 
- When a CPU core executes a codelet, it calls the @code{cpu_func} function,
 
- which @emph{must} have the following prototype:
 
- @code{void (*cpu_func)(void *buffers[], void *cl_arg)}
 
- In this example, we can ignore the first argument of this function which gives a
 
- description of the input and output buffers (e.g. the size and the location of
 
- the matrices). The second argument is a pointer to a buffer passed as an
 
- argument to the codelet by the means of the @code{cl_arg} field of the
 
- @code{starpu_task} structure.
 
- @c TODO rewrite so that it is a little clearer ?
 
- Be aware that this may be a pointer to a
 
- @emph{copy} of the actual buffer, and not the pointer given by the programmer:
 
- if the codelet modifies this buffer, there is no guarantee that the initial
 
- buffer will be modified as well: this for instance implies that the buffer
 
- cannot be used as a synchronization medium.
 
- @subsection Submitting a Task
 
- @cartouche
 
- @example
 
- void callback_func(void *callback_arg)
 
- @{
 
-     printf("Callback function (arg %x)\n", callback_arg);
 
- @}
 
- int main(int argc, char **argv)
 
- @{
 
-     /* initialize StarPU */
 
-     starpu_init(NULL);
 
-     struct starpu_task *task = starpu_task_create();
 
-     task->cl = &cl;
 
-     float array[2] = @{1.0f, -1.0f@};
 
-     task->cl_arg = &array;
 
-     task->cl_arg_size = 2*sizeof(float);
 
-     task->callback_func = callback_func;
 
-     task->callback_arg = 0x42;
 
-     /* starpu_task_submit will be a blocking call */
 
-     task->synchronous = 1;
 
-     /* submit the task to StarPU */
 
-     starpu_task_submit(task);
 
-     /* terminate StarPU */
 
-     starpu_shutdown();
 
-     return 0;
 
- @}
 
- @end example
 
- @end cartouche
 
- Before submitting any tasks to StarPU, @code{starpu_init} must be called. The
 
- @code{NULL} argument specifies that we use default configuration. Tasks cannot
 
- be submitted after the termination of StarPU by a call to
 
- @code{starpu_shutdown}.
 
- In the example above, a task structure is allocated by a call to
 
- @code{starpu_task_create}. This function only allocates and fills the
 
- corresponding structure with the default settings (@pxref{starpu_task_create}),
 
- but it does not submit the task to StarPU.
 
- @c not really clear ;)
 
- The @code{cl} field is a pointer to the codelet which the task will
 
- execute: in other words, the codelet structure describes which computational
 
- kernel should be offloaded on the different architectures, and the task
 
- structure is a wrapper containing a codelet and the piece of data on which the
 
- codelet should operate.
 
- The optional @code{cl_arg} field is a pointer to a buffer (of size
 
- @code{cl_arg_size}) with some parameters for the kernel
 
- described by the codelet. For instance, if a codelet implements a computational
 
- kernel that multiplies its input vector by a constant, the constant could be
 
- specified by means of this buffer.
 
- Once a task has been executed, an optional callback function can be called.
 
- While the computational kernel could be offloaded on various architectures, the
 
- callback function is always executed on a CPU. The @code{callback_arg}
 
- pointer is passed as an argument of the callback. The prototype of a callback
 
- function must be:
 
- @cartouche
 
- @example
 
- void (*callback_function)(void *);
 
- @end example
 
- @end cartouche
 
- If the @code{synchronous} field is non-zero, task submission will be
 
- synchronous: the @code{starpu_task_submit} function will not return until the
 
- task has been executed. Note that the @code{starpu_shutdown} function does not
 
- guarantee that asynchronous tasks have been executed before it returns.
 
- @node Scaling a Vector
 
- @section Manipulating Data: Scaling a Vector
 
- The previous example has shown how to submit tasks. In this section we show how
 
- StarPU tasks can manipulate data.
 
- Programmers can describe the data layout of their application so that StarPU is
 
- responsible for enforcing data coherency and availability across the machine.
 
- Instead of handling complex (and non-portable) mechanisms to perform data
 
- movements, programmers only declare which piece of data is accessed and/or
 
- modified by a task, and StarPU makes sure that when a computational kernel
 
- starts somewhere (e.g. on a GPU), its data are available locally.
 
- Before submitting those tasks, the programmer first needs to declare the
 
- different pieces of data to StarPU using the @code{starpu_*_data_register}
 
- functions. To ease the development of applications for StarPU, it is possible
 
- to describe multiple types of data layout. A type of data layout is called an
 
- @b{interface}. By default, there are different interfaces available in StarPU:
 
- here we will consider the @b{vector interface}.
 
- The following lines show how to declare an array of @code{n} elements of type
 
- @code{float} using the vector interface:
 
- @cartouche
 
- @example
 
- float tab[n];
 
- starpu_data_handle tab_handle;
 
- starpu_vector_data_register(&tab_handle, 0, tab, n, sizeof(float));
 
- @end example
 
- @end cartouche
 
- The first argument, called the @b{data handle}, is an opaque pointer which
 
- designates the array in StarPU. This is also the structure which is used to
 
- describe which data is used by a task. The second argument is the node number
 
- where the data currently resides. Here it is 0 since the @code{tab} array is in
 
- the main memory. Then comes the pointer @code{tab} where the data can be found,
 
- the number of elements in the vector and the size of each element.
 
- It is possible to construct a StarPU
 
- task that multiplies this vector by a constant factor:
 
- @cartouche
 
- @example
 
- float factor = 3.0;
 
- struct starpu_task *task = starpu_task_create();
 
- task->cl = &cl;
 
- task->buffers[0].handle = tab_handle;
 
- task->buffers[0].mode = STARPU_RW;
 
- task->cl_arg = &factor;
 
- task->cl_arg_size = sizeof(float);
 
- task->synchronous = 1;
 
- starpu_task_submit(task);
 
- @end example
 
- @end cartouche
 
- Since the factor is constant, it does not need a preliminary declaration, and
 
- can just be passed through the @code{cl_arg} pointer like in the previous
 
- example.  The vector parameter is described by its handle.
 
- There are two fields in each element of the @code{buffers} array.
 
- @code{handle} is the handle of the data, and @code{mode} specifies how the
 
- kernel will access the data (@code{STARPU_R} for read-only, @code{STARPU_W} for
 
- write-only and @code{STARPU_RW} for read and write access).
 
- The definition of the codelet can be written as follows:
 
- @cartouche
 
- @example
 
- void scal_func(void *buffers[], void *cl_arg)
 
- @{
 
-     unsigned i;
 
-     float *factor = cl_arg;
 
-     struct starpu_vector_interface_s *vector = buffers[0];
 
-     /* length of the vector */
 
-     unsigned n = STARPU_GET_VECTOR_NX(vector);
 
-     /* local copy of the vector pointer */
 
-     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
-     for (i = 0; i < n; i++)
 
-         val[i] *= *factor;
 
- @}
 
- starpu_codelet cl = @{
 
-     .where = STARPU_CPU,
 
-     .cpu_func = scal_func,
 
-     .nbuffers = 1
 
- @};
 
- @end example
 
- @end cartouche
 
- The second argument of the @code{scal_func} function contains a pointer to the
 
- parameters of the codelet (given in @code{task->cl_arg}), so that we read the
 
- constant factor from this pointer. The first argument is an array that gives
 
- a description of every buffer passed in the @code{task->buffers} array. The
 
- size of this array is given by the @code{nbuffers} field of the codelet
 
- structure. For the sake of generality, this array contains pointers to the
 
- different interfaces describing each buffer.  In the case of the @b{vector
 
- interface}, the location of the vector (resp. its length) is accessible in the
 
- @code{ptr} (resp. @code{nx}) field of this interface. Since the vector is accessed in a
 
- read-write fashion, any modification will automatically affect future accesses
 
- to this vector made by other tasks.
 
- @node Scaling a Vector (hybrid)
 
- @section Vector Scaling on a Hybrid CPU/GPU Machine
 
- Contrary to the previous examples, the task submitted in this example may not
 
- only be executed by the CPUs, but also by a CUDA device.
 
- @menu
 
- * Source code::                  Source of the StarPU application
 
- * Compilation and execution::    Executing the StarPU application
 
- @end menu
 
- @node Source code
 
- @subsection Source code
 
- The CUDA implementation can be written as follows. It needs to be
 
- compiled with a CUDA compiler such as nvcc, the NVIDIA CUDA compiler
 
- driver.
 
- @cartouche
 
- @example
 
- #include <starpu.h>
 
- static __global__ void vector_mult_cuda(float *val, unsigned n,
 
-                                         float factor)
 
- @{
 
-         unsigned i;
 
-         for(i = 0 ; i < n ; i++)
 
-                val[i] *= factor;
 
- @}
 
- extern "C" void scal_cuda_func(void *buffers[], void *_args)
 
- @{
 
-         float *factor = (float *)_args;
 
-         struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
 
-         /* length of the vector */
 
-         unsigned n = STARPU_GET_VECTOR_NX(vector);
 
-         /* local copy of the vector pointer */
 
-         float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
-         /* TODO: use more blocks and threads in blocks */
 
-         vector_mult_cuda<<<1,1>>>(val, n, *factor);
 
- 	cudaThreadSynchronize();
 
- @}
 
- @end example
 
- @end cartouche
 
- The CPU implementation is the same as in the previous section.
 
- Here is the source of the main application. Note the value of the
 
- field @code{where} for the codelet. We specify
 
- @code{STARPU_CPU|STARPU_CUDA} to indicate to StarPU that the codelet
 
- can be executed either on a CPU or on a CUDA device.
 
- @cartouche
 
- @example
 
- #include <starpu.h>
 
- #define NX 5
 
- extern void scal_cuda_func(void *buffers[], void *_args);
 
- extern void scal_func(void *buffers[], void *_args);
 
- /* @b{Definition of the codelet} */
 
- static starpu_codelet cl = @{
 
- 	.where = STARPU_CPU|STARPU_CUDA, /* @b{It can be executed on a CPU} */
 
- 	                                 /* @b{or on a CUDA device} */
 
- 	.cuda_func = scal_cuda_func,
 
- 	.cpu_func = scal_func,
 
- 	.nbuffers = 1
 
- @};
 
- int main(int argc, char **argv)
 
- @{
 
-         float *vector;
 
-         int i, ret;
 
-         float factor=3.0;
 
-         struct starpu_task *task;
 
-         starpu_data_handle tab_handle;
 
-         starpu_init(NULL);                            /* @b{Initialising StarPU} */
 
-         vector = (float*)malloc(NX*sizeof(float));
 
-         assert(vector);
 
-         for(i=0 ; i<NX ; i++) vector[i] = i;
 
- @end example
 
- @end cartouche
 
- @cartouche
 
- @example
 
-         /* @b{Registering data within StarPU} */
 
-         starpu_vector_data_register(&tab_handle, 0, (uintptr_t)vector,
 
-                                     NX, sizeof(float));
 
-         /* @b{Definition of the task} */
 
-         task = starpu_task_create();
 
-         task->cl = &cl;
 
-         task->callback_func = NULL;
 
-         task->buffers[0].handle = tab_handle;
 
-         task->buffers[0].mode = STARPU_RW;
 
-         task->cl_arg = &factor;
 
- @end example
 
- @end cartouche
 
- @cartouche
 
- @example
 
-         /* @b{Submitting the task} */
 
-         ret = starpu_task_submit(task);
 
-         if (ret == -ENODEV) @{
 
-                 fprintf(stderr, "No worker may execute this task\n");
 
-                 return 1;
 
-         @}
 
-         /* @b{Waiting for its termination} */
 
-         starpu_task_wait_for_all();
 
-         /* @b{Update the vector in RAM} */
 
-         starpu_data_sync_with_mem(tab_handle, STARPU_R);
 
- @end example
 
- @end cartouche
 
- @cartouche
 
- @example
 
-         /* @b{Access the data} */
 
-         for(i=0 ; i<NX; i++) @{
 
-           fprintf(stderr, "%f ", vector[i]);
 
-         @}
 
-         fprintf(stderr, "\n");
 
-         /* @b{Release the data and shutdown StarPU} */
 
-         starpu_data_release_from_mem(tab_handle);
 
-         starpu_shutdown();
 
-         return 0;
 
- @}
 
- @end example
 
- @end cartouche
 
- @node Compilation and execution
 
- @subsection Compilation and execution
 
- Let's suppose StarPU has been installed in the directory
 
- @code{$STARPU_DIR}. As explained in @ref{pkg-config configuration},
 
- the variable @code{PKG_CONFIG_PATH} needs to be set. It is also
 
- necessary to set the variable @code{LD_LIBRARY_PATH} to locate dynamic
 
- libraries at runtime.
 
- @example
 
- $ PKG_CONFIG_PATH=$STARPU_DIR/lib/pkgconfig:$PKG_CONFIG_PATH
 
- $ LD_LIBRARY_PATH=$STARPU_DIR/lib:$LD_LIBRARY_PATH
 
- @end example
 
- It is then possible to compile the application using the following
 
- makefile:
 
- @cartouche
 
- @example
 
- CFLAGS	+=	$(shell pkg-config --cflags libstarpu)
 
- LDFLAGS	+=	$(shell pkg-config --libs libstarpu)
 
- CC	=	gcc
 
- vector: vector.o vector_cpu.o vector_cuda.o
 
- %.o: %.cu
 
-        nvcc $(CFLAGS) $< -c -o $@@
 
- clean:
 
-        rm -f vector *.o
 
- @end example
 
- @end cartouche
 
- @example
 
- $ make
 
- @end example
 
- and to execute it, with the default configuration:
 
- @example
 
- $ ./vector
 
- 0.000000 3.000000 6.000000 9.000000 12.000000
 
- @end example
 
- or for example, by disabling CPU devices:
 
- @example
 
- $ STARPU_NCPUS=0 ./vector
 
- 0.000000 3.000000 6.000000 9.000000 12.000000
 
- @end example
 
- or by disabling CUDA devices:
 
- @example
 
- $ STARPU_NCUDA=0 ./vector
 
- 0.000000 3.000000 6.000000 9.000000 12.000000
 
- @end example
 
- @c TODO: Add performance model example (and update basic_examples)
 
- @c ---------------------------------------------------------------------
 
- @c Advanced Topics
 
- @c ---------------------------------------------------------------------
 
- @node Advanced Topics
 
- @chapter Advanced Topics
 
- @bye
 
 
  |