exa2pro
/
starpu-max


			
@c -*-texinfo-*-

@c This file is part of the StarPU Handbook.
@c Copyright (C) 2009--2011  Universit@'e de Bordeaux 1
@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
@c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
@c See the file starpu.texi for copying conditions.

@node Configuring StarPU
@chapter Configuring StarPU

@menu
* Compilation configuration::   
* Execution configuration through environment variables::  
@end menu

@node Compilation configuration
@section Compilation configuration

The following arguments can be given to the @code{configure} script.

@menu
* Common configuration::        
* Configuring workers::         
* Advanced configuration::      
@end menu

@node Common configuration
@subsection Common configuration


@menu
* --enable-debug::              
* --enable-fast::               
* --enable-verbose::            
* --enable-coverage::           
@end menu

@node --enable-debug
@subsubsection @code{--enable-debug}
@table @asis
@item @emph{Description}:
Enable debugging messages.
@end table

@node --enable-fast
@subsubsection @code{--enable-fast}
@table @asis
@item @emph{Description}:
Do not enforce assertions. This saves the significant amount of time that
would otherwise be spent evaluating them.
@end table

@node --enable-verbose
@subsubsection @code{--enable-verbose}
@table @asis
@item @emph{Description}:
Augment the verbosity of the debugging messages. This can be disabled
at runtime by setting the environment variable @code{STARPU_SILENT} to
any value.

@smallexample
% STARPU_SILENT=1 ./vector_scal
@end smallexample
@end table

@node --enable-coverage
@subsubsection @code{--enable-coverage}
@table @asis
@item @emph{Description}:
Enable flags for the @code{gcov} coverage tool.
@end table

@node Configuring workers
@subsection Configuring workers

@menu
* --enable-maxcpus::         
* --disable-cpu::               
* --enable-maxcudadev::         
* --disable-cuda::              
* --with-cuda-dir::             
* --with-cuda-include-dir::             
* --with-cuda-lib-dir::             
* --disable-cuda-memcpy-peer::
* --enable-maxopencldev::       
* --disable-opencl::            
* --with-opencl-dir::           
* --with-opencl-include-dir::           
* --with-opencl-lib-dir::           
* --enable-gordon::             
* --with-gordon-dir::           
* --enable-maximplementations::
@end menu

@node --enable-maxcpus
@subsubsection @code{--enable-maxcpus=<number>}
@table @asis
@item @emph{Description}:
Defines the maximum number of CPU cores that StarPU will support, then
available as the @code{STARPU_MAXCPUS} macro.
@end table

@node --disable-cpu
@subsubsection @code{--disable-cpu}
@table @asis
@item @emph{Description}:
Disable the use of CPUs of the machine. Only GPUs etc. will be used.
@end table

@node --enable-maxcudadev
@subsubsection @code{--enable-maxcudadev=<number>}
@table @asis
@item @emph{Description}:
Defines the maximum number of CUDA devices that StarPU will support, then
available as the @code{STARPU_MAXCUDADEVS} macro.
@end table

@node --disable-cuda
@subsubsection @code{--disable-cuda}
@table @asis
@item @emph{Description}:
Disable the use of CUDA, even if a valid CUDA installation was detected.
@end table

@node --with-cuda-dir
@subsubsection @code{--with-cuda-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the directory where CUDA is installed. This directory should notably contain
@code{include/cuda.h}.
@end table

@node --with-cuda-include-dir
@subsubsection @code{--with-cuda-include-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the directory where CUDA headers are installed. This directory should
notably contain @code{cuda.h}. This defaults to @code{/include} appended to the
value given to @code{--with-cuda-dir}.
@end table

@node --with-cuda-lib-dir
@subsubsection @code{--with-cuda-lib-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the directory where the CUDA library is installed. This directory should
notably contain the CUDA shared libraries (e.g. libcuda.so). This defaults to
@code{/lib} appended to the value given to @code{--with-cuda-dir}.

@end table

@node --disable-cuda-memcpy-peer
@subsubsection @code{--disable-cuda-memcpy-peer}
@table @asis
@item @emph{Description}:
Explicitly disables peer transfers when using CUDA 4.0.
@end table

@node --enable-maxopencldev
@subsubsection @code{--enable-maxopencldev=<number>}
@table @asis
@item @emph{Description}:
Defines the maximum number of OpenCL devices that StarPU will support, then
available as the @code{STARPU_MAXOPENCLDEVS} macro.
@end table

@node --disable-opencl
@subsubsection @code{--disable-opencl}
@table @asis
@item @emph{Description}:
Disable the use of OpenCL, even if the SDK is detected.
@end table

@node --with-opencl-dir
@subsubsection @code{--with-opencl-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the location of the OpenCL SDK. This directory should notably contain
@code{include/CL/cl.h} (or @code{include/OpenCL/cl.h} on Mac OS).
@end table

@node --with-opencl-include-dir
@subsubsection @code{--with-opencl-include-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the location of OpenCL headers. This directory should notably contain
@code{CL/cl.h} (or @code{OpenCL/cl.h} on Mac OS). This defaults to
@code{/include} appended to the value given to @code{--with-opencl-dir}.

@end table

@node --with-opencl-lib-dir
@subsubsection @code{--with-opencl-lib-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the location of the OpenCL library. This directory should notably
contain the OpenCL shared libraries (e.g. libOpenCL.so). This defaults to
@code{/lib} appended to the value given to @code{--with-opencl-dir}.
@end table

@node --enable-gordon
@subsubsection @code{--enable-gordon}
@table @asis
@item @emph{Description}:
Enable the use of the Gordon runtime for Cell SPUs.
@c TODO: rather default to enabled when detected
@end table

@node --with-gordon-dir
@subsubsection @code{--with-gordon-dir=<path>}
@table @asis
@item @emph{Description}:
Specify the location of the Gordon SDK.
@end table

@node --enable-maximplementations
@subsubsection @code{--enable-maximplementations=<number>}
@table @asis
@item @emph{Description}:
Defines the number of implementations that can be defined for a single kind of
device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
@end table

@node Advanced configuration
@subsection Advanced configuration

@menu
* --enable-perf-debug::         
* --enable-model-debug::        
* --enable-stats::              
* --enable-maxbuffers::         
* --enable-allocation-cache::   
* --enable-opengl-render::      
* --enable-blas-lib::           
* --with-magma::                
* --with-fxt::                  
* --with-perf-model-dir::       
* --with-mpicc::                
* --with-goto-dir::             
* --with-atlas-dir::            
* --with-mkl-cflags::
* --with-mkl-ldflags::
@end menu

@node --enable-perf-debug
@subsubsection @code{--enable-perf-debug}
@table @asis
@item @emph{Description}:
Enable performance debugging through gprof.
@end table

@node --enable-model-debug
@subsubsection @code{--enable-model-debug}
@table @asis
@item @emph{Description}:
Enable performance model debugging.
@end table

@node --enable-stats
@subsubsection @code{--enable-stats}
@table @asis
@item @emph{Description}:
Enable statistics.
@end table

@node --enable-maxbuffers
@subsubsection @code{--enable-maxbuffers=<nbuffers>}
@table @asis
@item @emph{Description}:
Define the maximum number of buffers that tasks will be able to take
as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
@end table

@node --enable-allocation-cache
@subsubsection @code{--enable-allocation-cache}
@table @asis
@item @emph{Description}:
Enable the use of a data allocation cache to avoid the cost of data
allocation with CUDA. Still experimental.
@end table

@node --enable-opengl-render
@subsubsection @code{--enable-opengl-render}
@table @asis
@item @emph{Description}:
Enable the use of OpenGL for the rendering of some examples.
@c TODO: rather default to enabled when detected
@end table

@node --enable-blas-lib
@subsubsection @code{--enable-blas-lib=<name>}
@table @asis
@item @emph{Description}:
Specify the BLAS library to be used by some of the examples. The
library has to be @code{atlas} or @code{goto}.
@end table

@node --with-magma
@subsubsection @code{--with-magma=<path>}
@table @asis
@item @emph{Description}:
Specify where magma is installed. This directory should notably contain
@code{include/magmablas.h}.
@end table

@node --with-fxt
@subsubsection @code{--with-fxt=<path>}
@table @asis
@item @emph{Description}:
Specify the location of FxT (for generating traces and rendering them
using ViTE). This directory should notably contain
@code{include/fxt/fxt.h}.
@c TODO add ref to other section
@end table

@node --with-perf-model-dir
@subsubsection @code{--with-perf-model-dir=<dir>}
@table @asis
@item @emph{Description}:
Specify where performance models should be stored (instead of defaulting to the
current user's home).
@end table

@node --with-mpicc
@subsubsection @code{--with-mpicc=<path to mpicc>}
@table @asis
@item @emph{Description}:
Specify the location of the @code{mpicc} compiler to be used for starpumpi.
@end table

@node --with-goto-dir
@subsubsection @code{--with-goto-dir=<dir>}
@table @asis
@item @emph{Description}:
Specify the location of GotoBLAS.
@end table

@node --with-atlas-dir
@subsubsection @code{--with-atlas-dir=<dir>}
@table @asis
@item @emph{Description}:
Specify the location of ATLAS. This directory should notably contain
@code{include/cblas.h}.
@end table

@node --with-mkl-cflags
@subsubsection @code{--with-mkl-cflags=<cflags>}
@table @asis
@item @emph{Description}:
Specify the compilation flags for the MKL Library.
@end table

@node --with-mkl-ldflags
@subsubsection @code{--with-mkl-ldflags=<ldflags>}
@table @asis
@item @emph{Description}:
Specify the linking flags for the MKL Library. Note that the
@url{http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/}
website provides a script to determine the linking flags.
@end table

@node Execution configuration through environment variables
@section Execution configuration through environment variables

@menu
* Workers::                     Configuring workers
* Scheduling::                  Configuring the Scheduling engine
* Misc::                        Miscellaneous and debug
@end menu

Note: the values given in the @code{starpu_conf} structure passed when
calling @code{starpu_init} will override the values of the environment
variables.

@node Workers
@subsection Configuring workers

@menu
* STARPU_NCPUS::                Number of CPU workers
* STARPU_NCUDA::                Number of CUDA workers
* STARPU_NOPENCL::              Number of OpenCL workers
* STARPU_NGORDON::              Number of SPU workers (Cell)
* STARPU_WORKERS_CPUID::        Bind workers to specific CPUs
* STARPU_WORKERS_CUDAID::       Select specific CUDA devices
* STARPU_WORKERS_OPENCLID::     Select specific OpenCL devices
@end menu

@node STARPU_NCPUS
@subsubsection @code{STARPU_NCPUS} -- Number of CPU workers
@table @asis

@item @emph{Description}:
Specify the number of CPU workers (thus not including workers dedicated to
controlling accelerators). Note that by default, StarPU will not allocate
more CPU workers than there are physical CPUs, and that some CPUs are used to
control the accelerators.

@end table

@node STARPU_NCUDA
@subsubsection @code{STARPU_NCUDA} -- Number of CUDA workers
@table @asis

@item @emph{Description}:
Specify the number of CUDA devices that StarPU can use. If
@code{STARPU_NCUDA} is lower than the number of physical devices, it is
possible to select which CUDA devices should be used by the means of the
@code{STARPU_WORKERS_CUDAID} environment variable. By default, StarPU will
create as many CUDA workers as there are CUDA devices.

@end table

@node STARPU_NOPENCL
@subsubsection @code{STARPU_NOPENCL} -- Number of OpenCL workers
@table @asis

@item @emph{Description}:
OpenCL equivalent of the @code{STARPU_NCUDA} environment variable.
@end table

@node STARPU_NGORDON
@subsubsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
@table @asis

@item @emph{Description}:
Specify the number of SPUs that StarPU can use.
@end table


@node STARPU_WORKERS_CPUID
@subsubsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
@table @asis

@item @emph{Description}:
Passing an array of integers (starting from 0) in @code{STARPU_WORKERS_CPUID}
specifies on which logical CPU the different workers should be
bound. For instance, if @code{STARPU_WORKERS_CPUID = "0 1 4 5"}, the first
worker will be bound to logical CPU #0, the second worker will be bound to
logical CPU #1 and so on.  Note that the logical ordering of the CPUs is either
determined by the OS, or provided by the @code{hwloc} library in case it is
available.

Note that the first workers correspond to the CUDA workers, then come the
OpenCL and the SPU, and finally the CPU workers. For example if
we have @code{STARPU_NCUDA=1}, @code{STARPU_NOPENCL=1}, @code{STARPU_NCPUS=2}
and @code{STARPU_WORKERS_CPUID = "0 2 1 3"}, the CUDA device will be controlled
by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and
the logical CPUs #1 and #3 will be used by the CPU workers.

If the number of workers is larger than the array given in
@code{STARPU_WORKERS_CPUID}, the workers are bound to the logical CPUs in a
round-robin fashion: if @code{STARPU_WORKERS_CPUID = "0 1"}, the first and the
third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1).

This variable is ignored if the @code{use_explicit_workers_bindid} flag of the
@code{starpu_conf} structure passed to @code{starpu_init} is set.

@end table

@node STARPU_WORKERS_CUDAID
@subsubsection @code{STARPU_WORKERS_CUDAID} -- Select specific CUDA devices
@table @asis

@item @emph{Description}:
Similarly to the @code{STARPU_WORKERS_CPUID} environment variable, it is
possible to select which CUDA devices should be used by StarPU. On a machine
equipped with 4 GPUs, setting @code{STARPU_WORKERS_CUDAID = "1 3"} and
@code{STARPU_NCUDA=2} specifies that 2 CUDA workers should be created, and that
they should use CUDA devices #1 and #3 (the logical ordering of the devices is
the one reported by CUDA).

This variable is ignored if the @code{use_explicit_workers_cuda_gpuid} flag of
the @code{starpu_conf} structure passed to @code{starpu_init} is set.
@end table

@node STARPU_WORKERS_OPENCLID
@subsubsection @code{STARPU_WORKERS_OPENCLID} -- Select specific OpenCL devices
@table @asis

@item @emph{Description}:
OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.

This variable is ignored if the @code{use_explicit_workers_opencl_gpuid} flag of
the @code{starpu_conf} structure passed to @code{starpu_init} is set.
@end table

@node Scheduling
@subsection Configuring the Scheduling engine

@menu
* STARPU_SCHED::                Scheduling policy
* STARPU_CALIBRATE::            Calibrate performance models
* STARPU_PREFETCH::             Use data prefetch
* STARPU_SCHED_ALPHA::          Computation factor
* STARPU_SCHED_BETA::           Communication factor
@end menu

@node STARPU_SCHED
@subsubsection @code{STARPU_SCHED} -- Scheduling policy
@table @asis

@item @emph{Description}:

This chooses between the different scheduling policies proposed by StarPU:
random, work stealing, greedy, with performance models, etc.

Use @code{STARPU_SCHED=help} to get the list of available schedulers.

@end table

@node STARPU_CALIBRATE
@subsubsection @code{STARPU_CALIBRATE} -- Calibrate performance models
@table @asis

@item @emph{Description}:
If this variable is set to 1, the performance models are calibrated during
the execution. If it is set to 2, the previous values are dropped to restart
calibration from scratch. Setting this variable to 0 disables calibration; this
is the default behaviour.

Note: this currently only applies to @code{dm}, @code{dmda} and @code{heft} scheduling policies.

@end table

@node STARPU_PREFETCH
@subsubsection @code{STARPU_PREFETCH} -- Use data prefetch
@table @asis

@item @emph{Description}:
This variable indicates whether data prefetching should be enabled (0 means
that it is disabled). If prefetching is enabled, when a task is scheduled to be
executed e.g. on a GPU, StarPU will request an asynchronous transfer in
advance, so that data is already present on the GPU when the task starts. As a
result, computation and data transfers are overlapped.
Note that prefetching is enabled by default in StarPU.

@end table

@node STARPU_SCHED_ALPHA
@subsubsection @code{STARPU_SCHED_ALPHA} -- Computation factor
@table @asis

@item @emph{Description}:
To estimate the cost of a task StarPU takes into account the estimated
computation time (obtained thanks to performance models). The alpha factor is
the coefficient to be applied to it before adding it to the communication part.

@end table

@node STARPU_SCHED_BETA
@subsubsection @code{STARPU_SCHED_BETA} -- Communication factor
@table @asis

@item @emph{Description}:
To estimate the cost of a task StarPU takes into account the estimated
data transfer time (obtained thanks to performance models). The beta factor is
the coefficient to be applied to it before adding it to the computation part.

@end table

@node Misc
@subsection Miscellaneous and debug

@menu
* STARPU_SILENT::               Disable verbose mode
* STARPU_LOGFILENAME::          Select debug file name
* STARPU_FXT_PREFIX::           FxT trace location
* STARPU_LIMIT_GPU_MEM::        Restrict memory size on the GPUs
* STARPU_GENERATE_TRACE::       Generate a Paje trace when StarPU is shut down
@end menu

@node STARPU_SILENT
@subsubsection @code{STARPU_SILENT} -- Disable verbose mode
@table @asis

@item @emph{Description}:
This variable allows verbose mode to be disabled at runtime when StarPU
has been configured with the option @code{--enable-verbose}.
@end table

@node STARPU_LOGFILENAME
@subsubsection @code{STARPU_LOGFILENAME} -- Select debug file name
@table @asis

@item @emph{Description}:
This variable specifies the file in which the debugging output should be saved.
@end table

@node STARPU_FXT_PREFIX
@subsubsection @code{STARPU_FXT_PREFIX} -- FxT trace location
@table @asis

@item @emph{Description}:
This variable specifies in which directory to save the trace generated if FxT
is enabled. It needs to have a trailing '/' character.
@end table

@node STARPU_LIMIT_GPU_MEM
@subsubsection @code{STARPU_LIMIT_GPU_MEM} -- Restrict memory size on the GPUs
@table @asis

@item @emph{Description}:
This variable specifies the maximum number of megabytes that should be
available to the application on each GPU. In case this value is smaller than
the size of the memory of a GPU, StarPU pre-allocates a buffer to waste memory
on the device. This variable is intended to be used for experimental purposes
as it emulates devices that have a limited amount of memory.
@end table

@node STARPU_GENERATE_TRACE
@subsubsection @code{STARPU_GENERATE_TRACE} -- Generate a Paje trace when StarPU is shut down
@table @asis

@item @emph{Description}:
When set to 1, this variable indicates that StarPU should automatically
generate a Paje trace when @code{starpu_shutdown} is called.
@end table