|
@@ -2,7 +2,7 @@
|
|
|
|
|
|
@c This file is part of the StarPU Handbook.
|
|
|
@c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
|
|
|
-@c Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
|
+@c Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
|
|
|
@c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
|
|
|
@c See the file starpu.texi for copying conditions.
|
|
|
|
|
@@ -38,22 +38,14 @@ The following arguments can be given to the @code{configure} script.
|
|
|
|
|
|
@node --enable-debug
|
|
|
@subsubsection @code{--enable-debug}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable debugging messages.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-fast
|
|
|
@subsubsection @code{--enable-fast}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Do not enforce assertions; this saves a lot of time that would otherwise be spent computing them.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-verbose
|
|
|
@subsubsection @code{--enable-verbose}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Augment the verbosity of the debugging messages. This can be disabled
|
|
|
at runtime by setting the environment variable @code{STARPU_SILENT} to
|
|
|
any value.
|
|
@@ -61,14 +53,10 @@ any value.
|
|
|
@smallexample
|
|
|
% STARPU_SILENT=1 ./vector_scal
|
|
|
@end smallexample
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-coverage
|
|
|
@subsubsection @code{--enable-coverage}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable flags for the @code{gcov} coverage tool.
|
|
|
-@end table
|
|
|
|
|
|
@node Configuring workers
|
|
|
@subsection Configuring workers
|
|
@@ -94,132 +82,82 @@ Enable flags for the @code{gcov} coverage tool.
|
|
|
|
|
|
@node --enable-maxcpus
|
|
|
@subsubsection @code{--enable-maxcpus=<number>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
-Defines the maximum number of CPU cores that StarPU will support, then
|
|
|
+Define the maximum number of CPU cores that StarPU will support, then
|
|
|
available as the @code{STARPU_MAXCPUS} macro.
|
|
|
-@end table
|
|
|
|
|
|
@node --disable-cpu
|
|
|
@subsubsection @code{--disable-cpu}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Disable the use of CPUs of the machine. Only GPUs etc. will be used.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-maxcudadev
|
|
|
@subsubsection @code{--enable-maxcudadev=<number>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
-Defines the maximum number of CUDA devices that StarPU will support, then
|
|
|
+Define the maximum number of CUDA devices that StarPU will support, then
|
|
|
available as the @code{STARPU_MAXCUDADEVS} macro.
|
|
|
-@end table
|
|
|
|
|
|
@node --disable-cuda
|
|
|
@subsubsection @code{--disable-cuda}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Disable the use of CUDA, even if a valid CUDA installation was detected.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-cuda-dir
|
|
|
@subsubsection @code{--with-cuda-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the directory where CUDA is installed. This directory should notably contain
|
|
|
@code{include/cuda.h}.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-cuda-include-dir
|
|
|
@subsubsection @code{--with-cuda-include-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the directory where CUDA headers are installed. This directory should
|
|
|
notably contain @code{cuda.h}. This defaults to @code{/include} appended to the
|
|
|
value given to @code{--with-cuda-dir}.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-cuda-lib-dir
|
|
|
@subsubsection @code{--with-cuda-lib-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the directory where the CUDA library is installed. This directory should
|
|
|
notably contain the CUDA shared libraries (e.g. libcuda.so). This defaults to
|
|
|
@code{/lib} appended to the value given to @code{--with-cuda-dir}.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node --disable-cuda-memcpy-peer
|
|
|
@subsubsection @code{--disable-cuda-memcpy-peer}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}
|
|
|
-Explicitely disables peer transfers when using CUDA 4.0
|
|
|
-@end table
|
|
|
+Explicitly disable peer transfers when using CUDA 4.0.
|
|
|
|
|
|
@node --enable-maxopencldev
|
|
|
@subsubsection @code{--enable-maxopencldev=<number>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
-Defines the maximum number of OpenCL devices that StarPU will support, then
|
|
|
+Define the maximum number of OpenCL devices that StarPU will support, then
|
|
|
available as the @code{STARPU_MAXOPENCLDEVS} macro.
|
|
|
-@end table
|
|
|
|
|
|
@node --disable-opencl
|
|
|
@subsubsection @code{--disable-opencl}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Disable the use of OpenCL, even if the SDK is detected.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-opencl-dir
|
|
|
@subsubsection @code{--with-opencl-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of the OpenCL SDK. This directory should notably contain
|
|
|
@code{include/CL/cl.h} (or @code{include/OpenCL/cl.h} on Mac OS).
|
|
|
-@end table
|
|
|
|
|
|
@node --with-opencl-include-dir
|
|
|
@subsubsection @code{--with-opencl-include-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of OpenCL headers. This directory should notably contain
|
|
|
@code{CL/cl.h} (or @code{OpenCL/cl.h} on Mac OS). This defaults to
|
|
|
@code{/include} appended to the value given to @code{--with-opencl-dir}.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node --with-opencl-lib-dir
|
|
|
@subsubsection @code{--with-opencl-lib-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of the OpenCL library. This directory should notably
|
|
|
contain the OpenCL shared libraries (e.g. libOpenCL.so). This defaults to
|
|
|
@code{/lib} appended to the value given to @code{--with-opencl-dir}.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-gordon
|
|
|
@subsubsection @code{--enable-gordon}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable the use of the Gordon runtime for Cell SPUs.
|
|
|
@c TODO: rather default to enabled when detected
|
|
|
-@end table
|
|
|
|
|
|
@node --with-gordon-dir
|
|
|
@subsubsection @code{--with-gordon-dir=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of the Gordon SDK.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-maximplementations
|
|
|
@subsubsection @code{--enable-maximplementations=<number>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
-Defines the number of implementations that can be defined for a single kind of
|
|
|
+Define the number of implementations that can be defined for a single kind of
|
|
|
device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
|
|
|
-@end table
|
|
|
|
|
|
@node Advanced configuration
|
|
|
@subsection Advanced configuration
|
|
@@ -244,120 +182,75 @@ device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
|
|
|
|
|
|
@node --enable-perf-debug
|
|
|
@subsubsection @code{--enable-perf-debug}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable performance debugging through gprof.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-model-debug
|
|
|
@subsubsection @code{--enable-model-debug}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable performance model debugging.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-stats
|
|
|
@subsubsection @code{--enable-stats}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable statistics.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-maxbuffers
|
|
|
@subsubsection @code{--enable-maxbuffers=<nbuffers>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Define the maximum number of buffers that tasks will be able to take
|
|
|
as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-allocation-cache
|
|
|
@subsubsection @code{--enable-allocation-cache}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable the use of a data allocation cache to avoid the cost of allocations with
|
|
|
CUDA. Still experimental.
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-opengl-render
|
|
|
@subsubsection @code{--enable-opengl-render}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Enable the use of OpenGL for the rendering of some examples.
|
|
|
@c TODO: rather default to enabled when detected
|
|
|
-@end table
|
|
|
|
|
|
@node --enable-blas-lib
|
|
|
@subsubsection @code{--enable-blas-lib=<name>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the BLAS library to be used by some of the examples. The
|
|
|
library has to be 'atlas' or 'goto'.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-magma
|
|
|
@subsubsection @code{--with-magma=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify where magma is installed. This directory should notably contain
|
|
|
@code{include/magmablas.h}.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-fxt
|
|
|
@subsubsection @code{--with-fxt=<path>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of FxT (for generating traces and rendering them
|
|
|
using ViTE). This directory should notably contain
|
|
|
@code{include/fxt/fxt.h}.
|
|
|
@c TODO add ref to other section
|
|
|
-@end table
|
|
|
|
|
|
@node --with-perf-model-dir
|
|
|
@subsubsection @code{--with-perf-model-dir=<dir>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify where performance models should be stored (instead of defaulting to the
|
|
|
current user's home).
|
|
|
-@end table
|
|
|
|
|
|
@node --with-mpicc
|
|
|
@subsubsection @code{--with-mpicc=<path to mpicc>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of the @code{mpicc} compiler to be used for starpumpi.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-goto-dir
|
|
|
@subsubsection @code{--with-goto-dir=<dir>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of GotoBLAS.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-atlas-dir
|
|
|
@subsubsection @code{--with-atlas-dir=<dir>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the location of ATLAS. This directory should notably contain
|
|
|
@code{include/cblas.h}.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-mkl-cflags
|
|
|
@subsubsection @code{--with-mkl-cflags=<cflags>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the compilation flags for the MKL Library.
|
|
|
-@end table
|
|
|
|
|
|
@node --with-mkl-ldflags
|
|
|
@subsubsection @code{--with-mkl-ldflags=<ldflags>}
|
|
|
-@table @asis
|
|
|
-@item @emph{Description}:
|
|
|
Specify the linking flags for the MKL Library. Note that the
|
|
|
@url{http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/}
|
|
|
website provides a script to determine the linking flags.
|
|
|
-@end table
|
|
|
|
|
|
@node Execution configuration through environment variables
|
|
|
@section Execution configuration through environment variables
|
|
@@ -387,50 +280,33 @@ variables.
|
|
|
|
|
|
@node STARPU_NCPUS
|
|
|
@subsubsection @code{STARPU_NCPUS} -- Number of CPU workers
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
Specify the number of CPU workers (thus not including workers dedicated to control accelerators). Note that by default, StarPU will not allocate
|
|
|
more CPU workers than there are physical CPUs, and that some CPUs are used to control
|
|
|
the accelerators.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_NCUDA
|
|
|
@subsubsection @code{STARPU_NCUDA} -- Number of CUDA workers
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
Specify the number of CUDA devices that StarPU can use. If
|
|
|
@code{STARPU_NCUDA} is lower than the number of physical devices, it is
|
|
|
possible to select which CUDA devices should be used by means of the
|
|
|
@code{STARPU_WORKERS_CUDAID} environment variable. By default, StarPU will
|
|
|
create as many CUDA workers as there are CUDA devices.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_NOPENCL
|
|
|
@subsubsection @code{STARPU_NOPENCL} -- Number of OpenCL workers
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
OpenCL equivalent of the @code{STARPU_NCUDA} environment variable.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_NGORDON
|
|
|
@subsubsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
Specify the number of SPUs that StarPU can use.
|
|
|
-@end table
|
|
|
-
|
|
|
|
|
|
@node STARPU_WORKERS_CPUID
|
|
|
@subsubsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
Passing an array of integers (starting from 0) in @code{STARPU_WORKERS_CPUID}
|
|
|
specifies on which logical CPU the different workers should be
|
|
|
bound. For instance, if @code{STARPU_WORKERS_CPUID = "0 1 4 5"}, the first
|
|
@@ -454,13 +330,9 @@ third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1).
|
|
|
This variable is ignored if the @code{use_explicit_workers_bindid} flag of the
|
|
|
@code{starpu_conf} structure passed to @code{starpu_init} is set.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_WORKERS_CUDAID
|
|
|
@subsubsection @code{STARPU_WORKERS_CUDAID} -- Select specific CUDA devices
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
Similarly to the @code{STARPU_WORKERS_CPUID} environment variable, it is
|
|
|
possible to select which CUDA devices should be used by StarPU. On a machine
|
|
|
equipped with 4 GPUs, setting @code{STARPU_WORKERS_CUDAID = "1 3"} and
|
|
@@ -470,18 +342,14 @@ the one reported by CUDA).
|
|
|
|
|
|
This variable is ignored if the @code{use_explicit_workers_cuda_gpuid} flag of
|
|
|
the @code{starpu_conf} structure passed to @code{starpu_init} is set.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_WORKERS_OPENCLID
|
|
|
@subsubsection @code{STARPU_WORKERS_OPENCLID} -- Select specific OpenCL devices
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.
|
|
|
|
|
|
This variable is ignored if the @code{use_explicit_workers_opencl_gpuid} flag of
|
|
|
the @code{starpu_conf} structure passed to @code{starpu_init} is set.
|
|
|
-@end table
|
|
|
|
|
|
@node Scheduling
|
|
|
@subsection Configuring the Scheduling engine
|
|
@@ -496,22 +364,15 @@ the @code{starpu_conf} structure passed to @code{starpu_init} is set.
|
|
|
|
|
|
@node STARPU_SCHED
|
|
|
@subsubsection @code{STARPU_SCHED} -- Scheduling policy
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
-
|
|
|
-This chooses between the different scheduling policies proposed by StarPU: work
|
|
|
+Choose between the different scheduling policies proposed by StarPU: work
|
|
|
stealing, random, greedy, with performance models, etc.
|
|
|
|
|
|
Use @code{STARPU_SCHED=help} to get the list of available schedulers.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_CALIBRATE
|
|
|
@subsubsection @code{STARPU_CALIBRATE} -- Calibrate performance models
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
If this variable is set to 1, the performance models are calibrated during
|
|
|
the execution. If it is set to 2, the previous values are dropped to restart
|
|
|
calibration from scratch. Setting this variable to 0 disables calibration; this
|
|
@@ -519,13 +380,9 @@ is the default behaviour.
|
|
|
|
|
|
Note: this currently only applies to @code{dm}, @code{dmda} and @code{heft} scheduling policies.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_PREFETCH
|
|
|
@subsubsection @code{STARPU_PREFETCH} -- Use data prefetch
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
This variable indicates whether data prefetching should be enabled (0 means
|
|
|
that it is disabled). If prefetching is enabled, when a task is scheduled to be
|
|
|
executed e.g. on a GPU, StarPU will request an asynchronous transfer in
|
|
@@ -533,30 +390,20 @@ advance, so that data is already present on the GPU when the task starts. As a
|
|
|
result, computation and data transfers are overlapped.
|
|
|
Note that prefetching is enabled by default in StarPU.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_SCHED_ALPHA
|
|
|
@subsubsection @code{STARPU_SCHED_ALPHA} -- Computation factor
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
To estimate the cost of a task StarPU takes into account the estimated
|
|
|
computation time (obtained thanks to performance models). The alpha factor is
|
|
|
the coefficient to be applied to it before adding it to the communication part.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node STARPU_SCHED_BETA
|
|
|
@subsubsection @code{STARPU_SCHED_BETA} -- Communication factor
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
To estimate the cost of a task StarPU takes into account the estimated
|
|
|
data transfer time (obtained thanks to performance models). The beta factor is
|
|
|
the coefficient to be applied to it before adding it to the computation part.
|
|
|
|
|
|
-@end table
|
|
|
-
|
|
|
@node Misc
|
|
|
@subsection Miscellaneous and debug
|
|
|
|
|
@@ -570,46 +417,31 @@ the coefficient to be applied to it before adding it to the computation part.
|
|
|
|
|
|
@node STARPU_SILENT
|
|
|
@subsubsection @code{STARPU_SILENT} -- Disable verbose mode
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
This variable allows disabling verbose mode at runtime when StarPU
|
|
|
has been configured with the option @code{--enable-verbose}.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_LOGFILENAME
|
|
|
@subsubsection @code{STARPU_LOGFILENAME} -- Select debug file name
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}:
|
|
|
This variable specifies the file in which the debugging output should be saved.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_FXT_PREFIX
|
|
|
@subsubsection @code{STARPU_FXT_PREFIX} -- FxT trace location
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}
|
|
|
This variable specifies in which directory to save the trace generated if FxT is enabled. It needs to have a trailing '/' character.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_LIMIT_GPU_MEM
|
|
|
@subsubsection @code{STARPU_LIMIT_GPU_MEM} -- Restrict memory size on the GPUs
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}
|
|
|
This variable specifies the maximum number of megabytes that should be
|
|
|
available to the application on each GPU. In case this value is smaller than
|
|
|
the size of the memory of a GPU, StarPU pre-allocates a buffer to waste memory
|
|
|
on the device. This variable is intended to be used for experimental purposes
|
|
|
as it emulates devices that have a limited amount of memory.
|
|
|
-@end table
|
|
|
|
|
|
@node STARPU_GENERATE_TRACE
|
|
|
@subsubsection @code{STARPU_GENERATE_TRACE} -- Generate a Paje trace when StarPU is shut down
|
|
|
-@table @asis
|
|
|
|
|
|
-@item @emph{Description}
|
|
|
When set to 1, this variable indicates that StarPU should automatically
|
|
|
generate a Paje trace when starpu_shutdown is called.
|
|
|
-@end table
|