瀏覽代碼

doc/doxygen: improve doc

Nathalie Furmento 12 年之前
父節點
當前提交
fad14ebefa
共有 4 個文件被更改,包括 1041 次插入32 次删除
  1. 5 1
      doc/doxygen/Makefile.am
  2. 489 0
      doc/doxygen/chapters/configure_options.doxy
  3. 506 0
      doc/doxygen/chapters/environment_variables.doxy
  4. 41 31
      doc/doxygen/refman.tex

+ 5 - 1
doc/doxygen/Makefile.am

@@ -37,6 +37,8 @@ chapters =	\
 	chapters/scheduling_contexts.doxy \
 	chapters/socl_opencl_extensions.doxy \
 	chapters/tips_and_tricks.doxy \
+	chapters/environment_variables.doxy \
+	chapters/configure_options.doxy \
 	chapters/hello_pragma2.c \
 	chapters/hello_pragma.c \
 	chapters/api/codelet_and_tasks.doxy \
@@ -82,7 +84,7 @@ chapters/version.sty: $(chapters)
 		LC_ALL=C date --date=@`cat timestamp` +"%B %Y" > timestamp_updated_month 2>/dev/null;\
 	fi
 	@if test -s timestamp_updated ; then \
-		echo "\newcommand{\STARPUUPDATED}{" `cat timestamp_updated` "}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
+		echo "\newcommand{\STARPUUPDATED}{"`cat timestamp_updated`"}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
 	else \
 		echo "\newcommand{\STARPUUPDATED}{unknown_date}" > $(top_srcdir)/doc/doxygen/chapters/version.sty;\
 	fi
@@ -141,6 +143,8 @@ $(DOX_PDF): $(DOX_TAG)
 	cp $(top_srcdir)/doc/doxygen/chapters/version.sty $(DOX_DIR)
 	cd $(DOX_LATEX_DIR); \
 	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \
+	sed -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//'  EnvironmentVariables.tex ;\
+	sed -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//'  ConfigureOptions.tex ;\
 	$(PDFLATEX) refman.tex; \
 	$(MAKEINDEX) refman.idx;\
 	$(PDFLATEX) refman.tex; \

+ 489 - 0
doc/doxygen/chapters/configure_options.doxy

@@ -0,0 +1,489 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \page ConfigureOptions Compilation Configuration
+
+The behavior of the StarPU library and tools may be tuned thanks to
+the following configure options.
+
+\section CommonConfiguration Common Configuration
+
+<dl>
+
+<dt>--enable-debug</dt>
+<dd>
+\anchor enable-debug
+\addindex __configure__--enable-debug
+Enable debugging messages.
+</dd>
+
+<dt>--enable-debug</dt>
+<dd>
+\anchor enable-debug
+\addindex __configure__--enable-debug
+Enable debugging messages.
+</dd>
+
+<dt>--enable-fast</dt>
+<dd>
+\anchor enable-fast
+\addindex __configure__--enable-fast
+Disable assertion checks, which saves computation time.
+</dd>
+
+<dt>--enable-verbose</dt>
+<dd>
+\anchor enable-verbose
+\addindex __configure__--enable-verbose
+Increase the verbosity of the debugging messages.  This can be disabled
+at runtime by setting the environment variable \ref STARPU_SILENT to
+any value.
+
+\verbatim
+$ STARPU_SILENT=1 ./vector_scal
+\endverbatim
+</dd>
+
+<dt>--enable-coverage</dt>
+<dd>
+\anchor enable-coverage
+\addindex __configure__--enable-coverage
+Enable flags for the coverage tool <c>gcov</c>.
+</dd>
+
+<dt>--enable-quick-check</dt>
+<dd>
+\anchor enable-quick-check
+\addindex __configure__--enable-quick-check
+Specify that tests and examples should be run on a smaller data set, i.e.
+allowing a faster execution time.
+</dd>
+
+<dt>--enable-long-check</dt>
+<dd>
+\anchor enable-long-check
+\addindex __configure__--enable-long-check
+Enable some exhaustive checks which take a really long time.
+</dd>
+
+<dt>--with-hwloc</dt>
+<dd>
+\anchor with-hwloc
+\addindex __configure__--with-hwloc
+Specify hwloc should be used by StarPU. hwloc should be found by the
+means of the tool <c>pkg-config</c>.
+</dd>
+
+<dt>--with-hwloc=<c>prefix</c></dt>
+<dd>
+\anchor with-hwloc
+\addindex __configure__--with-hwloc
+Specify hwloc should be used by StarPU. hwloc should be found in the
+directory specified by <c>prefix</c>.
+</dd>
+
+<dt>--without-hwloc</dt>
+<dd>
+\anchor without-hwloc
+\addindex __configure__--without-hwloc
+Specify hwloc should not be used by StarPU.
+</dd>
+
+<dt>--disable-build-doc</dt>
+<dd>
+\anchor disable-build-doc
+\addindex __configure__--disable-build-doc
+Disable the creation of the documentation. This should be done on a
+machine which does not have the tools <c>makeinfo</c> and <c>tex</c>.
+</dd>
+
+Additionally, the script <c>configure</c> recognizes many variables, which
+can be listed by typing <c>./configure --help</c>. For example,
+<c>./configure NVCCFLAGS="-arch sm_13"</c> adds a flag for the compilation of
+CUDA kernels.
+
+</dl>
+
+\section ConfiguringWorkers Configuring Workers
+
+<dl>
+
+<dt>--enable-maxcpus=<c>count</c></dt>
+<dd>
+\anchor enable-maxcpus
+\addindex __configure__--enable-maxcpus
+Use at most <c>count</c> CPU cores.  This information is then
+available as the macro ::STARPU_MAXCPUS.
+</dd>
+
+<dt>--disable-cpu</dt>
+<dd>
+\anchor disable-cpu
+\addindex __configure__--disable-cpu
+Disable the use of CPUs of the machine. Only GPUs etc. will be used.
+</dd>
+
+<dt>--enable-maxcudadev=<c>count</c></dt>
+<dd>
+\anchor enable-maxcudadev
+\addindex __configure__--enable-maxcudadev
+Use at most <c>count</c> CUDA devices.  This information is then
+available as the macro ::STARPU_MAXCUDADEVS.
+</dd>
+
+<dt>--disable-cuda</dt>
+<dd>
+\anchor disable-cuda
+\addindex __configure__--disable-cuda
+Disable the use of CUDA, even if a valid CUDA installation was detected.
+</dd>
+
+<dt>--with-cuda-dir=<c>prefix</c></dt>
+<dd>
+\anchor with-cuda-dir
+\addindex __configure__--with-cuda-dir
+Search for CUDA under <c>prefix</c>, which should notably contain the file
+<c>include/cuda.h</c>.
+</dd>
+
+<dt>--with-cuda-include-dir=<c>dir</c></dt>
+<dd>
+\anchor with-cuda-include-dir
+\addindex __configure__--with-cuda-include-dir
+Search for CUDA headers under <c>dir</c>, which should
+notably contain the file <c>cuda.h</c>. This defaults to
+<c>/include</c> appended to the value given to \ref with-cuda-dir "--with-cuda-dir".
+</dd>
+
+<dt>--with-cuda-lib-dir=<c>dir</c></dt>
+<dd>
+\anchor with-cuda-lib-dir
+\addindex __configure__--with-cuda-lib-dir
+Search for CUDA libraries under <c>dir</c>, which should notably contain
+the CUDA shared libraries---e.g., <c>libcuda.so</c>.  This defaults to
+<c>/lib</c> appended to the value given to \ref with-cuda-dir "--with-cuda-dir".
+</dd>
+
+<dt>--disable-cuda-memcpy-peer</dt>
+<dd>
+\anchor disable-cuda-memcpy-peer
+\addindex __configure__--disable-cuda-memcpy-peer
+Explicitly disable peer transfers when using CUDA 4.0.
+</dd>
+
+<dt>--enable-maxopencldev=<c>count</c></dt>
+<dd>
+\anchor enable-maxopencldev
+\addindex __configure__--enable-maxopencldev
+Use at most <c>count</c> OpenCL devices.  This information is then
+available as the macro ::STARPU_MAXOPENCLDEVS.
+</dd>
+
+<dt>--disable-opencl</dt>
+<dd>
+\anchor disable-opencl
+\addindex __configure__--disable-opencl
+Disable the use of OpenCL, even if the SDK is detected.
+</dd>
+
+<dt>--with-opencl-dir=<c>prefix</c></dt>
+<dd>
+\anchor with-opencl-dir
+\addindex __configure__--with-opencl-dir
+Search for an OpenCL implementation under <c>prefix</c>, which should
+notably contain <c>include/CL/cl.h</c> (or <c>include/OpenCL/cl.h</c>
+on Mac OS).
+</dd>
+
+<dt>--with-opencl-include-dir=<c>dir</c></dt>
+<dd>
+\anchor with-opencl-include-dir
+\addindex __configure__--with-opencl-include-dir
+Search for OpenCL headers under <c>dir</c>, which should notably contain
+<c>CL/cl.h</c> (or <c>OpenCL/cl.h</c> on Mac OS).  This defaults to
+<c>/include</c> appended to the value given to \ref with-opencl-dir "--with-opencl-dir".
+</dd>
+
+<dt>--with-opencl-lib-dir=<c>dir</c></dt>
+<dd>
+\anchor with-opencl-lib-dir
+\addindex __configure__--with-opencl-lib-dir
+Search for an OpenCL library under <c>dir</c>, which should notably
+contain the OpenCL shared libraries---e.g. <c>libOpenCL.so</c>. This defaults to
+<c>/lib</c> appended to the value given to \ref with-opencl-dir "--with-opencl-dir".
+</dd>
+
+<dt>--enable-opencl-simulator</dt>
+<dd>
+\anchor enable-opencl-simulator
+\addindex __configure__--enable-opencl-simulator
+Enable considering the provided OpenCL implementation as a simulator, i.e. use
+the kernel duration returned by OpenCL profiling information as wallclock time
+instead of the actual measured real time. This requires simgrid support.
+</dd>
+
+<dt>--enable-maximplementations=<c>count</c></dt>
+<dd>
+\anchor enable-maximplementations
+\addindex __configure__--enable-maximplementations
+Allow for at most <c>count</c> codelet implementations for the same
+target device.  This information is then available as the
+macro ::STARPU_MAXIMPLEMENTATIONS.
+</dd>
+
+<dt>--enable-max-sched-ctxs=<c>count</c></dt>
+<dd>
+\anchor enable-max-sched-ctxs
+\addindex __configure__--enable-max-sched-ctxs
+Allow for at most <c>count</c> scheduling contexts.
+This information is then available as the macro
+::STARPU_NMAX_SCHED_CTXS.
+</dd>
+
+<dt>--disable-asynchronous-copy</dt>
+<dd>
+\anchor disable-asynchronous-copy
+\addindex __configure__--disable-asynchronous-copy
+Disable asynchronous copies between CPU and GPU devices.
+The AMD implementation of OpenCL is known to
+fail when copying data asynchronously. When using this implementation,
+it is therefore necessary to disable asynchronous data transfers.
+</dd>
+
+<dt>--disable-asynchronous-cuda-copy</dt>
+<dd>
+\anchor disable-asynchronous-cuda-copy
+\addindex __configure__--disable-asynchronous-cuda-copy
+Disable asynchronous copies between CPU and CUDA devices.
+</dd>
+
+<dt>--disable-asynchronous-opencl-copy</dt>
+<dd>
+\anchor disable-asynchronous-opencl-copy
+\addindex __configure__--disable-asynchronous-opencl-copy
+Disable asynchronous copies between CPU and OpenCL devices.
+The AMD implementation of OpenCL is known to
+fail when copying data asynchronously. When using this implementation,
+it is therefore necessary to disable asynchronous data transfers.
+</dd>
+
+</dl>
+
+\section ExtensionConfiguration Extension Configuration
+
+<dl>
+
+<dt>--disable-socl</dt>
+<dd>
+\anchor disable-socl
+\addindex __configure__--disable-socl
+Disable the SOCL extension (\ref SOCL_OpenCL_Extensions).  By
+default, it is enabled when an OpenCL implementation is found.
+</dd>
+
+<dt>--disable-starpu-top</dt>
+<dd>
+\anchor disable-starpu-top
+\addindex __configure__--disable-starpu-top
+Disable the StarPU-Top interface (\ref StarPU-Top).  By default, it
+is enabled when the required dependencies are found.
+</dd>
+
+<dt>--disable-gcc-extensions</dt>
+<dd>
+\anchor disable-gcc-extensions
+\addindex __configure__--disable-gcc-extensions
+Disable the GCC plug-in (\ref C_Extensions).  By default, it is
+enabled when the GCC compiler provides a plug-in support.
+</dd>
+
+<dt>--with-mpicc=<c>path</c></dt>
+<dd>
+\anchor with-mpicc
+\addindex __configure__--with-mpicc
+Use the compiler <c>mpicc</c> at <c>path</c>, for StarPU-MPI
+(\ref StarPU_MPI_support).
+</dd>
+
+<dt>--enable-mpi-progression-hook</dt>
+<dd>
+\anchor enable-mpi-progression-hook
+\addindex __configure__--enable-mpi-progression-hook
+Enable the activity polling method for StarPU-MPI.
+</dd>
+
+</dl>
+
+\section AdvancedConfiguration Advanced Configuration
+
+<dl>
+
+<dt>--enable-perf-debug</dt>
+<dd>
+\anchor enable-perf-debug
+\addindex __configure__--enable-perf-debug
+Enable performance debugging through gprof.
+</dd>
+
+<dt>--enable-model-debug</dt>
+<dd>
+\anchor enable-model-debug
+\addindex __configure__--enable-model-debug
+Enable performance model debugging.
+</dd>
+
+<dt>--enable-stats</dt>
+<dd>
+\anchor enable-stats
+\addindex __configure__--enable-stats
+(see ../../src/datawizard/datastats.c)
+Enable gathering of various data statistics (\ref Data_statistics).
+</dd>
+
+<dt>--enable-maxbuffers</dt>
+<dd>
+\anchor enable-maxbuffers
+\addindex __configure__--enable-maxbuffers
+Define the maximum number of buffers that tasks will be able to take
+as parameters, then available as the macro ::STARPU_NMAXBUFS.
+</dd>
+
+<dt>--enable-allocation-cache</dt>
+<dd>
+\anchor enable-allocation-cache
+\addindex __configure__--enable-allocation-cache
+Enable the use of a data allocation cache to avoid the cost of data
+allocation with CUDA. Still experimental.
+</dd>
+
+<dt>--enable-opengl-render</dt>
+<dd>
+\anchor enable-opengl-render
+\addindex __configure__--enable-opengl-render
+Enable the use of OpenGL for the rendering of some examples.
+\internal
+TODO: rather default to enabled when detected
+\endinternal
+</dd>
+
+<dt>--enable-blas-lib</dt>
+<dd>
+\anchor enable-blas-lib
+\addindex __configure__--enable-blas-lib
+Specify the blas library to be used by some of the examples. The
+library has to be 'atlas' or 'goto'.
+</dd>
+
+<dt>--disable-starpufft</dt>
+<dd>
+\anchor disable-starpufft
+\addindex __configure__--disable-starpufft
+Disable the build of libstarpufft, even if fftw or cuFFT is available.
+</dd>
+
+<dt>--with-magma=<c>prefix</c></dt>
+<dd>
+\anchor with-magma
+\addindex __configure__--with-magma
+Search for MAGMA under <c>prefix</c>.  <c>prefix</c> should notably
+contain <c>include/magmablas.h</c>.
+</dd>
+
+<dt>--with-fxt=<c>prefix</c></dt>
+<dd>
+\anchor with-fxt
+\addindex __configure__--with-fxt
+Search for FxT under <c>prefix</c>.
+FxT (http://savannah.nongnu.org/projects/fkt) is used to generate
+traces of scheduling events, which can then be rendered using ViTE
+(\ref off-line_performance_feedback).  <c>prefix</c> should
+notably contain <c>include/fxt/fxt.h</c>.
+</dd>
+
+<dt>--with-perf-model-dir=<c>dir</c></dt>
+<dd>
+\anchor with-perf-model-dir
+\addindex __configure__--with-perf-model-dir
+Store performance models under <c>dir</c>, instead of the current user's
+home.
+</dd>
+
+<dt>--with-goto-dir=<c>prefix</c></dt>
+<dd>
+\anchor with-goto-dir
+\addindex __configure__--with-goto-dir
+Search for GotoBLAS under <c>prefix</c>, which should notably contain
+<c>libgoto.so</c> or <c>libgoto2.so</c>.
+</dd>
+
+<dt>--with-atlas-dir=<c>prefix</c></dt>
+<dd>
+\anchor with-atlas-dir
+\addindex __configure__--with-atlas-dir
+Search for ATLAS under <c>prefix</c>, which should notably contain
+<c>include/cblas.h</c>.
+</dd>
+
+<dt>--with-mkl-cflags=<c>cflags</c></dt>
+<dd>
+\anchor with-mkl-cflags
+\addindex __configure__--with-mkl-cflags
+Use <c>cflags</c> to compile code that uses the MKL library.
+</dd>
+
+<dt>--with-mkl-ldflags=<c>ldflags</c></dt>
+<dd>
+\anchor with-mkl-ldflags
+\addindex __configure__--with-mkl-ldflags
+Use <c>ldflags</c> when linking code that uses the MKL library.  Note
+that the MKL website
+(http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/)
+provides a script to determine the linking flags.
+</dd>
+
+<dt>--disable-build-examples</dt>
+<dd>
+\anchor disable-build-examples
+\addindex __configure__--disable-build-examples
+Disable the build of examples.
+</dd>
+
+
+<dt>--enable-sc-hypervisor</dt>
+<dd>
+\anchor enable-sc-hypervisor
+\addindex __configure__--enable-sc-hypervisor
+Enable the Scheduling Context Hypervisor plugin (\ref Scheduling_Context_Hypervisor).
+By default, it is disabled.
+</dd>
+
+<dt>--enable-memory-stats</dt>
+<dd>
+\anchor enable-memory-stats
+\addindex __configure__--enable-memory-stats
+Enable memory statistics (\ref Memory_feedback).
+</dd>
+
+<dt>--enable-simgrid</dt>
+<dd>
+\anchor enable-simgrid
+\addindex __configure__--enable-simgrid
+Enable simulation of execution in simgrid, to allow easy experimentation with
+various numbers of cores and GPUs, or amount of memory, etc. Experimental.
+
+The path to simgrid can be specified through the <c>SIMGRID_CFLAGS</c> and
+<c>SIMGRID_LIBS</c> environment variables, for instance:
+
+\verbatim
+export SIMGRID_CFLAGS="-I/usr/local/simgrid/include"
+export SIMGRID_LIBS="-L/usr/local/simgrid/lib -lsimgrid"
+\endverbatim
+
+</dd>
+
+</dl>
+
+*/

+ 506 - 0
doc/doxygen/chapters/environment_variables.doxy

@@ -0,0 +1,506 @@
+/*
+ * This file is part of the StarPU Handbook.
+ * Copyright (C) 2009--2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
+ * See the file version.doxy for copying conditions.
+ */
+
+/*! \page EnvironmentVariables Execution Configuration Through Environment Variables
+
+The behavior of the StarPU library and tools may be tuned thanks to
+the following environment variables.
+
+\section ConfiguringWorkers Configuring Workers
+
+<dl>
+
+<dt>STARPU_NCPU</dt>
+<dd>
+\anchor STARPU_NCPU
+\addindex __env__STARPU_NCPU
+Specify the number of CPU workers (thus not including workers
+dedicated to control accelerators). Note that by default, StarPU will
+not allocate more CPU workers than there are physical CPUs, and that
+some CPUs are used to control the accelerators.
+</dd>
+
+<dt>STARPU_NCPUS</dt>
+<dd>
+\anchor STARPU_NCPUS
+\addindex __env__STARPU_NCPUS
+This variable is deprecated. You should use \ref STARPU_NCPU.
+</dd>
+
+<dt>STARPU_NCUDA</dt>
+<dd>
+\anchor STARPU_NCUDA
+\addindex __env__STARPU_NCUDA
+Specify the number of CUDA devices that StarPU can use. If
+\ref STARPU_NCUDA is lower than the number of physical devices, it is
+possible to select which CUDA devices should be used by the means of the
+environment variable \ref STARPU_WORKERS_CUDAID. By default, StarPU will
+create as many CUDA workers as there are CUDA devices.
+</dd>
+
+<dt>STARPU_NOPENCL</dt>
+<dd>
+\anchor STARPU_NOPENCL
+\addindex __env__STARPU_NOPENCL
+OpenCL equivalent of the environment variable \ref STARPU_NCUDA.
+</dd>
+
+<dt>STARPU_OPENCL_ON_CPUS</dt>
+<dd>
+\anchor STARPU_OPENCL_ON_CPUS
+\addindex __env__STARPU_OPENCL_ON_CPUS
+By default, the OpenCL driver only enables GPU and accelerator
+devices. By setting the environment variable \ref
+STARPU_OPENCL_ON_CPUS to 1, the OpenCL driver will also enable CPU
+devices.
+</dd>
+
+<dt>STARPU_OPENCL_ONLY_ON_CPUS</dt>
+<dd>
+\anchor STARPU_OPENCL_ONLY_ON_CPUS
+\addindex __env__STARPU_OPENCL_ONLY_ON_CPUS
+By default, the OpenCL driver enables GPU and accelerator
+devices. By setting the environment variable \ref
+STARPU_OPENCL_ONLY_ON_CPUS to 1, the OpenCL driver will ONLY enable
+CPU devices.
+</dd>
+
+<dt>STARPU_WORKERS_NOBIND</dt>
+<dd>
+\anchor STARPU_WORKERS_NOBIND
+\addindex __env__STARPU_WORKERS_NOBIND
+Setting it to non-zero will prevent StarPU from binding its threads to
+CPUs. This is for instance useful when running the testsuite in parallel.
+</dd>
+
+<dt>STARPU_WORKERS_CPUID</dt>
+<dd>
+\anchor STARPU_WORKERS_CPUID
+\addindex __env__STARPU_WORKERS_CPUID
+Passing an array of integers (starting from 0) in \ref STARPU_WORKERS_CPUID
+specifies on which logical CPU the different workers should be
+bound. For instance, if <c>STARPU_WORKERS_CPUID = "0 1 4 5"</c>, the first
+worker will be bound to logical CPU #0, the second CPU worker will be bound to
+logical CPU #1 and so on.  Note that the logical ordering of the CPUs is either
+determined by the OS, or provided by the library <c>hwloc</c> in case it is
+available.
+
+Note that the first workers correspond to the CUDA workers, then come the
+OpenCL workers, and finally the CPU workers. For example if
+we have <c>STARPU_NCUDA=1</c>, <c>STARPU_NOPENCL=1</c>, <c>STARPU_NCPU=2</c>
+and <c>STARPU_WORKERS_CPUID = "0 2 1 3"</c>, the CUDA device will be controlled
+by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and
+the logical CPUs #1 and #3 will be used by the CPU workers.
+
+If the number of workers is larger than the array given in \ref
+STARPU_WORKERS_CPUID, the workers are bound to the logical CPUs in a
+round-robin fashion: if <c>STARPU_WORKERS_CPUID = "0 1"</c>, the first
+and the third (resp. second and fourth) workers will be put on CPU #0
+(resp. CPU #1).
+
+This variable is ignored if the field
+starpu_conf::use_explicit_workers_bindid passed to starpu_init() is
+set.
+
+</dd>
+
+<dt>STARPU_WORKERS_CUDAID</dt>
+<dd>
+\anchor STARPU_WORKERS_CUDAID
+\addindex __env__STARPU_WORKERS_CUDAID
+Similarly to the \ref STARPU_WORKERS_CPUID environment variable, it is
+possible to select which CUDA devices should be used by StarPU. On a machine
+equipped with 4 GPUs, setting <c>STARPU_WORKERS_CUDAID = "1 3"</c> and
+<c>STARPU_NCUDA=2</c> specifies that 2 CUDA workers should be created, and that
+they should use CUDA devices #1 and #3 (the logical ordering of the devices is
+the one reported by CUDA).
+
+This variable is ignored if the field
+starpu_conf::use_explicit_workers_cuda_gpuid passed to starpu_init()
+is set.
+</dd>
+
+<dt>STARPU_WORKERS_OPENCLID</dt>
+<dd>
+\anchor STARPU_WORKERS_OPENCLID
+\addindex __env__STARPU_WORKERS_OPENCLID
+OpenCL equivalent of the \ref STARPU_WORKERS_CUDAID environment variable.
+
+This variable is ignored if the field
+starpu_conf::use_explicit_workers_opencl_gpuid passed to starpu_init()
+is set.
+</dd>
+
+<dt>STARPU_SINGLE_COMBINED_WORKER</dt>
+<dd>
+\anchor STARPU_SINGLE_COMBINED_WORKER
+\addindex __env__STARPU_SINGLE_COMBINED_WORKER
+If set, StarPU will create several workers which won't be able to work
+concurrently. It will by default create combined workers whose size goes from 1
+to the total number of CPU workers in the system. \ref STARPU_MIN_WORKERSIZE
+and \ref STARPU_MAX_WORKERSIZE can be used to change this default.
+</dd>
+
+<dt>STARPU_MIN_WORKERSIZE</dt>
+<dd>
+\anchor STARPU_MIN_WORKERSIZE
+\addindex __env__STARPU_MIN_WORKERSIZE
+When \ref STARPU_SINGLE_COMBINED_WORKER is set, \ref STARPU_MIN_WORKERSIZE
+allows specifying the minimum size of the combined workers (instead of the default 1).
+</dd>
+
+<dt>STARPU_MAX_WORKERSIZE</dt>
+<dd>
+\anchor STARPU_MAX_WORKERSIZE
+\addindex __env__STARPU_MAX_WORKERSIZE
+When \ref STARPU_SINGLE_COMBINED_WORKER is set, \ref STARPU_MAX_WORKERSIZE
+allows specifying the maximum size of the combined workers (instead of the
+number of CPU workers in the system).
+</dd>
+
+<dt>STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER</dt>
+<dd>
+\anchor STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER
+\addindex __env__STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER
+Let the user decide how many elements are allowed between combined workers
+created from hwloc information. For instance, in the case of sockets with 6
+cores without shared L2 caches, if \ref STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER is
+set to 6, no combined worker will be synthesized beyond one for the socket
+and one per core. If it is set to 3, 3 intermediate combined workers will be
+synthesized, to divide the socket cores into 3 chunks of 2 cores. If it is set to
+2, 2 intermediate combined workers will be synthesized, to divide the socket
+cores into 2 chunks of 3 cores, and then 3 additional combined workers will be
+synthesized, to divide the former synthesized workers into a bunch of 2 cores,
+and the remaining core (for which no combined worker is synthesized since there
+is already a normal worker for it).
+
+The default, 2, thus makes StarPU tend to build a binary tree of combined
+workers.
+</dd>
+
+<dt>STARPU_DISABLE_ASYNCHRONOUS_COPY</dt>
+<dd>
+\anchor STARPU_DISABLE_ASYNCHRONOUS_COPY
+\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_COPY
+Disable asynchronous copies between CPU and GPU devices.
+The AMD implementation of OpenCL is known to
+fail when copying data asynchronously. When using this implementation,
+it is therefore necessary to disable asynchronous data transfers.
+</dd>
+
+<dt>STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY</dt>
+<dd>
+\anchor STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY
+\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY
+Disable asynchronous copies between CPU and CUDA devices.
+</dd>
+
+<dt>STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY</dt>
+<dd>
+\anchor STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY
+\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY
+Disable asynchronous copies between CPU and OpenCL devices.
+The AMD implementation of OpenCL is known to
+fail when copying data asynchronously. When using this implementation,
+it is therefore necessary to disable asynchronous data transfers.
+</dd>
+
+<dt>STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY</dt>
+<dd>
+\anchor STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
+\addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
+Disable asynchronous copies between CPU and MIC devices.
+</dd>
+
+<dt>STARPU_DISABLE_CUDA_GPU_GPU_DIRECT</dt>
+<dd>
+\anchor STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
+\addindex __env__STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
+Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
+instead. This makes it possible to test the performance effect of GPU-Direct.
+</dd>
+
+</dl>
+
+\section ConfiguringtheSchedulingengine Configuring the Scheduling engine
+
+<dl>
+
+<dt>STARPU_SCHED</dt>
+<dd>
+\anchor STARPU_SCHED
+\addindex __env__STARPU_SCHED
+Choose between the different scheduling policies proposed by StarPU: random,
+work stealing, greedy, with performance models, etc.
+
+Use <c>STARPU_SCHED=help</c> to get the list of available schedulers.
+</dd>
+
+<dt>STARPU_CALIBRATE</dt>
+<dd>
+\anchor STARPU_CALIBRATE
+\addindex __env__STARPU_CALIBRATE
+If this variable is set to 1, the performance models are calibrated during
+the execution. If it is set to 2, the previous values are dropped to restart
+calibration from scratch. Setting this variable to 0 disables calibration; this
+is the default behaviour.
+
+Note: this currently only applies to <c>dm</c> and <c>dmda</c> scheduling policies.
+</dd>
+
+<dt>STARPU_BUS_CALIBRATE</dt>
+<dd>
+\anchor STARPU_BUS_CALIBRATE
+\addindex __env__STARPU_BUS_CALIBRATE
+If this variable is set to 1, the bus is recalibrated during initialization.
+</dd>
+
+<dt>STARPU_PREFETCH</dt>
+<dd>
+\anchor STARPU_PREFETCH
+\addindex __env__STARPU_PREFETCH
+This variable indicates whether data prefetching should be enabled (0 means
+that it is disabled). If prefetching is enabled, when a task is scheduled to be
+executed e.g. on a GPU, StarPU will request an asynchronous transfer in
+advance, so that data is already present on the GPU when the task starts. As a
+result, computation and data transfers are overlapped.
+Note that prefetching is enabled by default in StarPU.
+</dd>
+
+<dt>STARPU_SCHED_ALPHA</dt>
+<dd>
+\anchor STARPU_SCHED_ALPHA
+\addindex __env__STARPU_SCHED_ALPHA
+To estimate the cost of a task StarPU takes into account the estimated
+computation time (obtained thanks to performance models). The alpha factor is
+the coefficient to be applied to it before adding it to the communication part.
+</dd>
+
+<dt>STARPU_SCHED_BETA</dt>
+<dd>
+\anchor STARPU_SCHED_BETA
+\addindex __env__STARPU_SCHED_BETA
+To estimate the cost of a task StarPU takes into account the estimated
+data transfer time (obtained thanks to performance models). The beta factor is
+the coefficient to be applied to it before adding it to the computation part.
+</dd>
+
+<dt>STARPU_SCHED_GAMMA</dt>
+<dd>
+\anchor STARPU_SCHED_GAMMA
+\addindex __env__STARPU_SCHED_GAMMA
+Define the execution time penalty of a joule (\ref Power-based_scheduling).
+</dd>
+
+<dt>STARPU_IDLE_POWER</dt>
+<dd>
+\anchor STARPU_IDLE_POWER
+\addindex __env__STARPU_IDLE_POWER
+Define the idle power of the machine (\ref Power-based_scheduling).
+</dd>
+
+<dt>STARPU_PROFILING</dt>
+<dd>
+\anchor STARPU_PROFILING
+\addindex __env__STARPU_PROFILING
+Enable on-line performance monitoring (\ref Enabling_on-line_performance_monitoring).
+</dd>
+
+</dl>
+
+\section Extensions Extensions
+
+<dl>
+
+<dt>SOCL_OCL_LIB_OPENCL</dt>
+<dd>
+\anchor SOCL_OCL_LIB_OPENCL
+\addindex __env__SOCL_OCL_LIB_OPENCL
+The SOCL test suite is only run when the environment variable \ref
+SOCL_OCL_LIB_OPENCL is defined. It should contain the location
+of the libOpenCL.so file of the OCL ICD implementation.
+</dd>
+
+<dt>STARPU_COMM_STATS</dt>
+<dd>
+\anchor STARPU_COMM_STATS
+\addindex __env__STARPU_COMM_STATS
+Communication statistics for starpumpi (\ref StarPU_MPI_support)
+will be enabled when the environment variable \ref STARPU_COMM_STATS
+is defined to a value other than 0.
+</dd>
+
+<dt>STARPU_MPI_CACHE</dt>
+<dd>
+\anchor STARPU_MPI_CACHE
+\addindex __env__STARPU_MPI_CACHE
+Communication cache for starpumpi (\ref StarPU_MPI_support) will be
+disabled when the environment variable \ref STARPU_MPI_CACHE is set
+to 0. It is enabled by default or for any other values of the variable
+\ref STARPU_MPI_CACHE.
+</dd>
+
+</dl>
+
+\section Miscellaneousanddebug Miscellaneous and debug
+
+<dl>
+
+<dt>STARPU_HOME</dt>
+<dd>
+\anchor STARPU_HOME
+\addindex __env__STARPU_HOME
+This specifies the main directory in which StarPU stores its
+configuration files. The default is <c>$HOME</c> on Unix environments,
+and <c>$USERPROFILE</c> on Windows environments.
+</dd>
+
+<dt>STARPU_HOSTNAME</dt>
+<dd>
+\anchor STARPU_HOSTNAME
+\addindex __env__STARPU_HOSTNAME
+When set, force the hostname to be used when dealing with performance model
+files. Models are indexed by machine name. When running for example on
+a homogeneous cluster, it is possible to share the models between
+machines by setting <c>export STARPU_HOSTNAME=some_global_name</c>.
+</dd>
+
+<dt>STARPU_OPENCL_PROGRAM_DIR</dt>
+<dd>
+\anchor STARPU_OPENCL_PROGRAM_DIR
+\addindex __env__STARPU_OPENCL_PROGRAM_DIR
+This specifies the directory where the OpenCL codelet source files are
+located. The function starpu_opencl_load_program_source() looks
+for the codelet in the current directory, in the directory specified
+by the environment variable \ref STARPU_OPENCL_PROGRAM_DIR, in the
+directory <c>share/starpu/opencl</c> of the installation directory of
+StarPU, and finally in the source directory of StarPU.
+</dd>
+
+<dt>STARPU_SILENT</dt>
+<dd>
+\anchor STARPU_SILENT
+\addindex __env__STARPU_SILENT
+This variable allows disabling verbose mode at runtime when StarPU
+has been configured with the option \ref enable-verbose. It also
+disables the display of StarPU information and warning messages.
+</dd>
+
+<dt>STARPU_LOGFILENAME</dt>
+<dd>
+\anchor STARPU_LOGFILENAME
+\addindex __env__STARPU_LOGFILENAME
+This variable specifies the file in which the debugging output should be saved.
+</dd>
+
+<dt>STARPU_FXT_PREFIX</dt>
+<dd>
+\anchor STARPU_FXT_PREFIX
+\addindex __env__STARPU_FXT_PREFIX
+This variable specifies in which directory to save the trace generated if FxT is enabled. It needs to have a trailing '/' character.
+</dd>
+
+<dt>STARPU_LIMIT_CUDA_devid_MEM</dt>
+<dd>
+\anchor STARPU_LIMIT_CUDA_devid_MEM
+\addindex __env__STARPU_LIMIT_CUDA_devid_MEM
+This variable specifies the maximum number of megabytes that should be
+available to the application on the CUDA device with the identifier
+<c>devid</c>. This variable is intended to be used for experimental
+purposes as it emulates devices that have a limited amount of memory.
+When defined, the variable overwrites the value of the variable
+\ref STARPU_LIMIT_CUDA_MEM.
+</dd>
+
+<dt>STARPU_LIMIT_CUDA_MEM</dt>
+<dd>
+\anchor STARPU_LIMIT_CUDA_MEM
+\addindex __env__STARPU_LIMIT_CUDA_MEM
+This variable specifies the maximum number of megabytes that should be
+available to the application on each CUDA device. This variable is
+intended to be used for experimental purposes as it emulates devices
+that have a limited amount of memory.
+</dd>
+
+<dt>STARPU_LIMIT_OPENCL_devid_MEM</dt>
+<dd>
+\anchor STARPU_LIMIT_OPENCL_devid_MEM
+\addindex __env__STARPU_LIMIT_OPENCL_devid_MEM
+This variable specifies the maximum number of megabytes that should be
+available to the application on the OpenCL device with the identifier
+<c>devid</c>. This variable is intended to be used for experimental
+purposes as it emulates devices that have a limited amount of memory.
+When defined, the variable overwrites the value of the variable
+\ref STARPU_LIMIT_OPENCL_MEM.
+</dd>
+
+<dt>STARPU_LIMIT_OPENCL_MEM</dt>
+<dd>
+\anchor STARPU_LIMIT_OPENCL_MEM
+\addindex __env__STARPU_LIMIT_OPENCL_MEM
+This variable specifies the maximum number of megabytes that should be
+available to the application on each OpenCL device. This variable is
+intended to be used for experimental purposes as it emulates devices
+that have a limited amount of memory.
+</dd>
+
+<dt>STARPU_LIMIT_CPU_MEM</dt>
+<dd>
+\anchor STARPU_LIMIT_CPU_MEM
+\addindex __env__STARPU_LIMIT_CPU_MEM
+This variable specifies the maximum number of megabytes that should be
+available to the application on each CPU device. This variable is
+intended to be used for experimental purposes as it emulates devices
+that have a limited amount of memory.
+</dd>
+
+<dt>STARPU_GENERATE_TRACE</dt>
+<dd>
+\anchor STARPU_GENERATE_TRACE
+\addindex __env__STARPU_GENERATE_TRACE
+When set to <c>1</c>, this variable indicates that StarPU should automatically
+generate a Paje trace when starpu_shutdown() is called.
+</dd>
+
+<dt>STARPU_MEMORY_STATS</dt>
+<dd>
+\anchor STARPU_MEMORY_STATS
+\addindex __env__STARPU_MEMORY_STATS
+When set to 0, disable the display of memory statistics on data which
+have not been unregistered at the end of the execution (\ref Memory_feedback).
+</dd>
+
+<dt>STARPU_BUS_STATS</dt>
+<dd>
+\anchor STARPU_BUS_STATS
+\addindex __env__STARPU_BUS_STATS
+When defined, statistics about data transfers will be displayed when calling
+starpu_shutdown() (\ref Profiling).
+</dd>
+
+<dt>STARPU_WORKER_STATS</dt>
+<dd>
+\anchor STARPU_WORKER_STATS
+\addindex __env__STARPU_WORKER_STATS
+When defined, statistics about the workers will be displayed when calling
+starpu_shutdown() (\ref Profiling). When combined with the
+environment variable \ref STARPU_PROFILING, it displays the power
+consumption (\ref Power-based_scheduling).
+</dd>
+
+<dt>STARPU_STATS</dt>
+<dd>
+\anchor STARPU_STATS
+\addindex __env__STARPU_STATS
+When set to 0, data statistics will not be displayed at the
+end of the execution of an application (\ref Data_statistics).
+</dd>
+
+</dl>
+
+*/

+ 41 - 31
doc/doxygen/refman.tex

@@ -161,46 +161,56 @@ Documentation License”.
 
 \part{Inside StarPU}
 
+\chapter{Execution Configuration Through Environment Variables}
+\label{EnvironmentVariables}
+\hypertarget{EnvironmentVariables}{}
+\input{EnvironmentVariables}
+
+\chapter{Compilation Configuration}
+\label{ConfigureOptions}
+\hypertarget{ConfigureOptions}{}
+\input{ConfigureOptions}
+
 \chapter{Module Index}
 \input{modules}
 
 \chapter{Module Documentation a.k.a. StarPU's API}
 
-\input{group__Versioning}
-\input{group__Initialization__and__Termination}
-\input{group__Standard__Memory__Library}
-\input{group__Workers__Properties}
-\input{group__Data__Management}
-\input{group__Data__Interfaces}
-\input{group__Data__Partition}
-\input{group__Multiformat__Data__Interface}
-\input{group__Codelet__And__Tasks}
-\input{group__Insert__Task}
-\input{group__Explicit__Dependencies}
-\input{group__Implicit__Data__Dependencies}
-\input{group__Performance__Model}
-\input{group__Profiling}
-\input{group__Theoretical__lower__bound__on__execution__time}
-\input{group__CUDA__Extensions}
-\input{group__OpenCL__Extensions}
+\input{group__API__Versioning}
+\input{group__API__Initialization__and__Termination}
+\input{group__API__Standard__Memory__Library}
+\input{group__API__Workers__Properties}
+\input{group__API__Data__Management}
+\input{group__API__Data__Interfaces}
+\input{group__API__Data__Partition}
+\input{group__API__Multiformat__Data__Interface}
+\input{group__API__Codelet__And__Tasks}
+\input{group__API__Insert__Task}
+\input{group__API__Explicit__Dependencies}
+\input{group__API__Implicit__Data__Dependencies}
+\input{group__API__Performance__Model}
+\input{group__API__Profiling}
+\input{group__API__Theoretical__Lower__Bound__on__Execution__Time}
+\input{group__API__CUDA__Extensions}
+\input{group__API__OpenCL__Extensions}
 
 %\input{group__MIC__Extensions}
 %\input{group__SCC__Extensions}
 
-\input{group__Miscellaneous__helpers}
-\input{group__FxT__Support}
-\input{group__FFT__Support}
-\input{group__MPI__Support}
-\input{group__Task__Bundles}
-\input{group__Task__Lists}
-\input{group__Parallel__Tasks}
-\input{group__Running__Drivers}
-\input{group__Expert__Mode}
-\input{group__StarPU-Top__Interface}
-
-\input{group__Scheduling__Contexts}
-\input{group__Scheduling__Policy}
-\input{group__Scheduling__Context__Hypervisor}
+\input{group__API__Miscellaneous__Helpers}
+\input{group__API__FxT__Support}
+\input{group__API__FFT__Support}
+\input{group__API__MPI__Support}
+\input{group__API__Task__Bundles}
+\input{group__API__Task__Lists}
+\input{group__API__Parallel__Tasks}
+\input{group__API__Running__Drivers}
+\input{group__API__Expert__Mode}
+\input{group__API__StarPU-Top__Interface}
+
+\input{group__API__Scheduling__Contexts}
+\input{group__API__Scheduling__Policy}
+\input{group__API__Scheduling__Context__Hypervisor}
 
 \chapter{Deprecated List}
 \label{deprecated}