configuration.texi 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. @c -*-texinfo-*-
  2. @c This file is part of the StarPU Handbook.
  3. @c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
  4. @c Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  5. @c Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
  6. @c See the file starpu.texi for copying conditions.
  7. @node Configuring StarPU
  8. @chapter Configuring StarPU
  9. @menu
  10. * Compilation configuration::
  11. * Execution configuration through environment variables::
  12. @end menu
  13. @node Compilation configuration
  14. @section Compilation configuration
  15. The following arguments can be given to the @code{configure} script.
  16. @menu
  17. * Common configuration::
  18. * Configuring workers::
  19. * Advanced configuration::
  20. @end menu
  21. @node Common configuration
  22. @subsection Common configuration
  23. @menu
  24. * --enable-debug::
  25. * --enable-fast::
  26. * --enable-verbose::
  27. * --enable-coverage::
  28. @end menu
  29. @node --enable-debug
  30. @subsubsection @code{--enable-debug}
  31. @table @asis
  32. @item @emph{Description}:
  33. Enable debugging messages.
  34. @end table
  35. @node --enable-fast
  36. @subsubsection @code{--enable-fast}
  37. @table @asis
  38. @item @emph{Description}:
  39. Do not enforce assertions. This saves a lot of time that would otherwise be spent computing them.
  40. @end table
  41. @node --enable-verbose
  42. @subsubsection @code{--enable-verbose}
  43. @table @asis
  44. @item @emph{Description}:
  45. Augment the verbosity of the debugging messages. This can be disabled
  46. at runtime by setting the environment variable @code{STARPU_SILENT} to
  47. any value.
  48. @smallexample
  49. % STARPU_SILENT=1 ./vector_scal
  50. @end smallexample
  51. @end table
  52. @node --enable-coverage
  53. @subsubsection @code{--enable-coverage}
  54. @table @asis
  55. @item @emph{Description}:
  56. Enable flags for the @code{gcov} coverage tool.
  57. @end table
  58. @node Configuring workers
  59. @subsection Configuring workers
  60. @menu
  61. * --enable-maxcpus::
  62. * --disable-cpu::
  63. * --enable-maxcudadev::
  64. * --disable-cuda::
  65. * --with-cuda-dir::
  66. * --with-cuda-include-dir::
  67. * --with-cuda-lib-dir::
  68. * --disable-cuda-memcpy-peer::
  69. * --enable-maxopencldev::
  70. * --disable-opencl::
  71. * --with-opencl-dir::
  72. * --with-opencl-include-dir::
  73. * --with-opencl-lib-dir::
  74. * --enable-gordon::
  75. * --with-gordon-dir::
  76. * --enable-maximplementations::
  77. @end menu
  78. @node --enable-maxcpus
  79. @subsubsection @code{--enable-maxcpus=<number>}
  80. @table @asis
  81. @item @emph{Description}:
  82. Defines the maximum number of CPU cores that StarPU will support, then
  83. available as the @code{STARPU_MAXCPUS} macro.
  84. @end table
  85. @node --disable-cpu
  86. @subsubsection @code{--disable-cpu}
  87. @table @asis
  88. @item @emph{Description}:
  89. Disable the use of CPUs of the machine. Only GPUs etc. will be used.
  90. @end table
  91. @node --enable-maxcudadev
  92. @subsubsection @code{--enable-maxcudadev=<number>}
  93. @table @asis
  94. @item @emph{Description}:
  95. Defines the maximum number of CUDA devices that StarPU will support, then
  96. available as the @code{STARPU_MAXCUDADEVS} macro.
  97. @end table
  98. @node --disable-cuda
  99. @subsubsection @code{--disable-cuda}
  100. @table @asis
  101. @item @emph{Description}:
  102. Disable the use of CUDA, even if a valid CUDA installation was detected.
  103. @end table
  104. @node --with-cuda-dir
  105. @subsubsection @code{--with-cuda-dir=<path>}
  106. @table @asis
  107. @item @emph{Description}:
  108. Specify the directory where CUDA is installed. This directory should notably contain
  109. @code{include/cuda.h}.
  110. @end table
  111. @node --with-cuda-include-dir
  112. @subsubsection @code{--with-cuda-include-dir=<path>}
  113. @table @asis
  114. @item @emph{Description}:
  115. Specify the directory where CUDA headers are installed. This directory should
  116. notably contain @code{cuda.h}. This defaults to @code{/include} appended to the
  117. value given to @code{--with-cuda-dir}.
  118. @end table
  119. @node --with-cuda-lib-dir
  120. @subsubsection @code{--with-cuda-lib-dir=<path>}
  121. @table @asis
  122. @item @emph{Description}:
  123. Specify the directory where the CUDA library is installed. This directory should
  124. notably contain the CUDA shared libraries (e.g. libcuda.so). This defaults to
  125. @code{/lib} appended to the value given to @code{--with-cuda-dir}.
  126. @end table
  127. @node --disable-cuda-memcpy-peer
  128. @subsubsection @code{--disable-cuda-memcpy-peer}
  129. @table @asis
  130. @item @emph{Description}:
  131. Explicitly disables peer transfers when using CUDA 4.0.
  132. @end table
  133. @node --enable-maxopencldev
  134. @subsubsection @code{--enable-maxopencldev=<number>}
  135. @table @asis
  136. @item @emph{Description}:
  137. Defines the maximum number of OpenCL devices that StarPU will support, then
  138. available as the @code{STARPU_MAXOPENCLDEVS} macro.
  139. @end table
  140. @node --disable-opencl
  141. @subsubsection @code{--disable-opencl}
  142. @table @asis
  143. @item @emph{Description}:
  144. Disable the use of OpenCL, even if the SDK is detected.
  145. @end table
  146. @node --with-opencl-dir
  147. @subsubsection @code{--with-opencl-dir=<path>}
  148. @table @asis
  149. @item @emph{Description}:
  150. Specify the location of the OpenCL SDK. This directory should notably contain
  151. @code{include/CL/cl.h} (or @code{include/OpenCL/cl.h} on Mac OS).
  152. @end table
  153. @node --with-opencl-include-dir
  154. @subsubsection @code{--with-opencl-include-dir=<path>}
  155. @table @asis
  156. @item @emph{Description}:
  157. Specify the location of OpenCL headers. This directory should notably contain
  158. @code{CL/cl.h} (or @code{OpenCL/cl.h} on Mac OS). This defaults to
  159. @code{/include} appended to the value given to @code{--with-opencl-dir}.
  160. @end table
  161. @node --with-opencl-lib-dir
  162. @subsubsection @code{--with-opencl-lib-dir=<path>}
  163. @table @asis
  164. @item @emph{Description}:
  165. Specify the location of the OpenCL library. This directory should notably
  166. contain the OpenCL shared libraries (e.g. libOpenCL.so). This defaults to
  167. @code{/lib} appended to the value given to @code{--with-opencl-dir}.
  168. @end table
  169. @node --enable-gordon
  170. @subsubsection @code{--enable-gordon}
  171. @table @asis
  172. @item @emph{Description}:
  173. Enable the use of the Gordon runtime for Cell SPUs.
  174. @c TODO: rather default to enabled when detected
  175. @end table
  176. @node --with-gordon-dir
  177. @subsubsection @code{--with-gordon-dir=<path>}
  178. @table @asis
  179. @item @emph{Description}:
  180. Specify the location of the Gordon SDK.
  181. @end table
  182. @node --enable-maximplementations
  183. @subsubsection @code{--enable-maximplementations=<number>}
  184. @table @asis
  185. @item @emph{Description}:
  186. Defines the number of implementations that can be defined for a single kind of
  187. device. It is then available as the @code{STARPU_MAXIMPLEMENTATIONS} macro.
  188. @end table
  189. @node Advanced configuration
  190. @subsection Advanced configuration
  191. @menu
  192. * --enable-perf-debug::
  193. * --enable-model-debug::
  194. * --enable-stats::
  195. * --enable-maxbuffers::
  196. * --enable-allocation-cache::
  197. * --enable-opengl-render::
  198. * --enable-blas-lib::
  199. * --with-magma::
  200. * --with-fxt::
  201. * --with-perf-model-dir::
  202. * --with-mpicc::
  203. * --with-goto-dir::
  204. * --with-atlas-dir::
  205. * --with-mkl-cflags::
  206. * --with-mkl-ldflags::
  207. @end menu
  208. @node --enable-perf-debug
  209. @subsubsection @code{--enable-perf-debug}
  210. @table @asis
  211. @item @emph{Description}:
  212. Enable performance debugging through gprof.
  213. @end table
  214. @node --enable-model-debug
  215. @subsubsection @code{--enable-model-debug}
  216. @table @asis
  217. @item @emph{Description}:
  218. Enable performance model debugging.
  219. @end table
  220. @node --enable-stats
  221. @subsubsection @code{--enable-stats}
  222. @table @asis
  223. @item @emph{Description}:
  224. Enable statistics.
  225. @end table
  226. @node --enable-maxbuffers
  227. @subsubsection @code{--enable-maxbuffers=<nbuffers>}
  228. @table @asis
  229. @item @emph{Description}:
  230. Define the maximum number of buffers that tasks will be able to take
  231. as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
  232. @end table
  233. @node --enable-allocation-cache
  234. @subsubsection @code{--enable-allocation-cache}
  235. @table @asis
  236. @item @emph{Description}:
  237. Enable the use of a data allocation cache to avoid the cost of data
  238. allocation with CUDA. Still experimental.
  239. @end table
  240. @node --enable-opengl-render
  241. @subsubsection @code{--enable-opengl-render}
  242. @table @asis
  243. @item @emph{Description}:
  244. Enable the use of OpenGL for the rendering of some examples.
  245. @c TODO: rather default to enabled when detected
  246. @end table
  247. @node --enable-blas-lib
  248. @subsubsection @code{--enable-blas-lib=<name>}
  249. @table @asis
  250. @item @emph{Description}:
  251. Specify the blas library to be used by some of the examples. The
  252. library has to be 'atlas' or 'goto'.
  253. @end table
  254. @node --with-magma
  255. @subsubsection @code{--with-magma=<path>}
  256. @table @asis
  257. @item @emph{Description}:
  258. Specify where magma is installed. This directory should notably contain
  259. @code{include/magmablas.h}.
  260. @end table
  261. @node --with-fxt
  262. @subsubsection @code{--with-fxt=<path>}
  263. @table @asis
  264. @item @emph{Description}:
  265. Specify the location of FxT (for generating traces and rendering them
  266. using ViTE). This directory should notably contain
  267. @code{include/fxt/fxt.h}.
  268. @c TODO add ref to other section
  269. @end table
  270. @node --with-perf-model-dir
  271. @subsubsection @code{--with-perf-model-dir=<dir>}
  272. @table @asis
  273. @item @emph{Description}:
  274. Specify where performance models should be stored (instead of defaulting to the
  275. current user's home).
  276. @end table
  277. @node --with-mpicc
  278. @subsubsection @code{--with-mpicc=<path to mpicc>}
  279. @table @asis
  280. @item @emph{Description}:
  281. Specify the location of the @code{mpicc} compiler to be used for starpumpi.
  282. @end table
  283. @node --with-goto-dir
  284. @subsubsection @code{--with-goto-dir=<dir>}
  285. @table @asis
  286. @item @emph{Description}:
  287. Specify the location of GotoBLAS.
  288. @end table
  289. @node --with-atlas-dir
  290. @subsubsection @code{--with-atlas-dir=<dir>}
  291. @table @asis
  292. @item @emph{Description}:
  293. Specify the location of ATLAS. This directory should notably contain
  294. @code{include/cblas.h}.
  295. @end table
  296. @node --with-mkl-cflags
  297. @subsubsection @code{--with-mkl-cflags=<cflags>}
  298. @table @asis
  299. @item @emph{Description}:
  300. Specify the compilation flags for the MKL Library.
  301. @end table
  302. @node --with-mkl-ldflags
  303. @subsubsection @code{--with-mkl-ldflags=<ldflags>}
  304. @table @asis
  305. @item @emph{Description}:
  306. Specify the linking flags for the MKL Library. Note that the
  307. @url{http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/}
  308. website provides a script to determine the linking flags.
  309. @end table
  310. @node Execution configuration through environment variables
  311. @section Execution configuration through environment variables
  312. @menu
  313. * Workers:: Configuring workers
  314. * Scheduling:: Configuring the Scheduling engine
  315. * Misc:: Miscellaneous and debug
  316. @end menu
  317. Note: the values given in the @code{starpu_conf} structure passed when
  318. calling @code{starpu_init} will override the values of the environment
  319. variables.
  320. @node Workers
  321. @subsection Configuring workers
  322. @menu
  323. * STARPU_NCPUS:: Number of CPU workers
  324. * STARPU_NCUDA:: Number of CUDA workers
  325. * STARPU_NOPENCL:: Number of OpenCL workers
  326. * STARPU_NGORDON:: Number of SPU workers (Cell)
  327. * STARPU_WORKERS_CPUID:: Bind workers to specific CPUs
  328. * STARPU_WORKERS_CUDAID:: Select specific CUDA devices
  329. * STARPU_WORKERS_OPENCLID:: Select specific OpenCL devices
  330. @end menu
  331. @node STARPU_NCPUS
  332. @subsubsection @code{STARPU_NCPUS} -- Number of CPU workers
  333. @table @asis
  334. @item @emph{Description}:
  335. Specify the number of CPU workers (thus not including workers dedicated to controlling accelerators). Note that by default, StarPU will not allocate
  336. more CPU workers than there are physical CPUs, and that some CPUs are used to control
  337. the accelerators.
  338. @end table
  339. @node STARPU_NCUDA
  340. @subsubsection @code{STARPU_NCUDA} -- Number of CUDA workers
  341. @table @asis
  342. @item @emph{Description}:
  343. Specify the number of CUDA devices that StarPU can use. If
  344. @code{STARPU_NCUDA} is lower than the number of physical devices, it is
  345. possible to select which CUDA devices should be used by the means of the
  346. @code{STARPU_WORKERS_CUDAID} environment variable. By default, StarPU will
  347. create as many CUDA workers as there are CUDA devices.
  348. @end table
  349. @node STARPU_NOPENCL
  350. @subsubsection @code{STARPU_NOPENCL} -- Number of OpenCL workers
  351. @table @asis
  352. @item @emph{Description}:
  353. OpenCL equivalent of the @code{STARPU_NCUDA} environment variable.
  354. @end table
  355. @node STARPU_NGORDON
  356. @subsubsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
  357. @table @asis
  358. @item @emph{Description}:
  359. Specify the number of SPUs that StarPU can use.
  360. @end table
  361. @node STARPU_WORKERS_CPUID
  362. @subsubsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
  363. @table @asis
  364. @item @emph{Description}:
  365. Passing an array of integers (starting from 0) in @code{STARPU_WORKERS_CPUID}
  366. specifies on which logical CPU the different workers should be
  367. bound. For instance, if @code{STARPU_WORKERS_CPUID = "0 1 4 5"}, the first
  368. worker will be bound to logical CPU #0, the second CPU worker will be bound to
  369. logical CPU #1 and so on. Note that the logical ordering of the CPUs is either
  370. determined by the OS, or provided by the @code{hwloc} library in case it is
  371. available.
  372. Note that the first workers correspond to the CUDA workers, then come the
  373. OpenCL and the SPU, and finally the CPU workers. For example if
  374. we have @code{STARPU_NCUDA=1}, @code{STARPU_NOPENCL=1}, @code{STARPU_NCPUS=2}
  375. and @code{STARPU_WORKERS_CPUID = "0 2 1 3"}, the CUDA device will be controlled
  376. by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and
  377. the logical CPUs #1 and #3 will be used by the CPU workers.
  378. If the number of workers is larger than the array given in
  379. @code{STARPU_WORKERS_CPUID}, the workers are bound to the logical CPUs in a
  380. round-robin fashion: if @code{STARPU_WORKERS_CPUID = "0 1"}, the first and the
  381. third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1).
  382. This variable is ignored if the @code{use_explicit_workers_bindid} flag of the
  383. @code{starpu_conf} structure passed to @code{starpu_init} is set.
  384. @end table
  385. @node STARPU_WORKERS_CUDAID
  386. @subsubsection @code{STARPU_WORKERS_CUDAID} -- Select specific CUDA devices
  387. @table @asis
  388. @item @emph{Description}:
  389. Similarly to the @code{STARPU_WORKERS_CPUID} environment variable, it is
  390. possible to select which CUDA devices should be used by StarPU. On a machine
  391. equipped with 4 GPUs, setting @code{STARPU_WORKERS_CUDAID = "1 3"} and
  392. @code{STARPU_NCUDA=2} specifies that 2 CUDA workers should be created, and that
  393. they should use CUDA devices #1 and #3 (the logical ordering of the devices is
  394. the one reported by CUDA).
  395. This variable is ignored if the @code{use_explicit_workers_cuda_gpuid} flag of
  396. the @code{starpu_conf} structure passed to @code{starpu_init} is set.
  397. @end table
  398. @node STARPU_WORKERS_OPENCLID
  399. @subsubsection @code{STARPU_WORKERS_OPENCLID} -- Select specific OpenCL devices
  400. @table @asis
  401. @item @emph{Description}:
  402. OpenCL equivalent of the @code{STARPU_WORKERS_CUDAID} environment variable.
  403. This variable is ignored if the @code{use_explicit_workers_opencl_gpuid} flag of
  404. the @code{starpu_conf} structure passed to @code{starpu_init} is set.
  405. @end table
  406. @node Scheduling
  407. @subsection Configuring the Scheduling engine
  408. @menu
  409. * STARPU_SCHED:: Scheduling policy
  410. * STARPU_CALIBRATE:: Calibrate performance models
  411. * STARPU_PREFETCH:: Use data prefetch
  412. * STARPU_SCHED_ALPHA:: Computation factor
  413. * STARPU_SCHED_BETA:: Communication factor
  414. @end menu
  415. @node STARPU_SCHED
  416. @subsubsection @code{STARPU_SCHED} -- Scheduling policy
  417. @table @asis
  418. @item @emph{Description}:
  419. This chooses between the different scheduling policies proposed by StarPU:
  420. random, work stealing, greedy, with performance models, etc.
  421. Use @code{STARPU_SCHED=help} to get the list of available schedulers.
  422. @end table
  423. @node STARPU_CALIBRATE
  424. @subsubsection @code{STARPU_CALIBRATE} -- Calibrate performance models
  425. @table @asis
  426. @item @emph{Description}:
  427. If this variable is set to 1, the performance models are calibrated during
  428. the execution. If it is set to 2, the previous values are dropped to restart
  429. calibration from scratch. Setting this variable to 0 disables calibration; this
  430. is the default behaviour.
  431. Note: this currently only applies to @code{dm}, @code{dmda} and @code{heft} scheduling policies.
  432. @end table
  433. @node STARPU_PREFETCH
  434. @subsubsection @code{STARPU_PREFETCH} -- Use data prefetch
  435. @table @asis
  436. @item @emph{Description}:
  437. This variable indicates whether data prefetching should be enabled (0 means
  438. that it is disabled). If prefetching is enabled, when a task is scheduled to be
  439. executed e.g. on a GPU, StarPU will request an asynchronous transfer in
  440. advance, so that data is already present on the GPU when the task starts. As a
  441. result, computation and data transfers are overlapped.
  442. Note that prefetching is enabled by default in StarPU.
  443. @end table
  444. @node STARPU_SCHED_ALPHA
  445. @subsubsection @code{STARPU_SCHED_ALPHA} -- Computation factor
  446. @table @asis
  447. @item @emph{Description}:
  448. To estimate the cost of a task StarPU takes into account the estimated
  449. computation time (obtained thanks to performance models). The alpha factor is
  450. the coefficient to be applied to it before adding it to the communication part.
  451. @end table
  452. @node STARPU_SCHED_BETA
  453. @subsubsection @code{STARPU_SCHED_BETA} -- Communication factor
  454. @table @asis
  455. @item @emph{Description}:
  456. To estimate the cost of a task StarPU takes into account the estimated
  457. data transfer time (obtained thanks to performance models). The beta factor is
  458. the coefficient to be applied to it before adding it to the computation part.
  459. @end table
  460. @node Misc
  461. @subsection Miscellaneous and debug
  462. @menu
  463. * STARPU_SILENT:: Disable verbose mode
  464. * STARPU_LOGFILENAME:: Select debug file name
  465. * STARPU_FXT_PREFIX:: FxT trace location
  466. * STARPU_LIMIT_GPU_MEM:: Restrict memory size on the GPUs
  467. * STARPU_GENERATE_TRACE:: Generate a Paje trace when StarPU is shut down
  468. @end menu
  469. @node STARPU_SILENT
  470. @subsubsection @code{STARPU_SILENT} -- Disable verbose mode
  471. @table @asis
  472. @item @emph{Description}:
  473. This variable allows disabling verbose mode at runtime when StarPU
  474. has been configured with the option @code{--enable-verbose}.
  475. @end table
  476. @node STARPU_LOGFILENAME
  477. @subsubsection @code{STARPU_LOGFILENAME} -- Select debug file name
  478. @table @asis
  479. @item @emph{Description}:
  480. This variable specifies the file in which the debugging output should be saved.
  481. @end table
  482. @node STARPU_FXT_PREFIX
  483. @subsubsection @code{STARPU_FXT_PREFIX} -- FxT trace location
  484. @table @asis
  485. @item @emph{Description}:
  486. This variable specifies in which directory to save the trace generated if FxT is enabled. It needs to have a trailing '/' character.
  487. @end table
  488. @node STARPU_LIMIT_GPU_MEM
  489. @subsubsection @code{STARPU_LIMIT_GPU_MEM} -- Restrict memory size on the GPUs
  490. @table @asis
  491. @item @emph{Description}:
  492. This variable specifies the maximum number of megabytes that should be
  493. available to the application on each GPU. In case this value is smaller than
  494. the size of the memory of a GPU, StarPU pre-allocates a buffer to waste memory
  495. on the device. This variable is intended to be used for experimental purposes
  496. as it emulates devices that have a limited amount of memory.
  497. @end table
  498. @node STARPU_GENERATE_TRACE
  499. @subsubsection @code{STARPU_GENERATE_TRACE} -- Generate a Paje trace when StarPU is shut down
  500. @table @asis
  501. @item @emph{Description}:
  502. When set to 1, this variable indicates that StarPU should automatically
  503. generate a Paje trace when @code{starpu_shutdown} is called.
  504. @end table