starpu.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2017 Inria
  4. * Copyright (C) 2009-2014,2016-2019 Université de Bordeaux
  5. * Copyright (C) 2010-2015,2017,2019 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #ifndef __STARPU_H__
  19. #define __STARPU_H__
  20. #include <stdlib.h>
  21. #ifndef _MSC_VER
  22. #include <stdint.h>
  23. #else /* MSVC build: <stdint.h> is not used, so the fixed-width integer types are declared by hand. */
  24. #include <windows.h>
  25. typedef unsigned char uint8_t;
  26. typedef unsigned short uint16_t;
  27. typedef unsigned int uint32_t;
  28. typedef unsigned long long uint64_t;
  29. typedef UINT_PTR uintptr_t;
  30. typedef signed char int8_t; /* explicitly signed: plain char may be unsigned (e.g. MSVC /J) and is a distinct type from signed char */
  31. typedef short int16_t;
  32. typedef int int32_t;
  33. typedef long long int64_t;
  34. typedef INT_PTR intptr_t;
  35. #endif
  36. #include <starpu_config.h>
  37. #ifdef STARPU_HAVE_WINDOWS
  38. #include <windows.h>
  39. #endif
  40. #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
  41. #include <starpu_opencl.h>
  42. #endif
  43. #include <starpu_thread.h>
  44. #include <starpu_thread_util.h>
  45. #include <starpu_util.h>
  46. #include <starpu_data.h>
  47. #include <starpu_helper.h>
  48. #include <starpu_disk.h>
  49. #include <starpu_data_interfaces.h>
  50. #include <starpu_data_filters.h>
  51. #include <starpu_stdlib.h>
  52. #include <starpu_task_bundle.h>
  53. #include <starpu_task_dep.h>
  54. #include <starpu_task.h>
  55. #include <starpu_worker.h>
  56. #include <starpu_perfmodel.h>
  57. #include <starpu_worker.h>
  58. #ifndef BUILDING_STARPU
  59. #include <starpu_task_list.h>
  60. #endif
  61. #include <starpu_task_util.h>
  62. #include <starpu_scheduler.h>
  63. #include <starpu_sched_ctx.h>
  64. #include <starpu_expert.h>
  65. #include <starpu_rand.h>
  66. #include <starpu_cuda.h>
  67. #include <starpu_cublas.h>
  68. #include <starpu_cusparse.h>
  69. #include <starpu_bound.h>
  70. #include <starpu_hash.h>
  71. #include <starpu_profiling.h>
  72. #include <starpu_top.h>
  73. #include <starpu_fxt.h>
  74. #include <starpu_driver.h>
  75. #include <starpu_tree.h>
  76. #include <starpu_openmp.h>
  77. #include <starpu_simgrid_wrap.h>
  78. #include <starpu_bitmap.h>
  79. #include <starpu_clusters.h>
  80. #include <starpu_perf_monitoring.h>
  81. #include <starpu_perf_steering.h>
  82. #ifdef __cplusplus
  83. extern "C"
  84. {
  85. #endif
  86. /**
  87. @defgroup API_Initialization_and_Termination Initialization and Termination
  88. @{
  89. */
  90. /**
  91. Structure passed to the starpu_init() function to configure StarPU.
  92. It has to be initialized with starpu_conf_init(). When the default
  93. values are used, StarPU automatically selects the number of
  94. processing units and uses the default scheduling policy. The
  95. environment variables override the equivalent parameters.
  96. */
  97. struct starpu_conf
  98. {
  99. /**
  100. @private
  101. Will be initialized by starpu_conf_init(). Should not be
  102. set by hand.
  103. */
  104. int magic;
  105. /**
  106. Name of the scheduling policy. This can also be specified
  107. with the environment variable \ref STARPU_SCHED. (default =
  108. <c>NULL</c>).
  109. */
  110. const char *sched_policy_name;
  111. /**
  112. Definition of the scheduling policy. This field is ignored
  113. if starpu_conf::sched_policy_name is set.
  114. (default = <c>NULL</c>)
  115. */
  116. struct starpu_sched_policy *sched_policy;
  117. void (*sched_policy_init)(unsigned); /* NOTE(review): undocumented in this header; presumably a hook run when the scheduling policy is set up -- confirm and document. */
  118. /**
  119. Number of CPU cores that StarPU can use. This can also be
  120. specified with the environment variable \ref STARPU_NCPU.
  121. (default = -1)
  122. */
  123. int ncpus;
  124. int reserve_ncpus; /* NOTE(review): undocumented in this header; presumably a number of CPU cores to keep out of StarPU's use -- confirm and document. */
  125. /**
  126. Number of CUDA devices that StarPU can use. This can also
  127. be specified with the environment variable \ref
  128. STARPU_NCUDA.
  129. (default = -1)
  130. */
  131. int ncuda;
  132. /**
  133. Number of OpenCL devices that StarPU can use. This can also
  134. be specified with the environment variable \ref
  135. STARPU_NOPENCL.
  136. (default = -1)
  137. */
  138. int nopencl;
  139. /**
  140. Number of MIC devices that StarPU can use. This can also be
  141. specified with the environment variable \ref STARPU_NMIC.
  142. (default = -1)
  143. */
  144. int nmic;
  145. /**
  146. Number of SCC devices that StarPU can use. This can also be
  147. specified with the environment variable \ref STARPU_NSCC.
  148. (default = -1)
  149. */
  150. int nscc;
  151. /**
  152. Number of MPI Master Slave devices that StarPU can use.
  153. This can also be specified with the environment variable
  154. \ref STARPU_NMPI_MS.
  155. (default = -1)
  156. */
  157. int nmpi_ms;
  158. /**
  159. If this flag is set, the starpu_conf::workers_bindid array
  160. indicates where the different workers are bound, otherwise
  161. StarPU automatically selects where to bind the different
  162. workers. This can also be specified with the environment
  163. variable \ref STARPU_WORKERS_CPUID.
  164. (default = 0)
  165. */
  166. unsigned use_explicit_workers_bindid;
  167. /**
  168. If the starpu_conf::use_explicit_workers_bindid flag is
  169. set, this array indicates where to bind the different
  170. workers. The i-th entry of starpu_conf::workers_bindid
  171. indicates the logical identifier of the processor which
  172. should execute the i-th worker. Note that the logical
  173. ordering of the CPUs is either determined by the OS, or
  174. provided by the hwloc library in case it is available.
  175. */
  176. unsigned workers_bindid[STARPU_NMAXWORKERS];
  177. /**
  178. If this flag is set, the CUDA workers will be attached to
  179. the CUDA devices specified in the
  180. starpu_conf::workers_cuda_gpuid array. Otherwise, StarPU
  181. assigns the CUDA devices in a round-robin fashion. This can
  182. also be specified with the environment variable \ref
  183. STARPU_WORKERS_CUDAID.
  184. (default = 0)
  185. */
  186. unsigned use_explicit_workers_cuda_gpuid;
  187. /**
  188. If the starpu_conf::use_explicit_workers_cuda_gpuid flag is
  189. set, this array contains the logical identifiers of the
  190. CUDA devices (as used by \c cudaGetDevice()).
  191. */
  192. unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS];
  193. /**
  194. If this flag is set, the OpenCL workers will be attached to
  195. the OpenCL devices specified in the
  196. starpu_conf::workers_opencl_gpuid array. Otherwise, StarPU
  197. assigns the OpenCL devices in a round-robin fashion. This
  198. can also be specified with the environment variable \ref
  199. STARPU_WORKERS_OPENCLID.
  200. (default = 0)
  201. */
  202. unsigned use_explicit_workers_opencl_gpuid;
  203. /**
  204. If the starpu_conf::use_explicit_workers_opencl_gpuid flag
  205. is set, this array contains the logical identifiers of the
  206. OpenCL devices to be used.
  207. */
  208. unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
  209. /**
  210. If this flag is set, the MIC workers will be attached to
  211. the MIC devices specified in the array
  212. starpu_conf::workers_mic_deviceid. Otherwise, StarPU
  213. assigns the MIC devices in a round-robin fashion. This can
  214. also be specified with the environment variable \ref
  215. STARPU_WORKERS_MICID.
  216. (default = 0)
  217. */
  218. unsigned use_explicit_workers_mic_deviceid;
  219. /**
  220. If the flag starpu_conf::use_explicit_workers_mic_deviceid
  221. is set, the array contains the logical identifiers of the
  222. MIC devices to be used.
  223. */
  224. unsigned workers_mic_deviceid[STARPU_NMAXWORKERS];
  225. /**
  226. If this flag is set, the SCC workers will be attached to
  227. the SCC devices specified in the array
  228. starpu_conf::workers_scc_deviceid.
  229. (default = 0)
  230. */
  231. unsigned use_explicit_workers_scc_deviceid;
  232. /**
  233. If the flag starpu_conf::use_explicit_workers_scc_deviceid
  234. is set, the array contains the logical identifiers of the
  235. SCC devices to be used. Otherwise, StarPU assigns the SCC
  236. devices in a round-robin fashion. This can also be
  237. specified with the environment variable \ref
  238. STARPU_WORKERS_SCCID.
  239. */
  240. unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
  241. /**
  242. If this flag is set, the MPI Master Slave workers will be
  243. attached to the MPI Master Slave devices specified in the
  244. array starpu_conf::workers_mpi_ms_deviceid. Otherwise,
  245. StarPU assigns the MPI Master Slave devices in a
  246. round-robin fashion.
  247. (default = 0)
  248. */
  249. unsigned use_explicit_workers_mpi_ms_deviceid;
  250. /**
  251. If the flag
  252. starpu_conf::use_explicit_workers_mpi_ms_deviceid is set,
  253. the array contains the logical identifiers of the MPI
  254. Master Slave devices to be used.
  255. */
  256. unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
  257. /**
  258. If this flag is set, StarPU will recalibrate the bus. If
  259. this value is equal to -1, the default value is used. This
  260. can also be specified with the environment variable \ref
  261. STARPU_BUS_CALIBRATE.
  262. (default = 0)
  263. */
  264. int bus_calibrate;
  265. /**
  266. If this flag is set, StarPU will calibrate the performance
  267. models when executing tasks. If this value is equal to -1,
  268. the default value is used. If the value is equal to 1, it
  269. will force continuing calibration. If the value is equal to
  270. 2, the existing performance models will be overwritten.
  271. This can also be specified with the environment variable
  272. \ref STARPU_CALIBRATE.
  273. (default = 0)
  274. */
  275. int calibrate;
  276. /**
  277. By default, StarPU executes parallel tasks concurrently.
  278. Some parallel libraries (e.g. most OpenMP implementations)
  279. however do not support concurrent calls to parallel code.
  280. In such a case, setting this flag makes StarPU only start one
  281. parallel task at a time (but other CPU and GPU tasks are
  282. not affected and can be run concurrently). The parallel
  283. task scheduler will however still try varying combined
  284. worker sizes to look for the most efficient ones.
  285. This can also be specified with the environment variable
  286. \ref STARPU_SINGLE_COMBINED_WORKER.
  287. (default = 0)
  288. */
  289. int single_combined_worker;
  290. /**
  291. Path to the kernel to execute on the MIC device, compiled
  292. for MIC architecture. When set to <c>NULL</c>, StarPU
  293. automatically looks next to the host program location.
  294. (default = <c>NULL</c>)
  295. */
  296. char *mic_sink_program_path;
  297. /**
  298. This flag should be set to 1 to disable asynchronous copies
  299. between CPUs and all accelerators.
  300. The AMD implementation of OpenCL is known to fail when
  301. copying data asynchronously. When using this
  302. implementation, it is therefore necessary to disable
  303. asynchronous data transfers.
  304. This can also be specified with the environment variable
  305. \ref STARPU_DISABLE_ASYNCHRONOUS_COPY.
  306. This can also be specified at compilation time by giving to
  307. the configure script the option \ref
  308. disable-asynchronous-copy "--disable-asynchronous-copy".
  309. (default = 0)
  310. */
  311. int disable_asynchronous_copy;
  312. /**
  313. This flag should be set to 1 to disable asynchronous copies
  314. between CPUs and CUDA accelerators.
  315. This can also be specified with the environment variable
  316. \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
  317. This can also be specified at compilation time by giving to
  318. the configure script the option \ref
  319. disable-asynchronous-cuda-copy
  320. "--disable-asynchronous-cuda-copy".
  321. (default = 0)
  322. */
  323. int disable_asynchronous_cuda_copy;
  324. /**
  325. This flag should be set to 1 to disable asynchronous copies
  326. between CPUs and OpenCL accelerators.
  327. The AMD implementation of OpenCL is known to fail when
  328. copying data asynchronously. When using this
  329. implementation, it is therefore necessary to disable
  330. asynchronous data transfers.
  331. This can also be specified with the environment variable
  332. \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY.
  333. This can also be specified at compilation time by giving to
  334. the configure script the option \ref
  335. disable-asynchronous-opencl-copy
  336. "--disable-asynchronous-opencl-copy".
  337. (default = 0)
  338. */
  339. int disable_asynchronous_opencl_copy;
  340. /**
  341. This flag should be set to 1 to disable asynchronous copies
  342. between CPUs and MIC accelerators.
  343. This can also be specified with the environment variable
  344. \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
  345. This can also be specified at compilation time by giving to
  346. the configure script the option \ref
  347. disable-asynchronous-mic-copy
  348. "--disable-asynchronous-mic-copy".
  349. (default = 0)
  350. */
  351. int disable_asynchronous_mic_copy;
  352. /**
  353. This flag should be set to 1 to disable asynchronous copies
  354. between CPUs and MPI Master Slave devices.
  355. This can also be specified with the environment variable
  356. \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY.
  357. This can also be specified at compilation time by giving to
  358. the configure script the option \ref
  359. disable-asynchronous-mpi-master-slave-copy
  360. "--disable-asynchronous-mpi-master-slave-copy".
  361. (default = 0)
  362. */
  363. int disable_asynchronous_mpi_ms_copy;
  364. /**
  365. Enable CUDA/OpenGL interoperation on these CUDA devices.
  366. This can be set to an array of CUDA device identifiers for
  367. which \c cudaGLSetGLDevice() should be called instead of \c
  368. cudaSetDevice(). Its size is specified by the
  369. starpu_conf::n_cuda_opengl_interoperability field below.
  370. (default = <c>NULL</c>)
  371. */
  372. unsigned *cuda_opengl_interoperability;
  373. /**
  374. Size of the array starpu_conf::cuda_opengl_interoperability.
  375. */
  376. unsigned n_cuda_opengl_interoperability;
  377. /**
  378. Array of drivers that should not be launched by StarPU. The
  379. application will run them in one of its own threads.
  380. (default = <c>NULL</c>)
  381. */
  382. struct starpu_driver *not_launched_drivers;
  383. /**
  384. The number of StarPU drivers that should not be launched by
  385. StarPU, i.e. the number of elements of the array
  386. starpu_conf::not_launched_drivers.
  387. (default = 0)
  388. */
  389. unsigned n_not_launched_drivers;
  390. /**
  391. Specify the buffer size used for FxT tracing. Starting from
  392. FxT version 0.2.12, the buffer will automatically be
  393. flushed when it fills up, but it may still be interesting
  394. to specify a bigger value to avoid any flushing (which
  395. would disturb the trace).
  396. */
  397. unsigned trace_buffer_size;
  398. int global_sched_ctx_min_priority; /* NOTE(review): undocumented in this header; presumably the lowest task priority of the global scheduling context -- confirm and document. */
  399. int global_sched_ctx_max_priority; /* NOTE(review): undocumented in this header; presumably the highest task priority of the global scheduling context -- confirm and document. */
  400. #ifdef STARPU_WORKER_CALLBACKS /* The two callback fields below exist only when this macro is defined, so the layout of this structure depends on it. */
  401. void (*callback_worker_going_to_sleep)(unsigned workerid); /* NOTE(review): undocumented; presumably called when worker \p workerid is about to sleep -- confirm. */
  402. void (*callback_worker_waking_up)(unsigned workerid); /* NOTE(review): undocumented; presumably called when worker \p workerid wakes up -- confirm. */
  403. #endif
  404. /**
  405. Specify if StarPU should catch SIGINT, SIGSEGV and SIGTRAP
  406. signals to make sure final actions (e.g. dumping FxT trace
  407. files) are done even though the application has crashed. By
  408. default (value = \c 1), signals are caught. It should be
  409. disabled on systems which already catch these signals for
  410. their own needs (e.g. the JVM).
  411. This can also be specified with the environment variable
  412. \ref STARPU_CATCH_SIGNALS.
  413. */
  414. int catch_signals;
  415. };
  416. /**
  417. Initialize the \p conf structure with the default values. If
  418. some configuration parameters are already specified through
  419. environment variables, starpu_conf_init() initializes the fields of
  420. \p conf according to the environment variables.
  421. For instance, if \ref STARPU_CALIBRATE is set, its value is put in
  422. the field starpu_conf::calibrate of \p conf.
  423. Upon successful completion, this function returns 0. Otherwise,
  424. <c>-EINVAL</c> indicates that the argument was <c>NULL</c>.
  425. */
  426. int starpu_conf_init(struct starpu_conf *conf);
  427. /**
  428. StarPU initialization method, must be called prior to any other
  429. StarPU call. It is possible to specify StarPU's configuration (e.g.
  430. scheduling policy, number of cores, ...) by passing a
  431. non-<c>NULL</c> \p conf. Default configuration is used if \p conf
  432. is <c>NULL</c>. Upon successful completion, this function returns
  433. 0. Otherwise, <c>-ENODEV</c> indicates that no worker was available
  434. (and thus StarPU was not initialized).
  435. */
  436. int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
  437. /**
  438. Similar to starpu_init(), but also takes the \p argc and \p argv as
  439. defined by the application. This is needed for SCC execution to
  440. initialize the communication library.
  441. Do not call both starpu_init() and starpu_initialize() in the same
  442. program.
  443. */
  444. int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv); /* NOTE(review): unlike starpu_init(), not marked STARPU_WARN_UNUSED_RESULT -- confirm whether this is intentional. */
  445. /**
  446. Return 1 if StarPU is already initialized.
  447. */
  448. int starpu_is_initialized(void);
  449. /**
  450. Wait for the starpu_init() call to finish.
  451. */
  452. void starpu_wait_initialized(void);
  453. /**
  454. StarPU termination method, must be called at the end of the
  455. application: statistics and other post-mortem debugging information
  456. are not guaranteed to be available until this method has been
  457. called.
  458. */
  459. void starpu_shutdown(void);
  460. /**
  461. Suspend the processing of new tasks by workers. It can be used in a
  462. program where StarPU is used during only a part of the execution.
  463. Without this call, the workers continue to poll for new tasks in a
  464. tight loop, wasting CPU time. The symmetric call to starpu_resume()
  465. should be used to unfreeze the workers.
  466. */
  467. void starpu_pause(void);
  468. /**
  469. Symmetric call to starpu_pause(), used to resume the workers'
  470. polling for new tasks.
  471. */
  472. void starpu_resume(void);
  473. /**
  474. Value to be passed to starpu_get_next_bindid() and
  475. starpu_bind_thread_on() when binding a thread which will
  476. significantly eat CPU time, and should thus have its own dedicated
  477. CPU.
  478. */
  479. #define STARPU_THREAD_ACTIVE (1 << 0)
  480. /**
  481. Return a PU binding ID which can be used to bind threads with
  482. starpu_bind_thread_on(). \p flags can be set to
  483. STARPU_THREAD_ACTIVE or 0. When \p npreferred is set to non-zero,
  484. \p preferred is an array of size \p npreferred in which a
  485. preference of PU binding IDs can be set. By default StarPU will
  486. return the first PU available for binding.
  487. */
  488. unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred);
  489. /**
  490. Bind the calling thread on the given \p cpuid (which should have
  491. been obtained with starpu_get_next_bindid()).
  492. Return -1 if a thread was already bound to this PU (but binding
  493. will still have been done, and a warning will have been printed),
  494. so the caller can tell the user how to avoid the issue.
  495. \p name should be set to a unique string so that different calls
  496. with the same name for the same cpuid do not produce a warning.
  497. */
  498. int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name); /* NOTE(review): \p cpuid is an int whereas starpu_get_next_bindid() returns unsigned -- confirm the mismatch is intended. */
  499. /**
  500. Print a description of the topology on \p f.
  501. */
  502. void starpu_topology_print(FILE *f); /* NOTE(review): FILE needs <stdio.h>, which this header does not include directly; presumably provided by one of the StarPU headers included above -- confirm. */
  503. /**
  504. Return 1 if asynchronous data transfers between CPU and
  505. accelerators are disabled.
  506. */
  507. int starpu_asynchronous_copy_disabled(void);
  508. /**
  509. Return 1 if asynchronous data transfers between CPU and CUDA
  510. accelerators are disabled.
  511. */
  512. int starpu_asynchronous_cuda_copy_disabled(void);
  513. /**
  514. Return 1 if asynchronous data transfers between CPU and OpenCL
  515. accelerators are disabled.
  516. */
  517. int starpu_asynchronous_opencl_copy_disabled(void);
  518. /**
  519. Return 1 if asynchronous data transfers between CPU and MIC devices
  520. are disabled.
  521. */
  522. int starpu_asynchronous_mic_copy_disabled(void);
  523. /**
  524. Return 1 if asynchronous data transfers between CPU and MPI Master
  525. Slave devices are disabled.
  526. */
  527. int starpu_asynchronous_mpi_ms_copy_disabled(void);
  528. void starpu_display_stats(void); /* NOTE(review): undocumented in this header; presumably prints runtime statistics -- confirm what is displayed and where, then document. */
  529. void starpu_get_version(int *major, int *minor, int *release); /* NOTE(review): undocumented in this header; presumably stores the StarPU version components in \p major, \p minor and \p release -- confirm, including whether NULL is accepted. */
  530. /** @} */
  531. #ifdef __cplusplus
  532. }
  533. #endif
  534. #include "starpu_deprecated_api.h"
  535. #endif /* __STARPU_H__ */