starpu_openmp.h 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2014,2015,2017,2019 CNRS
  4. * Copyright (C) 2014-2016,2018 Inria
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #ifndef __STARPU_OPENMP_H__
  18. #define __STARPU_OPENMP_H__
  19. #include <starpu_config.h>
  20. /**
  21. @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support
  22. @brief This section describes the interface provided for implementing OpenMP runtimes on top of StarPU.
  23. @{
  24. */
  25. #if defined STARPU_OPENMP
  26. /**
  27. Opaque Simple Lock object (\ref SimpleLock) used for inter-task
  28. synchronization operations. The structure only wraps a single pointer.
  29. \sa starpu_omp_init_lock()
  30. \sa starpu_omp_destroy_lock()
  31. \sa starpu_omp_set_lock()
  32. \sa starpu_omp_unset_lock()
  33. \sa starpu_omp_test_lock()
  34. */
  35. typedef struct { void *internal; /**< Opaque pointer reserved for internal use. */ } starpu_omp_lock_t;
  36. /**
  37. Opaque Nestable Lock object (\ref NestableLock) used for inter-task
  38. synchronization operations. The structure only wraps a single pointer.
  39. \sa starpu_omp_init_nest_lock()
  40. \sa starpu_omp_destroy_nest_lock()
  41. \sa starpu_omp_set_nest_lock()
  42. \sa starpu_omp_unset_nest_lock()
  43. \sa starpu_omp_test_nest_lock()
  44. */
  45. typedef struct { void *internal; /**< Opaque pointer reserved for internal use. */ } starpu_omp_nest_lock_t;
  46. /**
  47. Set of constants for selecting the for loop iteration scheduling
  48. algorithm (\ref OMPFor) as defined by the OpenMP specification.
  49. \sa starpu_omp_for()
  50. \sa starpu_omp_for_inline_first()
  51. \sa starpu_omp_for_inline_next()
  52. \sa starpu_omp_for_alt()
  53. \sa starpu_omp_for_inline_first_alt()
  54. \sa starpu_omp_for_inline_next_alt()
  55. */
  56. enum starpu_omp_sched_value
  57. {
  58. starpu_omp_sched_undefined = 0, /**< Undefined iteration scheduling algorithm. */
  59. starpu_omp_sched_static = 1, /**< \b Static iteration scheduling algorithm. */
  60. starpu_omp_sched_dynamic = 2, /**< \b Dynamic iteration scheduling algorithm. */
  61. starpu_omp_sched_guided = 3, /**< \b Guided iteration scheduling algorithm. */
  62. starpu_omp_sched_auto = 4, /**< \b Automatically chosen iteration scheduling algorithm. */
  63. starpu_omp_sched_runtime = 5 /**< Choice of the iteration scheduling algorithm deferred to \b runtime. */
  64. };
  65. /**
  66. Set of constants for selecting the processor binding method, as
  67. defined in the OpenMP specification.
  68. \sa starpu_omp_get_proc_bind()
  69. */
  70. enum starpu_omp_proc_bind_value
  71. {
  72. starpu_omp_proc_bind_undefined = -1, /**< Undefined processor binding method. */
  73. starpu_omp_proc_bind_false = 0, /**< Team threads may be moved between places at any time. */
  74. starpu_omp_proc_bind_true = 1, /**< Team threads may not be moved between places. */
  75. starpu_omp_proc_bind_master = 2, /**< Assign every thread in the team to the same place as the \b master thread. */
  76. starpu_omp_proc_bind_close = 3, /**< Assign every thread in the team to a place \b close to the parent thread. */
  77. starpu_omp_proc_bind_spread = 4 /**< Assign team threads as a sparse distribution over the selected places. */
  78. };
  79. /**
  80. Set of attributes used for creating a new parallel region.
  81. \sa starpu_omp_parallel_region()
  82. */
  83. struct starpu_omp_parallel_region_attr
  84. {
  85. /**
  86. ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the
  87. parallel region implicit tasks. The codelet must provide a
  88. CPU implementation function.
  89. */
  90. struct starpu_codelet cl;
  91. /**
  92. Array of zero or more ::starpu_data_handle_t data handles to
  93. be passed to the parallel region implicit tasks.
  94. */
  95. starpu_data_handle_t *handles;
  96. /**
  97. Optional pointer to an inline argument to be passed to the
  98. parallel region implicit tasks.
  99. */
  100. void *cl_arg;
  101. /**
  102. Size of the optional inline argument to be passed to the
  103. parallel region implicit tasks, or 0 if unused.
  104. */
  105. size_t cl_arg_size;
  106. /**
  107. Boolean indicating whether the optional inline argument
  108. should be automatically freed (true), or not (false).
  109. */
  110. unsigned cl_arg_free;
  111. /**
  112. Boolean indicating whether the \b if clause of the
  113. corresponding <c>pragma omp parallel</c> is true or false.
  114. */
  115. int if_clause;
  116. /**
  117. Integer indicating the requested number of threads in the
  118. team of the newly created parallel region, or 0 to let the
  119. runtime choose the number of threads on its own. This attribute
  120. may be ignored by the runtime system if the requested
  121. number of threads is higher than the number of threads that
  122. the runtime can create.
  123. */
  124. int num_threads;
  125. };
  126. /**
  127. Set of attributes used for creating a new task region.
  128. \sa starpu_omp_task_region()
  129. */
  130. struct starpu_omp_task_region_attr
  131. {
  132. /**
  133. ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for
  134. the task region explicit task. The codelet must provide a
  135. CPU implementation function or an accelerator
  136. implementation for offloaded target regions.
  137. */
  138. struct starpu_codelet cl;
  139. /**
  140. Array of zero or more ::starpu_data_handle_t data handles to
  141. be passed to the task region explicit task.
  142. */
  143. starpu_data_handle_t *handles;
  144. /**
  145. Optional pointer to an inline argument to be passed to the
  146. task region explicit task.
  147. */
  148. void *cl_arg;
  149. /**
  150. Size of the optional inline argument to be passed to the
  151. task region explicit task, or 0 if unused.
  152. */
  153. size_t cl_arg_size;
  154. /**
  155. Boolean indicating whether the optional inline argument
  156. should be automatically freed (true), or not (false).
  157. */
  158. unsigned cl_arg_free;
  159. int priority; /**< NOTE(review): undocumented in this header; presumably the scheduling priority of the generated task -- confirm. */
  160. /**
  161. Boolean indicating whether the \b if clause of the
  162. corresponding <c>pragma omp task</c> is true or false.
  163. */
  164. int if_clause;
  165. /**
  166. Boolean indicating whether the \b final clause of the
  167. corresponding <c>pragma omp task</c> is true or false.
  168. */
  169. int final_clause;
  170. /**
  171. Boolean indicating whether the \b untied clause of the
  172. corresponding <c>pragma omp task</c> is true or false.
  173. */
  174. int untied_clause;
  175. /**
  176. Boolean indicating whether the \b mergeable clause of the
  177. corresponding <c>pragma omp task</c> is true or false.
  178. */
  179. int mergeable_clause;
  180. /**
  181. Taskloop attributes. NOTE(review): \c is_loop and the fields that follow it are not documented in this header; their names suggest they mirror the clauses (nogroup, collapse, num_tasks, grainsize) and the iteration range of <c>pragma omp taskloop</c> -- confirm against the implementation.
  182. */
  183. int is_loop;
  184. int nogroup_clause;
  185. int collapse;
  186. int num_tasks;
  187. unsigned long long nb_iterations;
  188. unsigned long long grainsize;
  189. unsigned long long begin_i;
  190. unsigned long long end_i;
  191. unsigned long long chunk;
  192. };
  193. #ifdef __cplusplus /* C++ translation units: give the declarations below C linkage */
  194. extern "C"
  195. {
  196. #define __STARPU_OMP_NOTHROW throw () /* C++ no-throw specification. NOTE(review): throw() is deprecated since C++17 and removed in C++20; consider noexcept when __cplusplus >= 201103L */
  197. #else
  198. #define __STARPU_OMP_NOTHROW __attribute__((__nothrow__)) /* C: no-throw function attribute. NOTE(review): __attribute__ is a compiler extension; confirm every supported C compiler accepts it */
  199. #endif
  200. /**
  201. @name Initialisation
  202. @{
  203. */
  204. /**
  205. Initialize StarPU and its OpenMP Runtime support.
  206. \return an integer status code; NOTE(review): its meaning is not documented in this header, confirm against the implementation. */
  207. extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW;
  208. /**
  209. Shut down StarPU and its OpenMP Runtime support.
  210. */
  211. extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW;
  212. /** @} */
  213. /**
  214. @name Parallel
  215. \anchor ORS_Parallel
  216. @{
  217. */
  218. /**
  219. Generate and launch an OpenMP parallel region and return after its
  220. completion. \p attr specifies the attributes for the generated parallel region.
  221. If this function is called from inside another parallel region (the generating
  222. region), the generated parallel region is nested within the generating region.
  223. This function can be used to implement <c>\#pragma omp parallel</c>.
  224. */
  225. extern void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) __STARPU_OMP_NOTHROW;
  226. /**
  227. Execute a function only on the master thread of the OpenMP
  228. parallel region it is called from. When called from a thread that is not the
  229. master of the parallel region it is called from, this function does nothing.
  230. \p f is the function to be called. \p arg is an argument passed to function \p f.
  231. This function can be used to implement <c>\#pragma omp master</c>.
  232. */
  233. extern void starpu_omp_master(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
  234. /**
  235. Determine whether the calling thread is the master of the OpenMP parallel region
  236. it is called from.
  237. This function can be used to implement <c>\#pragma omp master</c> without code
  238. outlining.
  239. \return <c>!0</c> if called by the region's master thread.
  240. \return <c>0</c> if not called by the region's master thread.
  241. */
  242. extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW;
  243. /** @} */
  244. /**
  245. @name Synchronization
  246. \anchor ORS_Synchronization
  247. @{
  248. */
  249. /**
  250. Wait until each participating thread of the innermost OpenMP parallel region
  251. has reached the barrier and each explicit OpenMP task bound to this region has
  252. completed its execution.
  253. This function can be used to implement <c>\#pragma omp barrier</c>.
  254. */
  255. extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW;
  256. /**
  257. Wait until no other thread is executing within the context of the selected
  258. critical section, then proceed to the exclusive execution of a function within
  259. the critical section. \p f is the function to be executed in the critical
  260. section. \p arg is an argument passed to function \p f. \p name is the name of
  261. the selected critical section. If <c>name == NULL</c>, the selected critical
  262. section is the unique anonymous critical section.
  263. This function can be used to implement <c>\#pragma omp
  264. critical</c>.
  265. */
  266. extern void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) __STARPU_OMP_NOTHROW;
  267. /**
  268. Wait until execution can proceed exclusively within the context of the
  269. selected critical section. \p name is the name of the selected critical
  270. section. If <c>name == NULL</c>, the selected critical section is the unique
  271. anonymous critical section.
  272. This function together with #starpu_omp_critical_inline_end can be used to
  273. implement <c>\#pragma omp critical</c> without code outlining.
  274. */
  275. extern void starpu_omp_critical_inline_begin(const char *name) __STARPU_OMP_NOTHROW;
  276. /**
  277. End the exclusive execution within the context of the selected critical
  278. section. \p name is the name of the selected critical section. If
  279. <c>name == NULL</c>, the selected critical section is the unique anonymous
  280. critical section.
  281. This function together with #starpu_omp_critical_inline_begin can be used to
  282. implement <c>\#pragma omp critical</c> without code outlining.
  283. */
  284. extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHROW;
  285. /** @} */
  286. /**
  287. @name Worksharing
  288. \anchor ORS_Worksharing
  289. @{
  290. */
  291. /**
  292. Ensure that a single participating thread of the innermost OpenMP parallel
  293. region executes a function. \p f is the function to be executed by a single
  294. thread. \p arg is an argument passed to function \p f. \p nowait is a flag
  295. indicating whether an implicit barrier is requested after the single section
  296. (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
  297. This function can be used to implement <c>\#pragma omp single</c>.
  298. */
  299. extern void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW;
  300. /**
  301. Decide whether the current thread is elected to run the following single
  302. section among the participating threads of the innermost OpenMP parallel
  303. region.
  304. This function can be used to implement <c>\#pragma omp single</c> without code
  305. outlining.
  306. \return <c>!0</c> if the calling thread has won the election.
  307. \return <c>0</c> if the calling thread has lost the election.
  308. */
  309. extern int starpu_omp_single_inline(void) __STARPU_OMP_NOTHROW;
  310. /**
  311. Execute \p f on a single task of the current parallel region
  312. task, and then broadcast the contents of the memory block pointed to by the
  313. copyprivate pointer \p data and of size \p data_size to the corresponding \p
  314. data pointed memory blocks of all the other participating region tasks. This
  315. function can be used to implement <c>\#pragma omp single</c> with a copyprivate
  316. clause.
  317. \sa starpu_omp_single_copyprivate_inline
  318. \sa starpu_omp_single_copyprivate_inline_begin
  319. \sa starpu_omp_single_copyprivate_inline_end
  320. */
  321. extern void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) __STARPU_OMP_NOTHROW;
  322. /**
  323. Elect one task among the tasks of the current parallel region
  324. task to execute the following single section, and then broadcast the
  325. copyprivate pointer \p data to all the other participating region tasks. This
  326. function can be used to implement <c>\#pragma omp single</c> with a copyprivate
  327. clause without code outlining.
  328. \sa starpu_omp_single_copyprivate_inline
  329. \sa starpu_omp_single_copyprivate_inline_end
  330. \return NOTE(review): presumably the broadcasted copyprivate pointer for tasks that lost the election (and therefore do not execute the single section), and <c>NULL</c> for the task that won the election; this header does not state it on this function -- confirm against the implementation. */
  331. extern void *starpu_omp_single_copyprivate_inline_begin(void *data) __STARPU_OMP_NOTHROW;
  332. /**
  333. Complete the execution of a single section opened with
  334. #starpu_omp_single_copyprivate_inline_begin. This function returns no value
  335. (its return type is \c void). This function can be used to implement
  336. <c>\#pragma omp single</c> with a copyprivate clause without code outlining.
  337. NOTE(review): earlier text here described a returned copyprivate pointer (<c>NULL</c> for the election winner);
  338. that description cannot apply to a \c void function and presumably belongs to the begin function -- confirm.
  339. \sa starpu_omp_single_copyprivate_inline
  340. \sa starpu_omp_single_copyprivate_inline_begin
  341. */
  342. extern void starpu_omp_single_copyprivate_inline_end(void) __STARPU_OMP_NOTHROW;
  343. /**
  344. Execute a parallel loop together with the other threads participating in the
  345. innermost parallel region. \p f is the function to be executed iteratively. \p
  346. arg is an argument passed to function \p f. \p nb_iterations is the number of
  347. iterations to be performed by the parallel loop. \p chunk is the number of
  348. consecutive iterations that should be assigned to the same thread when
  349. scheduling the loop workshares; it follows the semantics of the \c modifier
  350. argument in the OpenMP <c>\#pragma omp for</c> specification. \p schedule is the
  351. scheduling mode according to the OpenMP specification. \p ordered is a flag
  352. indicating whether the loop region may contain an ordered section
  353. (<c>ordered==!0</c>) or not (<c>ordered==0</c>). \p nowait is a flag
  354. indicating whether an implicit barrier is requested after the for section
  355. (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
  356. The function \p f will be called with arguments \p _first_i, the first iteration
  357. to perform, \p _nb_i, the number of consecutive iterations to perform before
  358. returning, \p arg, the free \p arg argument.
  359. This function can be used to implement <c>\#pragma omp for</c>.
  360. */
  361. extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
  362. /**
  363. Decide whether the current thread should start to execute a parallel loop
  364. section. See #starpu_omp_for for the argument description.
  365. This function together with #starpu_omp_for_inline_next can be used to
  366. implement <c>\#pragma omp for</c> without code outlining.
  367. \return <c>!0</c> if the calling thread participates in the loop region and
  368. should execute a first chunk of iterations. In that case, \p *_first_i will be
  369. set to the first iteration of the chunk to perform and \p *_nb_i will be set to
  370. the number of iterations of the chunk to perform.
  371. \return <c>0</c> if the calling thread does not participate in the loop region
  372. because all the available iterations have been assigned to the other threads of
  373. the parallel region.
  374. \sa starpu_omp_for
  375. */
  376. extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
  377. /**
  378. Decide whether the current thread should continue to execute a parallel loop
  379. section. See #starpu_omp_for for the argument description.
  380. This function together with #starpu_omp_for_inline_first can be used to
  381. implement <c>\#pragma omp for</c> without code outlining.
  382. \return <c>!0</c> if the calling thread should execute a next chunk of
  383. iterations. In that case, \p *_first_i will be set to the first iteration of the
  384. chunk to perform and \p *_nb_i will be set to the number of iterations of the
  385. chunk to perform.
  386. \return <c>0</c> if the calling thread does not participate anymore in the loop
  387. region because all the available iterations have been assigned to the other
  388. threads of the parallel region.
  389. \sa starpu_omp_for
  390. */
  391. extern int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW;
  392. /**
  393. Alternative implementation of a parallel loop. Differs from
  394. #starpu_omp_for in the expected arguments of the loop function \c f.
  395. The function \p f will be called with arguments \p _begin_i, the first iteration
  396. to perform, \p _end_i, the first iteration not to perform before
  397. returning, \p arg, the free \p arg argument.
  398. This function can be used to implement <c>\#pragma omp for</c>.
  399. \sa starpu_omp_for
  400. */
  401. extern void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW;
  402. /**
  403. Inline version of the alternative implementation of a parallel loop.
  404. This function together with #starpu_omp_for_inline_next_alt can be used to
  405. implement <c>\#pragma omp for</c> without code outlining.
  406. \sa starpu_omp_for
  407. \sa starpu_omp_for_alt
  408. \sa starpu_omp_for_inline_first
  409. */
  410. extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
  411. /**
  412. Inline version of the alternative implementation of a parallel loop.
  413. This function together with #starpu_omp_for_inline_first_alt can be used to
  414. implement <c>\#pragma omp for</c> without code outlining.
  415. \sa starpu_omp_for
  416. \sa starpu_omp_for_alt
  417. \sa starpu_omp_for_inline_next
  418. */
  419. extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW;
  420. /**
  421. Ensure that a function is sequentially executed once for each iteration in
  422. order within a parallel loop, by the thread that owns the iteration. \p f is the
  423. function to be executed by the thread that owns the current iteration. \p arg is
  424. an argument passed to function \p f.
  425. This function can be used to implement <c>\#pragma omp ordered</c>.
  426. */
  427. extern void starpu_omp_ordered(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
  428. /**
  429. Wait until all the iterations of a parallel loop below the iteration owned by
  430. the current thread have been executed.
  431. This function together with #starpu_omp_ordered_inline_end can be used to
  432. implement <c>\#pragma omp ordered</c> without code outlining.
  433. */
  434. extern void starpu_omp_ordered_inline_begin(void) __STARPU_OMP_NOTHROW;
  435. /**
  436. Notify that the ordered section for the current iteration has been completed.
  437. This function together with #starpu_omp_ordered_inline_begin can be used to
  438. implement <c>\#pragma omp ordered</c> without code outlining.
  439. */
  440. extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW;
  441. /**
  442. Ensure that each function of a given array of functions is executed by one and
  443. only one thread. \p nb_sections is the number of functions in the array \p
  444. section_f. \p section_f is the array of functions to be executed as sections. \p
  445. section_arg is an array of arguments to be passed to the corresponding function.
  446. \p nowait is a flag indicating whether an implicit barrier is requested after
  447. the execution of all the sections (<c>nowait==0</c>) or not (<c>nowait==!0</c>).
  448. This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
  449. */
  450. extern void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW;
  451. /**
  452. Alternative implementation of sections. Differs from
  453. #starpu_omp_sections in that all the sections are combined within a single
  454. function in this version. \p section_f is the function implementing the combined
  455. sections.
  456. The function \p section_f will be called with arguments \p section_num, the
  457. section number to be executed, \p arg, the entry of \p section_arg corresponding
  458. to this section. NOTE(review): \p section_arg is a single <c>void *</c> in this prototype, not an array as in #starpu_omp_sections -- confirm what is actually passed as \p arg.
  459. This function can be used to implement <c>\#pragma omp sections</c> and <c>\#pragma omp section</c>.
  460. \sa starpu_omp_sections
  461. */
  462. extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) __STARPU_OMP_NOTHROW;
  463. /** @} */
  464. /**
  465. @name Task
  466. \anchor ORS_Task
  467. @{
  468. */
  469. /**
  470. Generate an explicit child task. The execution of the generated task is
  471. asynchronous with respect to the calling code unless specified otherwise.
  472. \p attr specifies the attributes for the generated task region.
  473. This function can be used to implement <c>\#pragma omp task</c>.
  474. */
  475. extern void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW;
  476. /**
  477. Wait for the completion of the tasks generated by the current task. This
  478. function does not wait for the descendants of the tasks generated by the current
  479. task.
  480. This function can be used to implement <c>\#pragma omp taskwait</c>.
  481. */
  482. extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW;
  483. /**
  484. Launch a function and wait for the completion of every descendant task
  485. generated during the execution of the function. \p f is the function to launch and \p arg is the argument passed to it.
  486. This function can be used to implement <c>\#pragma omp taskgroup</c>.
  487. \sa starpu_omp_taskgroup_inline_begin
  488. \sa starpu_omp_taskgroup_inline_end
  489. */
  490. extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW;
  491. /**
  492. Open a taskgroup and get ready to wait for the completion of every descendant task
  493. generated during the dynamic scope of the taskgroup. This function takes no function argument: the taskgroup body is the code that follows the call.
  494. This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
  495. \sa starpu_omp_taskgroup
  496. \sa starpu_omp_taskgroup_inline_end
  497. */
  498. extern void starpu_omp_taskgroup_inline_begin(void) __STARPU_OMP_NOTHROW;
  499. /**
  500. Wait for the completion of every descendant task
  501. generated during the dynamic scope of the taskgroup.
  502. This function can be used to implement <c>\#pragma omp taskgroup</c> without code outlining.
  503. \sa starpu_omp_taskgroup
  504. \sa starpu_omp_taskgroup_inline_begin
  505. */
  506. extern void starpu_omp_taskgroup_inline_end(void) __STARPU_OMP_NOTHROW;
  507. extern void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; /**< NOTE(review): undocumented in this header; presumably opens a taskloop described by \p attr (taken as non-const, so it may be updated) without code outlining -- confirm. */
  508. extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; /**< NOTE(review): undocumented in this header; presumably closes the taskloop opened by starpu_omp_taskloop_inline_begin() for the same \p attr -- confirm. */
  509. /** @} */
  510. /**
  511. @name API
  512. \anchor ORS_API
  513. @{
  514. */
  515. /**
  516. Set the \c nthreads_var ICV (internal control variable) for the parallel
  517. regions to be created from the current region. \p threads is the value to set.
  518. Note: The StarPU OpenMP runtime support currently ignores
  519. this setting for nested parallel regions.
  520. \sa starpu_omp_get_num_threads
  521. \sa starpu_omp_get_thread_num
  522. \sa starpu_omp_get_max_threads
  523. \sa starpu_omp_get_num_procs
  524. */
  525. extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW;
  526. /**
  527. Return the number of threads of the current region. This function takes no argument.
  528. \return the number of threads of the current region.
  529. \sa starpu_omp_set_num_threads
  530. \sa starpu_omp_get_thread_num
  531. \sa starpu_omp_get_max_threads
  532. \sa starpu_omp_get_num_procs
  533. */
  534. extern int starpu_omp_get_num_threads(void) __STARPU_OMP_NOTHROW;
  535. /**
  536. Return the rank of the current thread among the threads
  537. of the current region. This function takes no argument.
  538. \return the rank of the current thread in the current region.
  539. \sa starpu_omp_set_num_threads
  540. \sa starpu_omp_get_num_threads
  541. \sa starpu_omp_get_max_threads
  542. \sa starpu_omp_get_num_procs
  543. */
  544. extern int starpu_omp_get_thread_num(void) __STARPU_OMP_NOTHROW;
  545. /**
  546. Return the maximum number of threads that can be used to
  547. create a region from the current region. This function takes no argument.
  548. \return the maximum number of threads that can be used to create a region from the current region.
  549. \sa starpu_omp_set_num_threads
  550. \sa starpu_omp_get_num_threads
  551. \sa starpu_omp_get_thread_num
  552. \sa starpu_omp_get_num_procs
  553. */
  554. extern int starpu_omp_get_max_threads(void) __STARPU_OMP_NOTHROW;
  555. /**
  556. Return the number of StarPU CPU workers.
  557. \return the number of StarPU CPU workers.
  558. \sa starpu_omp_set_num_threads
  559. \sa starpu_omp_get_num_threads
  560. \sa starpu_omp_get_thread_num
  561. \sa starpu_omp_get_max_threads
  562. */
  563. extern int starpu_omp_get_num_procs(void) __STARPU_OMP_NOTHROW;
  564. /**
  565. Return whether the caller is executing within the scope of a parallel region.
  566. \return <c>!0</c> if called from a parallel region scope.
  567. \return <c>0</c> otherwise.
  568. */
  569. extern int starpu_omp_in_parallel(void) __STARPU_OMP_NOTHROW;
  570. /**
  571. Enable (1) or disable (0) the dynamic adjustment of the number of parallel threads, according to \p dynamic_threads.
  572. Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
  573. \sa starpu_omp_get_dynamic
  574. */
  575. extern void starpu_omp_set_dynamic(int dynamic_threads) __STARPU_OMP_NOTHROW;
  576. /**
  577. Return the state of the dynamic thread number adjustment.
  578. \return <c>!0</c> if dynamic thread number adjustment is enabled.
  579. \return <c>0</c> otherwise.
  580. \sa starpu_omp_set_dynamic
  581. */
  582. extern int starpu_omp_get_dynamic(void) __STARPU_OMP_NOTHROW;
  583. /**
  584. Enable (1) or disable (0) nested parallel regions.
  585. Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
  586. \sa starpu_omp_get_nested
  587. \sa starpu_omp_get_max_active_levels
  588. \sa starpu_omp_set_max_active_levels
  589. \sa starpu_omp_get_level
  590. \sa starpu_omp_get_active_level
  591. */
  592. extern void starpu_omp_set_nested(int nested) __STARPU_OMP_NOTHROW;
  593. /**
  594. Return whether nested parallel sections are enabled or not.
  595. \return <c>!0</c> if nested parallel sections are enabled.
  596. \return <c>0</c> otherwise.
  597. \sa starpu_omp_set_nested
  598. \sa starpu_omp_get_max_active_levels
  599. \sa starpu_omp_set_max_active_levels
  600. \sa starpu_omp_get_level
  601. \sa starpu_omp_get_active_level
  602. */
  603. extern int starpu_omp_get_nested(void) __STARPU_OMP_NOTHROW;
  604. /**
  605. Return the state of the cancellation ICV (the OpenMP internal control variable <c>cancel-var</c>).
  606. */
  607. extern int starpu_omp_get_cancellation(void) __STARPU_OMP_NOTHROW;
  608. /**
  609. Set the default scheduling kind for upcoming loops within the
  610. current parallel section. \p kind is the scheduler kind, \p modifier
  611. complements the scheduler kind with information such as the chunk size,
  612. in accordance with the OpenMP specification.
  613. \sa starpu_omp_get_schedule
  614. */
  615. extern void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW;
  616. /**
  617. Retrieve the currently selected default loop scheduler.
  618. The function has no return value: the scheduler kind is stored in \p kind and its modifier in \p modifier.
  619. \sa starpu_omp_set_schedule
  620. */
  621. extern void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW;
  622. /**
  623. Return the number of StarPU CPU workers.
  624. \return the number of StarPU CPU workers.
  625. */
  626. extern int starpu_omp_get_thread_limit(void) __STARPU_OMP_NOTHROW;
  627. /**
  628. Set the maximum number of allowed active parallel section levels.
  629. Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assumes \p max_levels equals <c>1</c> instead.
  630. \sa starpu_omp_set_nested
  631. \sa starpu_omp_get_nested
  632. \sa starpu_omp_get_max_active_levels
  633. \sa starpu_omp_get_level
  634. \sa starpu_omp_get_active_level
  635. */
  636. extern void starpu_omp_set_max_active_levels(int max_levels) __STARPU_OMP_NOTHROW;
  637. /**
  638. Return the current maximum number of allowed active parallel section levels.
  639. \return the current maximum number of allowed active parallel section levels.
  640. \sa starpu_omp_set_nested
  641. \sa starpu_omp_get_nested
  642. \sa starpu_omp_set_max_active_levels
  643. \sa starpu_omp_get_level
  644. \sa starpu_omp_get_active_level
  645. */
  646. extern int starpu_omp_get_max_active_levels(void) __STARPU_OMP_NOTHROW;
  647. /**
  648. Return the nesting level of the current parallel section.
  649. \return the nesting level of the current parallel section.
  650. \sa starpu_omp_set_nested
  651. \sa starpu_omp_get_nested
  652. \sa starpu_omp_get_max_active_levels
  653. \sa starpu_omp_set_max_active_levels
  654. \sa starpu_omp_get_active_level
  655. */
  656. extern int starpu_omp_get_level(void) __STARPU_OMP_NOTHROW;
  657. /**
  658. Return, for the nesting level \p level, the thread number of the ancestor of the current thread, in accordance with the OpenMP specification of <c>omp_get_ancestor_thread_num()</c>.
  659. \return the thread number of the ancestor of the current thread at nesting level \p level.
  660. */
  661. extern int starpu_omp_get_ancestor_thread_num(int level) __STARPU_OMP_NOTHROW;
  662. /**
  663. Return, for the nesting level \p level, the size of the team to which the current thread or its ancestor belongs, in accordance with the OpenMP specification of <c>omp_get_team_size()</c>.
  664. \return the size of the team at nesting level \p level.
  665. */
  666. extern int starpu_omp_get_team_size(int level) __STARPU_OMP_NOTHROW;
  667. /**
  668. Return the nesting level of the current innermost active parallel section.
  669. \return the nesting level of the current innermost active parallel section.
  670. \sa starpu_omp_set_nested
  671. \sa starpu_omp_get_nested
  672. \sa starpu_omp_get_max_active_levels
  673. \sa starpu_omp_set_max_active_levels
  674. \sa starpu_omp_get_level
  675. */
  676. extern int starpu_omp_get_active_level(void) __STARPU_OMP_NOTHROW;
  677. /**
  678. Check whether the current task is final or not.
  679. \return <c>!0</c> if called from a final task.
  680. \return <c>0</c> otherwise.
  681. */
  682. extern int starpu_omp_in_final(void) __STARPU_OMP_NOTHROW;
  683. /**
  684. Return the proc_bind setting of the current parallel region.
  685. \return the proc_bind setting of the current parallel region.
  686. */
  687. extern enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) __STARPU_OMP_NOTHROW;
  688. extern int starpu_omp_get_num_places(void) __STARPU_OMP_NOTHROW;
  689. extern int starpu_omp_get_place_num_procs(int place_num) __STARPU_OMP_NOTHROW;
  690. extern void starpu_omp_get_place_proc_ids(int place_num, int *ids) __STARPU_OMP_NOTHROW;
  691. extern int starpu_omp_get_place_num(void) __STARPU_OMP_NOTHROW;
  692. extern int starpu_omp_get_partition_num_places(void) __STARPU_OMP_NOTHROW;
  693. extern void starpu_omp_get_partition_place_nums(int *place_nums) __STARPU_OMP_NOTHROW;
  694. /**
  695. Set the number of the device to use as default.
  696. Note: The StarPU OpenMP runtime support currently ignores the argument of this function.
  697. \sa starpu_omp_get_default_device
  698. \sa starpu_omp_is_initial_device
  699. */
  700. extern void starpu_omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW;
  701. /**
  702. Return the number of the device used as default.
  703. \return the number of the device used as default.
  704. \sa starpu_omp_set_default_device
  705. \sa starpu_omp_is_initial_device
  706. */
  707. extern int starpu_omp_get_default_device(void) __STARPU_OMP_NOTHROW;
  708. /**
  709. Return the number of devices.
  710. \return the number of devices.
  711. */
  712. extern int starpu_omp_get_num_devices(void) __STARPU_OMP_NOTHROW;
  713. /**
  714. Return the number of teams in the current teams region.
  715. \return the number of teams in the current teams region.
  716. \sa starpu_omp_get_team_num
  717. */
  718. extern int starpu_omp_get_num_teams(void) __STARPU_OMP_NOTHROW;
  719. /**
  720. Return the team number of the calling thread.
  721. \return the team number of the calling thread.
  722. \sa starpu_omp_get_num_teams
  723. */
  724. extern int starpu_omp_get_team_num(void) __STARPU_OMP_NOTHROW;
  725. /**
  726. Check whether the current device is the initial device or not.
  727. \return <c>!0</c> if called from the host device, <c>0</c> otherwise. */
  728. extern int starpu_omp_is_initial_device(void) __STARPU_OMP_NOTHROW;
  729. /**
  730. Return the device number of the initial (host) device. NOTE(review): description inferred from the name and from OpenMP's <c>omp_get_initial_device()</c>; confirm against the implementation. */
  731. extern int starpu_omp_get_initial_device(void) __STARPU_OMP_NOTHROW;
  732. /**
  733. Return the maximum value that can be specified in the priority
  734. clause.
  735. \return the maximum value that can be specified in the priority
  736. clause.
  739. */
  740. extern int starpu_omp_get_max_task_priority(void) __STARPU_OMP_NOTHROW;
  741. /**
  742. Initialize an opaque lock object.
  743. \sa starpu_omp_destroy_lock
  744. \sa starpu_omp_set_lock
  745. \sa starpu_omp_unset_lock
  746. \sa starpu_omp_test_lock
  747. */
  748. extern void starpu_omp_init_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
  749. /**
  750. Destroy an opaque lock object.
  751. \sa starpu_omp_init_lock
  752. \sa starpu_omp_set_lock
  753. \sa starpu_omp_unset_lock
  754. \sa starpu_omp_test_lock
  755. */
  756. extern void starpu_omp_destroy_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
  757. /**
  758. Lock an opaque lock object. If the lock is already locked, the
  759. function will block until it succeeds in exclusively acquiring the lock.
  760. \sa starpu_omp_init_lock
  761. \sa starpu_omp_destroy_lock
  762. \sa starpu_omp_unset_lock
  763. \sa starpu_omp_test_lock
  764. */
  765. extern void starpu_omp_set_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
  766. /**
  767. Unlock a previously locked lock object. The behaviour of this
  768. function is unspecified if it is called on an unlocked lock object.
  769. \sa starpu_omp_init_lock
  770. \sa starpu_omp_destroy_lock
  771. \sa starpu_omp_set_lock
  772. \sa starpu_omp_test_lock
  773. */
  774. extern void starpu_omp_unset_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
  775. /**
  776. Attempt to lock a lock object without blocking, and return whether
  777. it succeeded or not.
  778. \return <c>!0</c> if the function succeeded in acquiring the lock.
  779. \return <c>0</c> if the lock was already locked.
  780. \sa starpu_omp_init_lock
  781. \sa starpu_omp_destroy_lock
  782. \sa starpu_omp_set_lock
  783. \sa starpu_omp_unset_lock
  784. */
  785. extern int starpu_omp_test_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW;
  786. /**
  787. Initialize an opaque lock object supporting nested locking operations.
  788. \sa starpu_omp_destroy_nest_lock
  789. \sa starpu_omp_set_nest_lock
  790. \sa starpu_omp_unset_nest_lock
  791. \sa starpu_omp_test_nest_lock
  792. */
  793. extern void starpu_omp_init_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
  794. /**
  795. Destroy an opaque lock object supporting nested locking operations.
  796. \sa starpu_omp_init_nest_lock
  797. \sa starpu_omp_set_nest_lock
  798. \sa starpu_omp_unset_nest_lock
  799. \sa starpu_omp_test_nest_lock
  800. */
  801. extern void starpu_omp_destroy_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
  802. /**
  803. Lock an opaque lock object supporting nested locking operations.
  804. If the lock is already locked by another task, the function will block until
  805. it succeeds in exclusively acquiring the lock. If the lock is already taken by
  806. the current task, the function will increase the nested locking level of the
  807. lock object.
  808. \sa starpu_omp_init_nest_lock
  809. \sa starpu_omp_destroy_nest_lock
  810. \sa starpu_omp_unset_nest_lock
  811. \sa starpu_omp_test_nest_lock
  812. */
  813. extern void starpu_omp_set_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
  814. /**
  815. Unlock a previously locked lock object supporting nested locking
  816. operations. If the lock has been locked multiple times in nested fashion, the
  817. nested locking level is decreased and the lock remains locked. Otherwise, if
  818. the lock has only been locked once, it becomes unlocked. The behaviour of this
  819. function is unspecified if it is called on an unlocked lock object. The
  820. behaviour of this function is unspecified if it is called from a different task
  821. than the one that locked the lock object.
  822. \sa starpu_omp_init_nest_lock
  823. \sa starpu_omp_destroy_nest_lock
  824. \sa starpu_omp_set_nest_lock
  825. \sa starpu_omp_test_nest_lock
  826. */
  827. extern void starpu_omp_unset_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
  828. /**
  829. Attempt, without blocking, to lock an opaque lock object supporting
  830. nested locking operations and return whether it succeeded or not. If the lock
  831. is already locked by another task, the function will return without having
  832. acquired the lock. If the lock is already taken by the current task, the
  833. function will increase the nested locking level of the lock object.
  834. \return <c>!0</c> if the function succeeded in acquiring the lock.
  835. \return <c>0</c> if the lock was already locked.
  836. \sa starpu_omp_init_nest_lock
  837. \sa starpu_omp_destroy_nest_lock
  838. \sa starpu_omp_set_nest_lock
  839. \sa starpu_omp_unset_nest_lock
  840. */
  841. extern int starpu_omp_test_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW;
  842. /**
  843. Implement the entry point of a fallback global atomic region.
  844. Block until it succeeds in acquiring exclusive access to the global atomic
  845. region.
  846. \sa starpu_omp_atomic_fallback_inline_end
  847. */
  848. extern void starpu_omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW;
  849. /**
  850. Implement the exit point of a fallback global atomic region.
  851. Release the exclusive access to the global atomic region.
  852. \sa starpu_omp_atomic_fallback_inline_begin
  853. */
  854. extern void starpu_omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW;
  855. /**
  856. Return the elapsed wallclock time in seconds.
  857. \return the elapsed wallclock time in seconds.
  858. \sa starpu_omp_get_wtick
  859. */
  860. extern double starpu_omp_get_wtime(void) __STARPU_OMP_NOTHROW;
  861. /**
  862. Return the precision of the time used by starpu_omp_get_wtime().
  863. \return the precision of the time used by starpu_omp_get_wtime().
  864. \sa starpu_omp_get_wtime
  865. */
  866. extern double starpu_omp_get_wtick(void) __STARPU_OMP_NOTHROW;
  867. /**
  868. Enable setting additional vector metadata needed by the OpenMP Runtime Support.
  869. \p handle is the vector data handle.
  870. \p slice_base is the base of an array slice, expressed in number of vector elements from the array base.
  871. \sa STARPU_VECTOR_GET_SLICE_BASE
  872. */
  873. extern void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) __STARPU_OMP_NOTHROW;
  874. /**
  875. Return a pointer to the default arbiter of the StarPU OpenMP runtime support. NOTE(review): description inferred from the name and return type; what the arbiter is used for is not visible here, confirm against the implementation. */
  876. extern struct starpu_arbiter *starpu_omp_get_default_arbiter(void) __STARPU_OMP_NOTHROW;
  877. /** @} */
  878. #ifdef __cplusplus
  879. }
  880. #endif
  881. #endif /* STARPU_USE_OPENMP && !STARPU_DONT_INCLUDE_OPENMP_HEADERS */
  882. /** @} */
  883. #endif /* __STARPU_OPENMP_H__ */