starpu_task.h 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2017 Inria
  4. * Copyright (C) 2009-2018 Université de Bordeaux
  5. * Copyright (C) 2010-2015,2017,2018,2019 CNRS
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2016 Uppsala University
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #ifndef __STARPU_TASK_H__
  21. #define __STARPU_TASK_H__
  22. #include <starpu.h>
  23. #include <errno.h>
  24. #include <assert.h>
  25. #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
  26. # include <cuda.h>
  27. #endif
  28. #ifdef __cplusplus
  29. extern "C"
  30. {
  31. #endif
  32. /**
  33. @defgroup API_Codelet_And_Tasks Codelet And Tasks
  34. @brief This section describes the interface to manipulate codelets
  35. and tasks.
  36. @{
  37. */
  38. /**
  39. To be used when setting the field starpu_codelet::where to specify
  40. that the codelet has no computation part, and thus does not need to
  41. be scheduled, and data does not need to be actually loaded. This is
  42. thus essentially used for synchronization tasks.
  43. */
  44. #define STARPU_NOWHERE ((1ULL)<<0)
  45. /**
  46. To be used when setting the field starpu_codelet::where (or
  47. starpu_task::where) to specify that the codelet (or the task) may be
  48. executed on a CPU processing unit.
  49. */
  50. #define STARPU_CPU ((1ULL)<<1)
  51. /**
  52. To be used when setting the field starpu_codelet::where (or
  53. starpu_task::where) to specify that the codelet (or the task) may be
  54. executed on a CUDA processing unit.
  55. */
  56. #define STARPU_CUDA ((1ULL)<<3)
  57. /**
  58. To be used when setting the field starpu_codelet::where (or
  59. starpu_task::where) to specify that the codelet (or the task) may be
  60. executed on an OpenCL processing unit.
  61. */
  62. #define STARPU_OPENCL ((1ULL)<<6)
  63. /**
  64. To be used when setting the field starpu_codelet::where (or
  65. starpu_task::where) to specify that the codelet (or the task) may be
  66. executed on a MIC processing unit.
  67. */
  68. #define STARPU_MIC ((1ULL)<<7)
  69. /**
  70. To be used when setting the field starpu_codelet::where (or
  71. starpu_task::where) to specify that the codelet (or the task) may be
  72. executed on an SCC processing unit.
  73. */
  74. #define STARPU_SCC ((1ULL)<<8)
  75. /**
  76. To be used when setting the field starpu_codelet::where (or
  77. starpu_task::where) to specify that the codelet (or the task) may be
  78. executed on an MPI Slave processing unit.
  79. */
  80. #define STARPU_MPI_MS ((1ULL)<<9)
  81. /**
  82. Value to be set in starpu_codelet::flags to execute the codelet
  83. functions even in simgrid mode.
  84. */
  85. #define STARPU_CODELET_SIMGRID_EXECUTE (1<<0)
  86. /**
  87. Value to be set in starpu_codelet::flags to execute the codelet
  88. functions even in simgrid mode, and later inject the measured
  89. timing inside the simulation.
  90. */
  91. #define STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT (1<<1)
  92. /**
  93. Value to be set in starpu_codelet::flags to make starpu_task_submit()
  94. not submit automatic asynchronous partitioning/unpartitioning.
  95. */
  96. #define STARPU_CODELET_NOPLANS (1<<2)
  97. /**
  98. Value to be set in starpu_codelet::cuda_flags to allow asynchronous
  99. CUDA kernel execution.
  100. */
  101. #define STARPU_CUDA_ASYNC (1<<0)
  102. /**
  103. Value to be set in starpu_codelet::opencl_flags to allow
  104. asynchronous OpenCL kernel execution.
  105. */
  106. #define STARPU_OPENCL_ASYNC (1<<0)
  107. /**
  108. Memory node number to be used wherever the main RAM memory node has to be specified.
  109. */
  110. #define STARPU_MAIN_RAM 0
  111. /**
  112. Describe the type of a codelet, i.e. whether and how it runs as a parallel task. See \ref ParallelTasks for
  113. details.
  114. */
  115. enum starpu_codelet_type
  116. {
  117. STARPU_SEQ = 0, /**< (default) for classical sequential
  118. tasks.
  119. */
  120. STARPU_SPMD, /**< for a parallel task whose threads are
  121. handled by StarPU. The codelet code has to use
  122. starpu_combined_worker_get_size() and
  123. starpu_combined_worker_get_rank() to
  124. distribute the work.
  125. */
  126. STARPU_FORKJOIN /**< for a parallel task whose threads are
  127. started by the codelet function, which has
  128. to use starpu_combined_worker_get_size() to
  129. determine how many threads should be
  130. started.
  131. */
  132. };
  /** Describe the status of a task. */
  133. enum starpu_task_status
  134. {
  135. STARPU_TASK_INVALID, /**< The task has just been initialized. A macro of the same name and value (0) is defined right below. */
  136. #define STARPU_TASK_INVALID 0
  137. STARPU_TASK_BLOCKED, /**< The task has just been
  138. submitted, and its dependencies have not been checked yet. */
  139. STARPU_TASK_READY, /**< The task is ready for execution. */
  140. STARPU_TASK_RUNNING, /**< The task is running on some worker. */
  141. STARPU_TASK_FINISHED, /**< The task has finished executing. */
  142. STARPU_TASK_BLOCKED_ON_TAG, /**< The task is waiting for a tag. */
  143. STARPU_TASK_BLOCKED_ON_TASK, /**< The task is waiting for a task. */
  144. STARPU_TASK_BLOCKED_ON_DATA, /**< The task is waiting for some data. */
  145. STARPU_TASK_STOPPED /**< The task is stopped. */
  146. };
  147. /**
  148. CPU implementation of a codelet. It is passed the array of data buffers and the codelet argument (starpu_task::cl_arg).
  149. */
  150. typedef void (*starpu_cpu_func_t)(void **, void*);
  151. /**
  152. CUDA implementation of a codelet. It is passed the array of data buffers and the codelet argument.
  153. */
  154. typedef void (*starpu_cuda_func_t)(void **, void*);
  155. /**
  156. OpenCL implementation of a codelet. It is passed the array of data buffers and the codelet argument.
  157. */
  158. typedef void (*starpu_opencl_func_t)(void **, void*);
  159. /**
  160. MIC kernel for a codelet
  161. */
  162. typedef void (*starpu_mic_kernel_t)(void **, void*);
  163. /**
  164. MIC implementation of a codelet: a function which returns the MIC kernel.
  165. */
  166. typedef starpu_mic_kernel_t (*starpu_mic_func_t)(void);
  167. /**
  168. MPI Master Slave kernel for a codelet
  169. */
  170. typedef void (*starpu_mpi_ms_kernel_t)(void **, void*);
  171. /**
  172. MPI Master Slave implementation of a codelet: a function which returns the MPI Master Slave kernel.
  173. */
  174. typedef starpu_mpi_ms_kernel_t (*starpu_mpi_ms_func_t)(void);
  175. /**
  176. SCC kernel for a codelet
  177. */
  178. typedef void (*starpu_scc_kernel_t)(void **, void*);
  179. /**
  180. SCC implementation of a codelet: a function which returns the SCC kernel.
  181. */
  182. typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
  183. /**
  184. @deprecated
  185. Setting the field starpu_codelet::cpu_func with this macro
  186. indicates the codelet will have several implementations. The use of
  187. this macro is deprecated. One should always only define the field
  188. starpu_codelet::cpu_funcs.
  189. */
  190. #define STARPU_MULTIPLE_CPU_IMPLEMENTATIONS ((starpu_cpu_func_t) -1)
  191. /**
  192. @deprecated
  193. Setting the field starpu_codelet::cuda_func with this macro
  194. indicates the codelet will have several implementations. The use of
  195. this macro is deprecated. One should always only define the field
  196. starpu_codelet::cuda_funcs.
  197. */
  198. #define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS ((starpu_cuda_func_t) -1)
  199. /**
  200. @deprecated
  201. Setting the field starpu_codelet::opencl_func with this macro
  202. indicates the codelet will have several implementations. The use of
  203. this macro is deprecated. One should always only define the field
  204. starpu_codelet::opencl_funcs.
  205. */
  206. #define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t) -1)
  207. /**
  208. Value to set in starpu_codelet::nbuffers to specify that the
  209. codelet can accept a variable number of buffers, specified in
  210. starpu_task::nbuffers.
  211. */
  212. #define STARPU_VARIABLE_NBUFFERS (-1)
  213. /**
  214. Value to be set in the field starpu_codelet::nodes to request
  215. StarPU to put the data in CPU-accessible memory (and let StarPU
  216. choose the NUMA node). NOTE(review): starpu_codelet::specific_nodes documents -1 as "the memory node where the task will be executing", so this description may have been meant for STARPU_SPECIFIC_NODE_CPU rather than STARPU_SPECIFIC_NODE_LOCAL -- confirm. The other STARPU_SPECIFIC_NODE_* values below are not documented here.
  217. */
  218. #define STARPU_SPECIFIC_NODE_LOCAL (-1)
  219. #define STARPU_SPECIFIC_NODE_CPU (-2)
  220. #define STARPU_SPECIFIC_NODE_SLOW (-3)
  221. #define STARPU_SPECIFIC_NODE_FAST (-4)
  222. struct starpu_task;
  223. /**
  224. The codelet structure describes a kernel that is possibly
  225. implemented on various targets. For compatibility, make sure to
  226. initialize the whole structure to zero, either by using explicit
  227. memset, or the function starpu_codelet_init(), or by letting the
  228. compiler implicitly do it in e.g. static storage case.
  229. */
  230. struct starpu_codelet
  231. {
  232. /**
  233. Optional field to indicate which types of processing units
  234. are able to execute the codelet. The different values
  235. ::STARPU_CPU, ::STARPU_CUDA, ::STARPU_OPENCL can be
  236. combined to specify on which types of processing units the
  237. codelet can be executed. ::STARPU_CPU|::STARPU_CUDA for
  238. instance indicates that the codelet is implemented for both
  239. CPU cores and CUDA devices while ::STARPU_OPENCL indicates
  240. that it is only available on OpenCL devices. If the field
  241. is unset, its value will be automatically set based on the
  242. availability of the XXX_funcs fields defined below. It can
  243. also be set to ::STARPU_NOWHERE to specify that no
  244. computation has to be actually done.
  245. */
  246. uint32_t where;
  247. /**
  248. Define a function which should return 1 if the worker
  249. designated by \p workerid can execute the \p nimpl -th
  250. implementation of \p task, 0 otherwise.
  251. */
  252. int (*can_execute)(unsigned workerid, struct starpu_task *task, unsigned nimpl);
  253. /**
  254. Optional field to specify the type of the codelet. The
  255. default is ::STARPU_SEQ, i.e. usual sequential
  256. implementation. Other values (::STARPU_SPMD or
  257. ::STARPU_FORKJOIN) declare that a parallel implementation is
  258. also available. See \ref ParallelTasks for details.
  259. */
  260. enum starpu_codelet_type type;
  261. /**
  262. Optional field. If a parallel implementation is available,
  263. this denotes the maximum combined worker size that StarPU
  264. will use to execute parallel tasks for this codelet.
  265. */
  266. int max_parallelism;
  267. /**
  268. @deprecated
  269. Optional field which has been made deprecated. One should
  270. use instead the field starpu_codelet::cpu_funcs.
  271. */
  272. starpu_cpu_func_t cpu_func STARPU_DEPRECATED;
  273. /**
  274. @deprecated
  275. Optional field which has been made deprecated. One should
  276. use instead the starpu_codelet::cuda_funcs field.
  277. */
  278. starpu_cuda_func_t cuda_func STARPU_DEPRECATED;
  279. /**
  280. @deprecated
  281. Optional field which has been made deprecated. One should
  282. use instead the starpu_codelet::opencl_funcs field.
  283. */
  284. starpu_opencl_func_t opencl_func STARPU_DEPRECATED;
  285. /**
  286. Optional array of function pointers to the CPU
  287. implementations of the codelet. The functions prototype
  288. must be:
  289. \code{.c}
  290. void cpu_func(void *buffers[], void *cl_arg)
  291. \endcode
  292. The first argument being the array of data managed by the
  293. data management library, and the second argument is a
  294. pointer to the argument passed from the field
  295. starpu_task::cl_arg. If the field starpu_codelet::where is
  296. set, then the field starpu_codelet::cpu_funcs is ignored if
  297. ::STARPU_CPU does not appear in the field
  298. starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
  299. */
  300. starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS];
  301. /**
  302. Optional array of function pointers to the CUDA
  303. implementations of the codelet. The functions must be
  304. host-functions written in the CUDA runtime API. Their
  305. prototype must be:
  306. \code{.c}
  307. void cuda_func(void *buffers[], void *cl_arg)
  308. \endcode
  309. If the field starpu_codelet::where is set, then the field
  310. starpu_codelet::cuda_funcs is ignored if ::STARPU_CUDA does
  311. not appear in the field starpu_codelet::where, it must be
  312. non-<c>NULL</c> otherwise.
  313. */
  314. starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
  315. /**
  316. Optional array of flags for CUDA execution. They specify
  317. some semantic details about CUDA kernel execution, such as
  318. asynchronous execution.
  319. */
  320. char cuda_flags[STARPU_MAXIMPLEMENTATIONS];
  321. /**
  322. Optional array of function pointers to the OpenCL
  323. implementations of the codelet. The functions prototype
  324. must be:
  325. \code{.c}
  326. void opencl_func(void *buffers[], void *cl_arg)
  327. \endcode
  328. If the field starpu_codelet::where is set, then the
  329. field starpu_codelet::opencl_funcs is ignored if
  330. ::STARPU_OPENCL does not appear in the field
  331. starpu_codelet::where, it must be non-<c>NULL</c> otherwise.
  332. */
  333. starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS];
  334. /**
  335. Optional array of flags for OpenCL execution. They specify
  336. some semantic details about OpenCL kernel execution, such
  337. as asynchronous execution.
  338. */
  339. char opencl_flags[STARPU_MAXIMPLEMENTATIONS];
  340. /**
  341. Optional array of function pointers to a function which
  342. returns the MIC implementation of the codelet. The
  343. functions prototype must be (see ::starpu_mic_func_t):
  344. \code{.c}
  345. starpu_mic_kernel_t mic_func(void)
  346. \endcode
  347. If the field starpu_codelet::where is set, then the field
  348. starpu_codelet::mic_funcs is ignored if ::STARPU_MIC does
  349. not appear in the field starpu_codelet::where. It can be
  350. <c>NULL</c> if starpu_codelet::cpu_funcs_name is
  351. non-<c>NULL</c>, in which case StarPU will simply make a
  352. symbol lookup to get the implementation.
  353. */
  354. starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS];
  355. /**
  356. Optional array of function pointers to a function which
  357. returns the MPI Master Slave implementation of the codelet.
  358. The functions prototype must be (see ::starpu_mpi_ms_func_t):
  359. \code{.c}
  360. starpu_mpi_ms_kernel_t mpi_ms_func(void)
  361. \endcode
  362. If the field starpu_codelet::where is set, then the field
  363. starpu_codelet::mpi_ms_funcs is ignored if ::STARPU_MPI_MS
  364. does not appear in the field starpu_codelet::where. It can
  365. be <c>NULL</c> if starpu_codelet::cpu_funcs_name is
  366. non-<c>NULL</c>, in which case StarPU will simply make a
  367. symbol lookup to get the implementation.
  368. */
  369. starpu_mpi_ms_func_t mpi_ms_funcs[STARPU_MAXIMPLEMENTATIONS];
  370. /**
  371. Optional array of function pointers to a function which
  372. returns the SCC implementation of the codelet. The
  373. functions prototype must be (see ::starpu_scc_func_t):
  374. \code{.c}
  375. starpu_scc_kernel_t scc_func(void)
  376. \endcode
  377. If the field starpu_codelet::where is set, then the field
  378. starpu_codelet::scc_funcs is ignored if ::STARPU_SCC does
  379. not appear in the field starpu_codelet::where. It can be
  380. <c>NULL</c> if starpu_codelet::cpu_funcs_name is
  381. non-<c>NULL</c>, in which case StarPU will simply make a
  382. symbol lookup to get the implementation.
  383. */
  384. starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS];
  385. /**
  386. Optional array of strings which provide the name of the CPU
  387. functions referenced in the array
  388. starpu_codelet::cpu_funcs. This can be used when running on
  389. MIC devices or the SCC platform, for StarPU to simply look
  390. up the MIC function implementation through its name.
  391. */
  392. const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
  393. /**
  394. Specify the number of arguments taken by the codelet. These
  395. arguments are managed by the DSM and are accessed from the
  396. <c>void *buffers[]</c> array. The constant argument passed
  397. with the field starpu_task::cl_arg is not counted in this
  398. number. This value should not be above \ref
  399. STARPU_NMAXBUFS. It may be set to \ref
  400. STARPU_VARIABLE_NBUFFERS to specify that the number of
  401. buffers and their access modes will be set in
  402. starpu_task::nbuffers and starpu_task::modes or
  403. starpu_task::dyn_modes, which thus permits to define
  404. codelets with a varying number of data.
  405. */
  406. int nbuffers;
  407. /**
  408. Is an array of ::starpu_data_access_mode. It describes the
  409. required access modes to the data needed by the codelet
  410. (e.g. ::STARPU_RW). The number of entries in this array
  411. must be specified in the field starpu_codelet::nbuffers,
  412. and should not exceed \ref STARPU_NMAXBUFS. If
  413. insufficient, this value can be set with the configure
  414. option \ref enable-maxbuffers "--enable-maxbuffers".
  415. */
  416. enum starpu_data_access_mode modes[STARPU_NMAXBUFS];
  417. /**
  418. Is an array of ::starpu_data_access_mode. It describes the
  419. required access modes to the data needed by the codelet
  420. (e.g. ::STARPU_RW). The number of entries in this array
  421. must be specified in the field starpu_codelet::nbuffers.
  422. This field should be used for codelets having a number of
  423. data greater than \ref STARPU_NMAXBUFS (see \ref
  424. SettingManyDataHandlesForATask). When defining a codelet,
  425. one should either define this field or the field
  426. starpu_codelet::modes defined above.
  427. */
  428. enum starpu_data_access_mode *dyn_modes;
  429. /**
  430. Default value is 0. If this flag is set, StarPU will not
  431. systematically send all data to the memory node where the
  432. task will be executing, it will read the
  433. starpu_codelet::nodes or starpu_codelet::dyn_nodes array to
  434. determine, for each data, whether to send it to the memory
  435. node where the task will be executing (-1), or to a
  436. specific node (!= -1).
  437. */
  438. unsigned specific_nodes;
  439. /**
  440. Optional field. When starpu_codelet::specific_nodes is 1,
  441. this specifies the memory nodes where each data should be
  442. sent to for task execution. The number of entries in this
  443. array is starpu_codelet::nbuffers, and should not exceed
  444. \ref STARPU_NMAXBUFS.
  445. */
  446. int nodes[STARPU_NMAXBUFS];
  447. /**
  448. Optional field. When starpu_codelet::specific_nodes is 1,
  449. this specifies the memory nodes where each data should be
  450. sent to for task execution. The number of entries in this
  451. array is starpu_codelet::nbuffers. This field should be
  452. used for codelets having a number of data greater than
  453. \ref STARPU_NMAXBUFS (see \ref
  454. SettingManyDataHandlesForATask). When defining a codelet,
  455. one should either define this field or the field
  456. starpu_codelet::nodes defined above.
  457. */
  458. int *dyn_nodes;
  459. /**
  460. Optional pointer to the task duration performance model
  461. associated to this codelet. This optional field is ignored
  462. when set to <c>NULL</c> or when its field
  463. starpu_perfmodel::symbol is not set.
  464. */
  465. struct starpu_perfmodel *model;
  466. /**
  467. Optional pointer to the task energy consumption performance
  468. model associated to this codelet. This optional field is
  469. ignored when set to <c>NULL</c> or when its field
  470. starpu_perfmodel::symbol is not set. In the case of
  471. parallel codelets, this has to account for all processing
  472. units involved in the parallel execution.
  473. */
  474. struct starpu_perfmodel *energy_model;
  475. /**
  476. Optional array for statistics collected at runtime: this is
  477. filled by StarPU and should not be accessed directly, but
  478. for example by calling the function
  479. starpu_codelet_display_stats() (See
  480. starpu_codelet_display_stats() for details).
  481. */
  482. unsigned long per_worker_stats[STARPU_NMAXWORKERS];
  483. /**
  484. Optional name of the codelet. This can be useful for
  485. debugging purposes.
  486. */
  487. const char *name;
  488. /**
  489. Optional color of the codelet. This can be useful for
  490. debugging purposes.
  491. */
  492. unsigned color;
  493. /**
  494. Various flags for the codelet.
  495. */
  496. int flags;
  497. };
  498. /**
  499. Describe a data handle along with the access mode requested on it.
  500. */
  501. struct starpu_data_descr
  502. {
  503. starpu_data_handle_t handle; /**< data handle */
  504. enum starpu_data_access_mode mode; /**< access mode for that handle */
  505. };
  506. /**
  507. Describe a task that can be offloaded on the various processing
  508. units managed by StarPU. It instantiates a codelet. It can either
  509. be allocated dynamically with the function starpu_task_create(), or
  510. declared statically. In the latter case, the programmer has to zero
  511. the structure starpu_task and to fill the different fields
  512. properly. The indicated default values correspond to the
  513. configuration of a task allocated with starpu_task_create().
  514. */
  515. struct starpu_task
  516. {
  517. /**
  518. Optional name of the task. This can be useful for debugging
  519. purposes.
  520. */
  521. const char *name;
  522. /**
  523. Pointer to the corresponding structure starpu_codelet. This
  524. describes where the kernel should be executed, and supplies
  525. the appropriate implementations. When set to <c>NULL</c>,
  526. no code is executed during the tasks, such empty tasks can
  527. be useful for synchronization purposes.
  528. */
  529. struct starpu_codelet *cl;
  530. /**
  531. When set, specify where the task is allowed to be executed.
  532. When unset, take the value of starpu_codelet::where.
  533. */
  534. int32_t where;
  535. /**
  536. Specify the number of buffers. This is only used when
  537. starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS.
  538. */
  539. int nbuffers;
  540. /* Keep dyn_handles, dyn_interfaces and dyn_modes before the
  541. * equivalent static arrays, so we can detect dyn_handles
  542. * being NULL while nbuffers being bigger that STARPU_NMAXBUFS
  543. * (otherwise the overflow would put a non-NULL) */
  544. /**
  545. Array of ::starpu_data_handle_t. Specify the handles to the
  546. different pieces of data accessed by the task. The number
  547. of entries in this array must be specified in the field
  548. starpu_codelet::nbuffers. This field should be used for
  549. tasks having a number of datas greater than \ref
  550. STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask).
  551. When defining a task, one should either define this field
  552. or the field starpu_task::handles defined below.
  553. */
  554. starpu_data_handle_t *dyn_handles;
  555. /**
  556. Array of data pointers to the memory node where execution
  557. will happen, managed by the DSM. Is used when the field
  558. starpu_task::dyn_handles is defined.
  559. */
  560. void **dyn_interfaces;
  561. /**
  562. Used only when starpu_codelet::nbuffers is \ref
  563. STARPU_VARIABLE_NBUFFERS.
  564. Array of ::starpu_data_access_mode which describes the
  565. required access modes to the data needed by the codelet
  566. (e.g. ::STARPU_RW). The number of entries in this array
  567. must be specified in the field starpu_codelet::nbuffers.
  568. This field should be used for codelets having a number of
  569. datas greater than \ref STARPU_NMAXBUFS (see \ref
  570. SettingManyDataHandlesForATask).
  571. When defining a codelet, one should either define this
  572. field or the field starpu_task::modes defined below.
  573. */
  574. enum starpu_data_access_mode *dyn_modes;
  575. /**
  576. Array of ::starpu_data_handle_t. Specify the handles to the
  577. different pieces of data accessed by the task. The number
  578. of entries in this array must be specified in the field
  579. starpu_codelet::nbuffers, and should not exceed
  580. \ref STARPU_NMAXBUFS. If unsufficient, this value can be
  581. set with the configure option \ref enable-maxbuffers
  582. "--enable-maxbuffers".
  583. */
  584. starpu_data_handle_t handles[STARPU_NMAXBUFS];
  585. /**
  586. Array of Data pointers to the memory node where execution
  587. will happen, managed by the DSM.
  588. */
  589. void *interfaces[STARPU_NMAXBUFS];
  590. /**
  591. Used only when starpu_codelet::nbuffers is \ref
  592. STARPU_VARIABLE_NBUFFERS.
  593. Array of ::starpu_data_access_mode which describes the
  594. required access modes to the data neeeded by the codelet
  595. (e.g. ::STARPU_RW). The number of entries in this array
  596. must be specified in the field starpu_task::nbuffers, and
  597. should not exceed \ref STARPU_NMAXBUFS. If unsufficient,
  598. this value can be set with the configure option
  599. \ref enable-maxbuffers "--enable-maxbuffers".
  600. */
  601. enum starpu_data_access_mode modes[STARPU_NMAXBUFS];
  602. /**
  603. Optional pointer to an array of characters which allows to
  604. define the sequential consistency for each handle for the
  605. current task.
  606. */
  607. unsigned char *handles_sequential_consistency;
  608. /**
  609. Optional pointer which is passed to the codelet through the
  610. second argument of the codelet implementation (e.g.
  611. starpu_codelet::cpu_func or starpu_codelet::cuda_func). The
  612. default value is <c>NULL</c>. starpu_codelet_pack_args()
  613. and starpu_codelet_unpack_args() are helpers that can can
  614. be used to respectively pack and unpack data into and from
  615. it, but the application can manage it any way, the only
  616. requirement is that the size of the data must be set in
  617. starpu_task::cl_arg_size .
  618. */
  619. void *cl_arg;
  620. /**
  621. Optional field. For some specific drivers, the pointer
  622. starpu_task::cl_arg cannot not be directly given to the
  623. driver function. A buffer of size starpu_task::cl_arg_size
  624. needs to be allocated on the driver. This buffer is then
  625. filled with the starpu_task::cl_arg_size bytes starting at
  626. address starpu_task::cl_arg. In this case, the argument
  627. given to the codelet is therefore not the
  628. starpu_task::cl_arg pointer, but the address of the buffer
  629. in local store (LS) instead. This field is ignored for CPU,
  630. CUDA and OpenCL codelets, where the starpu_task::cl_arg
  631. pointer is given as such.
  632. */
  633. size_t cl_arg_size;
  634. /**
  635. Optional field, the default value is <c>NULL</c>. This is a
  636. function pointer of prototype <c>void (*f)(void *)</c>
  637. which specifies a possible callback. If this pointer is
  638. non-<c>NULL</c>, the callback function is executed on the
  639. host after the execution of the task. Tasks which depend on
  640. it might already be executing. The callback is passed the
  641. value contained in the starpu_task::callback_arg field. No
  642. callback is executed if the field is set to <c>NULL</c>.
  643. */
  644. void (*callback_func)(void *);
  645. /**
  646. Optional field, the default value is <c>NULL</c>. This is
  647. the pointer passed to the callback function. This field is
  648. ignored if the field starpu_task::callback_func is set to
  649. <c>NULL</c>.
  650. */
  651. void *callback_arg;
  652. /**
  653. Optional field, the default value is <c>NULL</c>. This is a
  654. function pointer of prototype <c>void (*f)(void *)</c>
  655. which specifies a possible callback. If this pointer is
  656. non-<c>NULL</c>, the callback function is executed on the
  657. host when the task becomes ready for execution, before
  658. getting scheduled. The callback is passed the value
  659. contained in the starpu_task::prologue_callback_arg field.
  660. No callback is executed if the field is set to <c>NULL</c>.
  661. */
  662. void (*prologue_callback_func)(void *);
  663. /**
  664. Optional field, the default value is <c>NULL</c>. This is
  665. the pointer passed to the prologue callback function. This
  666. field is ignored if the field
  667. starpu_task::prologue_callback_func is set to <c>NULL</c>.
  668. */
  669. void *prologue_callback_arg;
  670. void (*prologue_callback_pop_func)(void *);
  671. void *prologue_callback_pop_arg;
  672. /**
  673. Optional field. Contains the tag associated to the task if
  674. the field starpu_task::use_tag is set, ignored
  675. otherwise.
  676. */
  677. starpu_tag_t tag_id;
  678. /**
  679. Optional field. In case starpu_task::cl_arg was allocated
  680. by the application through <c>malloc()</c>, setting
  681. starpu_task::cl_arg_free to 1 makes StarPU automatically
  682. call <c>free(cl_arg)</c> when destroying the task. This
  683. saves the user from defining a callback just for that. This
  684. is mostly useful when targeting MIC or SCC, where the
  685. codelet does not execute in the same memory space as the
  686. main thread.
  687. */
  688. unsigned cl_arg_free:1;
  689. /**
  690. Optional field. In case starpu_task::callback_arg was
  691. allocated by the application through <c>malloc()</c>,
  692. setting starpu_task::callback_arg_free to 1 makes StarPU
  693. automatically call <c>free(callback_arg)</c> when
  694. destroying the task.
  695. */
  696. unsigned callback_arg_free:1;
  697. /**
  698. Optional field. In case starpu_task::prologue_callback_arg
  699. was allocated by the application through <c>malloc()</c>,
  700. setting starpu_task::prologue_callback_arg_free to 1 makes
  701. StarPU automatically call
  702. <c>free(prologue_callback_arg)</c> when destroying the task.
  703. */
  704. unsigned prologue_callback_arg_free:1;
  705. /**
  706. Optional field. In case starpu_task::prologue_callback_pop_arg
  707. was allocated by the application through <c>malloc()</c>,
  708. setting starpu_task::prologue_callback_pop_arg_free to 1 makes
  709. StarPU automatically call
  710. <c>free(prologue_callback_pop_arg)</c> when destroying the
  711. task.
  712. */
  713. unsigned prologue_callback_pop_arg_free:1;
  714. /**
  715. Optional field, the default value is 0. If set, this flag
  716. indicates that the task should be associated with the tag
  717. contained in the starpu_task::tag_id field. Tags allow the
  718. application to synchronize with the task and to express
  719. task dependencies easily.
  720. */
  721. unsigned use_tag:1;
  722. /**
  723. If this flag is set (which is the default), sequential
  724. consistency is enforced for the data parameters of this
  725. task for which sequential consistency is enabled. Clearing
  726. this flag permits to disable sequential consistency for
  727. this task, even if data have it enabled.
  728. */
  729. unsigned sequential_consistency:1;
  730. /**
  731. If this flag is set, the function starpu_task_submit() is
  732. blocking and returns only when the task has been executed
  733. (or if no worker is able to process the task). Otherwise,
  734. starpu_task_submit() returns immediately.
  735. */
  736. unsigned synchronous:1;
  737. /**
  738. Default value is 0. If this flag is set, StarPU will bypass
  739. the scheduler and directly assign this task to the worker
  740. specified by the field starpu_task::workerid.
  741. */
  742. unsigned execute_on_a_specific_worker:1;
  743. /**
  744. Optional field, default value is 1. If this flag is set, it
  745. is not possible to synchronize with the task by the means
  746. of starpu_task_wait() later on. Internal data structures
  747. are only guaranteed to be freed once starpu_task_wait() is
  748. called if the flag is not set.
  749. */
  750. unsigned detach:1;
  751. /**
  752. Optional value. Default value is 0 for starpu_task_init(),
  753. and 1 for starpu_task_create(). If this flag is set, the
  754. task structure will automatically be freed, either after
  755. the execution of the callback if the task is detached, or
  756. during starpu_task_wait() otherwise. If this flag is not
  757. set, dynamically allocated data structures will not be
  758. freed until starpu_task_destroy() is called explicitly.
  759. Setting this flag for a statically allocated task structure
  760. will result in undefined behaviour. The flag is set to 1
  761. when the task is created by calling starpu_task_create().
  762. Note that starpu_task_wait_for_all() will not free any task.
  763. */
  764. unsigned destroy:1;
  765. /**
  766. Optional field. If this flag is set, the task will be
  767. re-submitted to StarPU once it has been executed. This flag
  768. must not be set if the flag starpu_task::destroy is set.
  769. This flag must be set before making another task depend on
  770. this one.
  771. */
  772. unsigned regenerate:1;
  773. /**
  774. @private
  775. This is only used for tasks that use multiformat handle.
  776. This should only be used by StarPU.
  777. */
  778. unsigned mf_skip:1;
  779. /**
  780. do not allocate a submitorder id for this task
  781. */
  782. unsigned no_submitorder:1;
  783. /**
  784. Whether the scheduler has pushed the task on some queue
  785. */
  786. unsigned scheduled:1;
  787. unsigned prefetched:1;
  788. /**
  789. Optional field. If the field
  790. starpu_task::execute_on_a_specific_worker is set, this
  791. field indicates the identifier of the worker that should
  792. process this task (as returned by starpu_worker_get_id()).
  793. This field is ignored if the field
  794. starpu_task::execute_on_a_specific_worker is set to 0.
  795. */
  796. unsigned workerid;
  797. /**
  798. Optional field. If the field
  799. starpu_task::execute_on_a_specific_worker is set, this
  800. field indicates the per-worker consecutive order in which
  801. tasks should be executed on the worker. Tasks will be
  802. executed in consecutive starpu_task::workerorder values,
  803. thus ignoring the availability order or task priority. See
  804. \ref StaticScheduling for more details. This field is
  805. ignored if the field
  806. starpu_task::execute_on_a_specific_worker is set to 0.
  807. */
  808. unsigned workerorder;
  809. /**
  810. Optional field. If the field starpu_task::workerids_len is
  811. different from 0, this field indicates an array of bits
  812. (stored as uint32_t values) which indicate the set of
  813. workers which are allowed to execute the task.
  814. starpu_task::workerid takes precedence over this.
  815. */
  816. uint32_t *workerids;
  817. /**
  818. Optional field. This provides the number of uint32_t values
  819. in the starpu_task::workerids array.
  820. */
  821. unsigned workerids_len;
  822. /**
  823. Optional field, the default value is ::STARPU_DEFAULT_PRIO.
  824. This field indicates a level of priority for the task. This
  825. is an integer value that must be set between the return
  826. values of the function starpu_sched_get_min_priority() for
  827. the least important tasks, and that of the function
  828. starpu_sched_get_max_priority() for the most important
  829. tasks (included). The ::STARPU_MIN_PRIO and
  830. ::STARPU_MAX_PRIO macros are provided for convenience and
  831. respectively return the value of
  832. starpu_sched_get_min_priority() and
  833. starpu_sched_get_max_priority(). Default priority is
  834. ::STARPU_DEFAULT_PRIO, which is always defined as 0 in
  835. order to allow static task initialization. Scheduling
  836. strategies that take priorities into account can use this
  837. parameter to take better scheduling decisions, but the
  838. scheduling policy may also ignore it.
  839. */
  840. int priority;
  841. /**
  842. Optional field. Current state of the task.
  843. */
  844. enum starpu_task_status status;
  845. /**
  846. @private
  847. This field is set when initializing a task. The function
  848. starpu_task_submit() will fail if the field does not have
  849. the correct value. This will hence avoid submitting tasks
  850. which have not been properly initialised.
  851. */
  852. int magic;
  853. /**
  854. Allows specifying the type of task, for filtering out tasks
  855. in profiling outputs, whether it is really internal to
  856. StarPU (::STARPU_TASK_TYPE_INTERNAL), a data acquisition
  857. synchronization task (::STARPU_TASK_TYPE_DATA_ACQUIRE), or
  858. a normal task (::STARPU_TASK_TYPE_NORMAL)
  859. */
  860. unsigned type;
  861. /**
  862. color of the task to be used in dag.dot.
  863. */
  864. unsigned color;
  865. /**
  866. Scheduling context.
  867. */
  868. unsigned sched_ctx;
  869. /**
  870. Help the hypervisor monitor the execution of this task.
  871. */
  872. int hypervisor_tag;
  873. unsigned possibly_parallel;
  874. /**
  875. Optional field. The bundle that includes this task. If no
  876. bundle is used, this should be <c>NULL</c>.
  877. */
  878. starpu_task_bundle_t bundle;
  879. /**
  880. Optional field. Profiling information for the task.
  881. */
  882. struct starpu_profiling_task_info *profiling_info;
  883. /**
  884. This can be set to the number of floating points operations
  885. that the task will have to achieve. This is useful for
  886. easily getting GFlops curves from the tool
  887. <c>starpu_perfmodel_plot</c>, and for the hypervisor load
  888. balancing.
  889. */
  890. double flops;
  891. /**
  892. Output field. Predicted duration of the task. This field is
  893. only set if the scheduling strategy uses performance
  894. models.
  895. */
  896. double predicted;
  897. /**
  898. Optional field. Predicted data transfer duration for the task in
  899. microseconds. This field is only valid if the scheduling
  900. strategy uses performance models.
  901. */
  902. double predicted_transfer;
  903. double predicted_start;
  904. /**
  905. @private
  906. A pointer to the previous task. This should only be used by
  907. StarPU.
  908. */
  909. struct starpu_task *prev;
  910. /**
  911. @private
  912. A pointer to the next task. This should only be used by
  913. StarPU.
  914. */
  915. struct starpu_task *next;
  916. /**
  917. @private
  918. This is private to StarPU, do not modify. If the task is
  919. allocated by hand (without starpu_task_create()), this
  920. field should be set to <c>NULL</c>.
  921. */
  922. void *starpu_private;
  923. #ifdef STARPU_OPENMP
  924. struct starpu_omp_task *omp_task;
  925. #else
  926. void *omp_task;
  927. #endif
  928. unsigned nb_termination_call_required;
  929. /**
  930. This field is managed by the scheduler, which is allowed to do
  931. whatever it wants with it. Typically, some area would be allocated on push, and released on pop.
  932. */
  933. void *sched_data;
  934. };
  /**
     Possible values of the field starpu_task::type: a normal task
     (::STARPU_TASK_TYPE_NORMAL), a task internal to StarPU
     (::STARPU_TASK_TYPE_INTERNAL), or a data acquisition
     synchronization task (::STARPU_TASK_TYPE_DATA_ACQUIRE).
  */
  #define STARPU_TASK_TYPE_NORMAL 0
  #define STARPU_TASK_TYPE_INTERNAL (1<<0)
  #define STARPU_TASK_TYPE_DATA_ACQUIRE (1<<1)

  /**
     Value to be used to initialize statically allocated tasks. This is
     equivalent to initializing a structure starpu_task
     with the function starpu_task_init().
     Fields which are not listed here are implicitly zero-initialized,
     as for any C designated initializer.
  */
  /* Note: remember to update starpu_task_init as well */
  #define STARPU_TASK_INITIALIZER \
  { \
  	.cl = NULL, \
  	.where = -1, \
  	.cl_arg = NULL, \
  	.cl_arg_size = 0, \
  	.callback_func = NULL, \
  	.callback_arg = NULL, \
  	.priority = STARPU_DEFAULT_PRIO, \
  	.use_tag = 0, \
  	.sequential_consistency = 1, \
  	.synchronous = 0, \
  	.execute_on_a_specific_worker = 0, \
  	.workerorder = 0, \
  	.bundle = NULL, \
  	.detach = 1, \
  	.destroy = 0, \
  	.regenerate = 0, \
  	.status = STARPU_TASK_INVALID, \
  	.profiling_info = NULL, \
  	.predicted = NAN, \
  	.predicted_transfer = NAN, \
  	.predicted_start = NAN, \
  	.starpu_private = NULL, \
  	.magic = 42, \
  	.type = STARPU_TASK_TYPE_NORMAL, \
  	.color = 0, \
  	.sched_ctx = STARPU_NMAX_SCHED_CTXS, \
  	.hypervisor_tag = 0, \
  	.flops = 0.0, \
  	.scheduled = 0, \
  	.prefetched = 0, \
  	.dyn_handles = NULL, \
  	.dyn_interfaces = NULL, \
  	.dyn_modes = NULL, \
  	.name = NULL, \
  	.possibly_parallel = 0 \
  }
  /**
     Return, as an unsigned value, the number of buffers of \p task:
     starpu_codelet::nbuffers of its codelet, unless that field is
     \ref STARPU_VARIABLE_NBUFFERS, in which case starpu_task::nbuffers
     is returned instead.
  */
  #define STARPU_TASK_GET_NBUFFERS(task) \
  	((unsigned)(((task)->cl->nbuffers != STARPU_VARIABLE_NBUFFERS) \
  		? (task)->cl->nbuffers \
  		: (task)->nbuffers))
  /**
     Return the \p i -th data handle of \p task: the \p i -th element
     of starpu_task::dyn_handles when that field is non-NULL (dynamic
     number of handles), the \p i -th element of starpu_task::handles
     otherwise (see \ref SettingManyDataHandlesForATask)
  */
  #define STARPU_TASK_GET_HANDLE(task, i) \
  	(!((task)->dyn_handles) ? (task)->handles[i] : (task)->dyn_handles[i])
  /**
     Return the array of data handles of \p task, i.e.
     starpu_task::dyn_handles when that field is non-NULL, and
     starpu_task::handles otherwise.
  */
  #define STARPU_TASK_GET_HANDLES(task) \
  	(!((task)->dyn_handles) ? (task)->handles : (task)->dyn_handles)
  /**
     Store \p handle as the \p i -th data handle of \p task. The value
     goes into the \p i -th element of starpu_task::dyn_handles when
     that field is non-NULL (dynamic number of handles), and into the
     \p i -th element of starpu_task::handles otherwise
     (see \ref SettingManyDataHandlesForATask)
  */
  #define STARPU_TASK_SET_HANDLE(task, handle, i) \
  	do { \
  		if (!((task)->dyn_handles)) \
  			(task)->handles[i] = (handle); \
  		else \
  			(task)->dyn_handles[i] = (handle); \
  	} while(0)
  /**
     Return the access mode of the \p i -th data handle of \p codelet:
     the \p i -th element of starpu_codelet::dyn_modes when that field
     is non-NULL (dynamic number of handles), the \p i -th element of
     starpu_codelet::modes otherwise (see \ref
     SettingManyDataHandlesForATask). In the latter case, \p i is
     asserted to be lower than \ref STARPU_NMAXBUFS.
  */
  /* Note: (i) is parenthesized inside the assertion so that an index
     expression using an operator of lower precedence than '<' (e.g.
     'k & 1') is compared as a whole. */
  #define STARPU_CODELET_GET_MODE(codelet, i) \
  	(((codelet)->dyn_modes) ? (codelet)->dyn_modes[i] : (assert((i) < STARPU_NMAXBUFS), (codelet)->modes[i]))
  /**
     Store \p mode as the access mode of the \p i -th data handle of
     \p codelet. The value goes into the \p i -th element of
     starpu_codelet::dyn_modes when that field is non-NULL (dynamic
     number of handles), and into the \p i -th element of
     starpu_codelet::modes otherwise (see \ref
     SettingManyDataHandlesForATask)
  */
  #define STARPU_CODELET_SET_MODE(codelet, mode, i) \
  	do { \
  		if (!((codelet)->dyn_modes)) \
  			(codelet)->modes[i] = (mode); \
  		else \
  			(codelet)->dyn_modes[i] = (mode); \
  	} while(0)
  /**
     Return the access mode of the \p i -th data handle of \p task.
     When the codelet of \p task has starpu_codelet::nbuffers set to
     \ref STARPU_VARIABLE_NBUFFERS, or when starpu_task::dyn_modes is
     non-NULL, the mode is read from the task: from the \p i -th
     element of starpu_task::dyn_modes if that field is non-NULL, from
     the \p i -th element of starpu_task::modes otherwise. In all other
     cases the mode is read from the codelet through
     STARPU_CODELET_GET_MODE() (see \ref SettingManyDataHandlesForATask)
  */
  #define STARPU_TASK_GET_MODE(task, i) \
  	((((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS) || ((task)->dyn_modes)) \
  		? (!((task)->dyn_modes) ? (task)->modes[i] : (task)->dyn_modes[i]) \
  		: STARPU_CODELET_GET_MODE((task)->cl, i))
  /**
     Set the access mode of the \p i -th data handle of \p task to
     \p mode. When the codelet of \p task has starpu_codelet::nbuffers
     set to \ref STARPU_VARIABLE_NBUFFERS or to a value greater than
     \ref STARPU_NMAXBUFS, the mode is stored in the task: in the \p i
     -th element of starpu_task::dyn_modes if that field is non-NULL,
     in the \p i -th element of starpu_task::modes otherwise. In all
     other cases the mode is stored in the codelet through
     STARPU_CODELET_SET_MODE() (see \ref SettingManyDataHandlesForATask)
  */
  /* Note: both branches are braced so that including this public header
     does not trigger -Wdangling-else / -Wparentheses in user code. */
  #define STARPU_TASK_SET_MODE(task, mode, i) \
  	do { \
  		if ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (task)->cl->nbuffers > STARPU_NMAXBUFS) \
  		{ \
  			if ((task)->dyn_modes) \
  				(task)->dyn_modes[i] = (mode); \
  			else \
  				(task)->modes[i] = (mode); \
  		} \
  		else \
  		{ \
  			STARPU_CODELET_SET_MODE((task)->cl, mode, i); \
  		} \
  	} while(0)
  /**
     Return the target node of the \p i -th data handle of \p codelet:
     the \p i -th element of starpu_codelet::dyn_nodes when that field
     is non-NULL (dynamic number of handles), the \p i -th element of
     starpu_codelet::nodes otherwise (see \ref
     SettingManyDataHandlesForATask)
  */
  #define STARPU_CODELET_GET_NODE(codelet, i) \
  	(!((codelet)->dyn_nodes) ? (codelet)->nodes[i] : (codelet)->dyn_nodes[i])
  /**
     Store \p target as the target node of the \p i -th data handle of
     \p codelet. The value goes into the \p i -th element of
     starpu_codelet::dyn_nodes when that field is non-NULL (dynamic
     number of handles), and into the \p i -th element of
     starpu_codelet::nodes otherwise (see \ref
     SettingManyDataHandlesForATask)
  */
  #define STARPU_CODELET_SET_NODE(codelet, target, i) \
  	do { \
  		if (!((codelet)->dyn_nodes)) \
  			(codelet)->nodes[i] = (target); \
  		else \
  			(codelet)->dyn_nodes[i] = (target); \
  	} while(0)
  1066. /**
  1067. Initialize \p task with default values. This function is implicitly
  1068. called by starpu_task_create(). By default, tasks initialized with
  1069. starpu_task_init() must be deinitialized explicitly with
  1070. starpu_task_clean(). Tasks can also be initialized statically,
  1071. using ::STARPU_TASK_INITIALIZER.
  1072. */
  1073. void starpu_task_init(struct starpu_task *task);
  1074. /**
  1075. Release all the structures automatically allocated to execute \p
  1076. task, but not the task structure itself and values set by the user
  1077. remain unchanged. It is thus useful for statically allocated tasks
  1078. for instance. It is also useful when users want to execute the same
  1079. operation several times with as little overhead as possible. It is
  1080. called automatically by starpu_task_destroy(). It has to be called
  1081. only after explicitly waiting for the task or after
  1082. starpu_shutdown() (waiting for the callback is not enough, since
  1083. StarPU still manipulates the task after calling the callback).
  1084. */
  1085. void starpu_task_clean(struct starpu_task *task);
  1086. /**
  1087. Allocate a task structure and initialize it with default values.
  1088. Tasks allocated dynamically with starpu_task_create() are
  1089. automatically freed when the task is terminated. This means that
  1090. the task pointer can not be used any more once the task is
  1091. submitted, since it can be executed at any time (unless
  1092. dependencies make it wait) and thus freed at any time. If the field
  1093. starpu_task::destroy is explicitly unset, the resources used by the
  1094. task have to be freed by calling starpu_task_destroy().
  1095. */
  1096. struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC;
  1097. /**
  1098. Free the resource allocated during starpu_task_create() and
  1099. associated with \p task. This function is called automatically
  1100. after the execution of a task when the field starpu_task::destroy
  1101. is set, which is the default for tasks created by
  1102. starpu_task_create(). Calling this function on a statically
  1103. allocated task results in an undefined behaviour.
  1104. */
  1105. void starpu_task_destroy(struct starpu_task *task);
  1106. /**
  1107. Submit \p task to StarPU. Calling this function does not mean that
  1108. the task will be executed immediately as there can be data or task
  1109. (tag) dependencies that are not fulfilled yet: StarPU will take
  1110. care of scheduling this task with respect to such dependencies.
  1111. This function returns immediately if the field
  1112. starpu_task::synchronous is set to 0, and blocks until the
  1113. termination of the task otherwise. It is also possible to
  1114. synchronize the application with asynchronous tasks by the means of
  1115. tags, using the starpu_tag_wait() function for instance.
  1116. In case of success, this function returns 0, a return value of
  1117. <c>-ENODEV</c> means that there is no worker able to process this
  1118. task (e.g. there is no GPU available and this task is only
  1119. implemented for CUDA devices). starpu_task_submit() can be called
  1120. from anywhere, including codelet functions and callbacks, provided
  1121. that the field starpu_task::synchronous is set to 0.
  1122. */
  1123. int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
  1124. /**
  1125. Submit \p task to the context \p sched_ctx_id. By default,
  1126. starpu_task_submit() submits the task to a global context that is
  1127. created automatically by StarPU.
  1128. */
  1129. int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
  1130. int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
  1131. /**
  1132. Block until \p task has been executed. It is not possible to
  1133. synchronize with a task more than once. It is not possible to wait
  1134. for synchronous or detached tasks. Upon successful completion, this
  1135. function returns 0. Otherwise, <c>-EINVAL</c> indicates that the
  1136. specified task was either synchronous or detached.
  1137. */
  1138. int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
  1139. /**
  1140. Allow to wait for an array of tasks. Upon successful completion,
  1141. this function returns 0. Otherwise, <c>-EINVAL</c> indicates that
  1142. one of the tasks was either synchronous or detached.
  1143. */
  1144. int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT;
  1145. /**
  1146. Block until all the tasks that were submitted (to the current
  1147. context or the global one if there is no current context) are
  1148. terminated. It does not destroy these tasks.
  1149. */
  1150. int starpu_task_wait_for_all(void);
  1151. /**
  1152. Block until there are \p n submitted tasks left (to the current
  1153. context or the global one if there is no current context) to be
  1154. executed. It does not destroy these tasks.
  1155. */
  1156. int starpu_task_wait_for_n_submitted(unsigned n);
  1157. /**
  1158. Wait until all the tasks that were already submitted to the context
  1159. \p sched_ctx_id have been terminated.
  1160. */
  1161. int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
  1162. /**
  1163. Wait until only \p n tasks are left to be executed among
  1164. those that were already submitted to the context \p
  1165. sched_ctx_id.
  1166. */
  1167. int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n);
  1168. /**
  1169. Wait until there is no more ready task.
  1170. */
  1171. int starpu_task_wait_for_no_ready(void);
  1172. /**
  1173. Return the number of submitted tasks which are ready for execution
  1174. or are already executing. It thus does not include tasks waiting for
  1175. dependencies.
  1176. */
  1177. int starpu_task_nready(void);
  1178. /**
  1179. Return the number of submitted tasks which have not completed yet.
  1180. */
  1181. int starpu_task_nsubmitted(void);
  1182. /**
  1183. Set the iteration number for all the tasks to be submitted after
  1184. this call. This is typically called at the beginning of a task
  1185. submission loop. This number will then show up in tracing tools. A
  1186. corresponding starpu_iteration_pop() call must be made to match the
  1187. call to starpu_iteration_push(), at the end of the same task
  1188. submission loop, typically.
  1189. Nested calls to starpu_iteration_push() and starpu_iteration_pop()
  1190. are allowed, to describe a loop nest for instance, provided that
  1191. they match properly.
  1192. */
  1193. void starpu_iteration_push(unsigned long iteration);
  1194. /**
  1195. Drop the iteration number for submitted tasks. This must match a
  1196. previous call to starpu_iteration_push(), and is typically called
  1197. at the end of a task submission loop.
  1198. */
  1199. void starpu_iteration_pop(void);
  1200. void starpu_do_schedule(void);
  1201. /**
  1202. Initialize \p cl with default values. Codelets should preferably be
  1203. initialized statically as shown in \ref DefiningACodelet. However
  1204. such an initialisation is not always possible, e.g. when using C++.
  1205. */
  1206. void starpu_codelet_init(struct starpu_codelet *cl);
  1207. /**
  1208. Output on \c stderr some statistics on the codelet \p cl.
  1209. */
  1210. void starpu_codelet_display_stats(struct starpu_codelet *cl);
  1211. /**
  1212. Return the task currently executed by the worker, or <c>NULL</c> if
  1213. it is called either from a thread that is not a task or simply
  1214. because there is no task being executed at the moment.
  1215. */
  1216. struct starpu_task *starpu_task_get_current(void);
  1217. /**
  1218. Return the memory node number of parameter \p i of the task
  1219. currently executed, or -1 if it is called either from a thread that
  1220. is not a task or simply because there is no task being executed at
  1221. the moment.
  1222. Usually, the returned memory node number is simply the memory node
  1223. for the current worker. That may however be different when using
  1224. e.g. starpu_codelet::specific_nodes.
  1225. */
  1226. int starpu_task_get_current_data_node(unsigned i);
  1227. /**
  1228. Return the name of the performance model of \p task.
  1229. */
  1230. const char *starpu_task_get_model_name(struct starpu_task *task);
  1231. /**
  1232. Return the name of \p task, i.e. either its starpu_task::name
  1233. field, or the name of the corresponding performance model.
  1234. */
  1235. const char *starpu_task_get_name(struct starpu_task *task);
  1236. /**
  1237. Allocate a task structure which is the exact duplicate of \p task.
  1238. */
  1239. struct starpu_task *starpu_task_dup(struct starpu_task *task);
  1240. /**
  1241. This function should be called by schedulers to specify the
  1242. codelet implementation to be executed when executing \p task.
  1243. */
  1244. void starpu_task_set_implementation(struct starpu_task *task, unsigned impl);
  1245. /**
  1246. Return the codelet implementation to be executed
  1247. when executing \p task.
  1248. */
  1249. unsigned starpu_task_get_implementation(struct starpu_task *task);
  1250. /**
  1251. Create (and submit) an empty task that unlocks a tag once all its
  1252. dependencies are fulfilled.
  1253. */
  1254. void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg);
  1255. /** @} */
  1256. #ifdef __cplusplus
  1257. }
  1258. #endif
  1259. #endif /* __STARPU_TASK_H__ */