starpu_data_filters.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Antoine Lucas
  4. * Copyright (C) 2009-2012,2014,2015,2017 Université de Bordeaux
  5. * Copyright (C) 2010 Mehdi Juhoor
  6. * Copyright (C) 2010-2013,2015,2017,2018,2019 CNRS
  7. * Copyright (C) 2011 Inria
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #ifndef __STARPU_DATA_FILTERS_H__
  21. #define __STARPU_DATA_FILTERS_H__
  22. #include <starpu.h>
  23. #include <stdarg.h>
  24. #ifdef __cplusplus
  25. extern "C"
  26. {
  27. #endif
  28. /**
  29. @defgroup API_Data_Partition Data Partition
  30. @{
  31. */
  32. struct starpu_data_interface_ops;
  33. /**
  34. Describe a data partitioning operation, to be given to starpu_data_partition()
  35. */
  36. struct starpu_data_filter
  37. {
  38. /**
  39. Fill the \p child_interface structure with interface information
  40. for the \p id -th child of the parent \p father_interface (among
  41. \p nparts). The filter structure is passed as third argument, allowing to inspect the
  42. starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
  43. parameters.
  44. The details of what needs to be filled in \p child_interface vary according
  45. to the data interface, but generally speaking:
  46. <ul>
  47. <li> <c>id</c> is usually just copied over from the father,
  48. when the sub data has the same structure as the father,
  49. e.g. a subvector is a vector, a submatrix is a matrix, etc.
  50. This is however not the case for instance when dividing a
  51. BCSR matrix into its dense blocks, which then are matrices.
  52. </li>
  53. <li> <c>nx</c>, <c>ny</c> and the like are usually divided by
  54. the number of subdata, depending on how the subdivision is
  55. done (e.g. nx division vs ny division for vertical matrix
  56. division vs horizontal matrix division). </li>
  57. <li> <c>ld</c> for matrix interfaces are usually just
  58. copied over: the leading dimension (ld) usually does not
  59. change. </li>
  60. <li> <c>elemsize</c> is usually just copied over. </li>
  61. <li> <c>ptr</c>, the pointer to the data, has to be
  62. computed according to \p id and the father's <c>ptr</c>, so
  63. as to point to the start of the sub data. This should
  64. however be done only if the father has <c>ptr</c> different
  65. from NULL: in the OpenCL case notably, the
  66. <c>dev_handle</c> and <c>offset</c> fields are used
  67. instead. </li>
  68. <li> <c>dev_handle</c> should be just copied over from the
  69. parent. </li>
  70. <li> <c>offset</c> has to be computed according to \p id and
  71. the father's <c>offset</c>, so as to provide the offset of
  72. the start of the sub data. This is notably used for the
  73. OpenCL case. </li>
  74. </ul>
  75. */
  76. void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts);
  77. unsigned nchildren; /**< Number of parts to partition the data into. */
  78. /**
  79. Return the number of children. This can be used instead of
  80. starpu_data_filter::nchildren when the number of children depends
  81. on the actual data (e.g. the number of blocks in a sparse
  82. matrix).
  83. */
  84. unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle);
  85. /**
  86. When children use different data interfaces,
  87. return which interface is used by child number \p id.
  88. */
  89. struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id);
  90. unsigned filter_arg; /**< Additional parameter for the filter function. */
  91. /**
  92. Additional pointer parameter for
  93. the filter function, such as the
  94. sizes of the different parts. */
  95. void *filter_arg_ptr;
  96. };
  97. /**
  98. @name Basic API
  99. @{
  100. */
  101. /**
  102. Request the partitioning of \p initial_handle into several subdata
  103. according to the filter \p f.
  104. Here is an example of how to use the function.
  105. \code{.c}
  106. struct starpu_data_filter f =
  107. {
  108. .filter_func = starpu_matrix_filter_block,
  109. .nchildren = nslicesx
  110. };
  111. starpu_data_partition(A_handle, &f);
  112. \endcode
  113. */
  114. void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
  115. /**
  116. Unapply the filter which has been applied to \p root_data, thus
  117. unpartitioning the data. The pieces of data are collected back into
  118. one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
  119. Tasks working on the partitioned data will be waited for
  120. by starpu_data_unpartition().
  121. Here is an example of how to use the function.
  122. \code{.c}
  123. starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
  124. \endcode
  125. */
  126. void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
  127. /**
  128. Return the \p i -th child of the given \p handle, which must have
  129. been partitioned beforehand.
  130. */
  131. starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
  132. /**
  133. Return the number of children \p handle has been partitioned into.
  134. */
  135. int starpu_data_get_nb_children(starpu_data_handle_t handle);
  136. /**
  137. After partitioning a StarPU data by applying a filter,
  138. starpu_data_get_sub_data() can be used to get handles for each of the
  139. data portions. \p root_data is the parent data that was partitioned.
  140. \p depth is the number of filters to traverse (in case several filters
  141. have been applied, to e.g. partition in row blocks, and then in column
  142. blocks), and the subsequent parameters are the indexes. The function
  143. returns a handle to the subdata.
  144. Here is an example of how to use the function.
  145. \code{.c}
  146. h = starpu_data_get_sub_data(A_handle, 1, taskx);
  147. \endcode
  148. */
  149. starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
  150. /**
  151. Similar to starpu_data_get_sub_data() but use a \c va_list for the
  152. parameter list.
  153. */
  154. starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
  155. /**
  156. Apply \p nfilters filters to the handle designated by \p
  157. root_data recursively. \p nfilters pointers to variables of the
  158. type starpu_data_filter should be given.
  159. */
  160. void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
  161. /**
  162. Apply \p nfilters filters to the handle designated by
  163. \p root_data recursively. Use a \c va_list of pointers to
  164. variables of the type starpu_data_filter.
  165. */
  166. void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
  167. /** @} */
  168. /**
  169. @name Asynchronous API
  170. @{
  171. */
  172. /**
  173. Plan to partition \p initial_handle into several subdata according to
  174. the filter \p f.
  175. The handles are returned into the \p children array, which has to be
  176. the same size as the number of parts described in \p f. These handles
  177. are not immediately usable, starpu_data_partition_submit() has to be
  178. called to submit the actual partitioning.
  179. Here is an example of how to use the function:
  180. \code{.c}
  181. starpu_data_handle_t children[nslicesx];
  182. struct starpu_data_filter f =
  183. {
  184. .filter_func = starpu_matrix_filter_block,
  185. .nchildren = nslicesx
  186. };
  187. starpu_data_partition_plan(A_handle, &f, children);
  188. \endcode
  189. */
  190. void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children);
  191. /**
  192. Submit the actual partitioning of \p initial_handle into the \p nparts
  193. \p children handles. This call is asynchronous, it only submits that the
  194. partitioning should be done, so that the \p children handles can now be used to
  195. submit tasks, and \p initial_handle can not be used to submit tasks any more (to
  196. guarantee coherency).
  197. For instance,
  198. \code{.c}
  199. starpu_data_partition_submit(A_handle, nslicesx, children);
  200. \endcode
  201. */
  202. void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
  203. /**
  204. Similar to starpu_data_partition_submit(), but do not invalidate \p
  205. initial_handle. This allows to continue using it, but the application has to be
  206. careful not to write to \p initial_handle or \p children handles, only read from
  207. them, since the coherency is otherwise not guaranteed. This thus allows to
  208. submit various tasks which concurrently read from various partitions of the data.
  209. When the application wants to write to \p initial_handle again, it should call
  210. starpu_data_unpartition_submit(), which will properly add dependencies between the
  211. reads on the \p children and the writes to be submitted.
  212. If instead the application wants to write to \p children handles, it should
  213. call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
  214. dependencies between the reads on the \p initial_handle and the writes to be
  215. submitted.
  216. */
  217. void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
  218. /**
  219. Assume that a partitioning of \p initial_handle has already been submitted
  220. in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
  221. that partitioning into read-write mode for the \p children, by invalidating \p
  222. initial_handle, and adding the necessary dependencies.
  223. */
  224. void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
  225. /**
  226. Assuming that \p initial_handle is partitioned into \p children,
  227. submit an unpartitioning of \p initial_handle, i.e. submit a
  228. gathering of the pieces on the requested \p gathering_node memory
  229. node, and submit an invalidation of the children.
  230. */
  231. void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
  232. void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int gathering_node); /* NOTE(review): undocumented in this header; takes no nparts/children arguments, unlike starpu_data_unpartition_submit() -- presumably a recursive variant, confirm against the implementation. */
  233. /**
  234. Similar to starpu_data_unpartition_submit(), but do not invalidate the \p
  235. children. This allows to continue using them, but the application has to be
  236. careful not to write to \p initial_handle or \p children handles, only read from
  237. them, since the coherency is otherwise not guaranteed. This thus allows to
  238. submit various tasks which concurrently read from various
  239. partitions of the data.
  240. */
  241. void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
  242. /**
  243. Clear the partition planning established between \p root_data and
  244. \p children with starpu_data_partition_plan(). This will notably
  245. submit an unregistration of all the \p children, which can thus not be
  246. used any more afterwards.
  247. */
  248. void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
  249. /**
  250. Similar to starpu_data_unpartition_submit_sequential_consistency()
  251. but allow to specify a callback function for the unpartitioning task.
  252. */
  253. void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg);
  254. /**
  255. Similar to starpu_data_partition_submit() but also allow to specify
  256. the coherency to be used for the main data \p initial_handle
  257. through the parameter \p sequential_consistency.
  258. */
  259. void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
  260. /**
  261. Similar to starpu_data_unpartition_submit() but also allow to specify
  262. the coherency to be used for the main data \p initial_handle
  263. through the parameter \p sequential_consistency.
  264. */
  265. void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
  266. /**
  267. Disable the automatic partitioning of the data \p handle for which
  268. an asynchronous plan has previously been submitted.
  269. */
  270. void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
  271. /** @} */
  272. /**
  273. @name Predefined BCSR Filter Functions
  274. Predefined partitioning functions for BCSR data. Examples on how to
  275. use them are shown in \ref PartitioningData.
  276. @{
  277. */
  278. /**
  279. Partition a block-sparse matrix into dense matrices.
  280. */
  281. void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  282. /** @} */
  283. /**
  284. @name Predefined CSR Filter Functions
  285. Predefined partitioning functions for CSR data. Examples on how to
  286. use them are shown in \ref PartitioningData.
  287. @{
  288. */
  289. /**
  290. Partition a block-sparse matrix into vertical block-sparse matrices.
  291. */
  292. void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  293. /** @} */
  294. /**
  295. @name Predefined Matrix Filter Functions
  296. Predefined partitioning functions for matrix
  297. data. Examples on how to use them are shown in \ref
  298. PartitioningData.
  299. @{
  300. */
  301. /**
  302. Partition a dense Matrix along the x dimension, thus getting (x/\p
  303. nparts ,y) matrices. If \p nparts does not divide x, the last
  304. submatrix contains the remainder.
  305. */
  306. void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  307. /**
  308. Partition a dense Matrix along the x dimension, with a
  309. shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
  310. nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
  311. the last submatrix contains the remainder.
  312. <b>IMPORTANT</b>: This can
  313. only be used for read-only access, as no coherency is enforced for the
  314. shadowed parts. A usage example is available in
  315. examples/filters/shadow2d.c
  316. */
  317. void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  318. /**
  319. Partition a dense Matrix along the y dimension, thus getting
  320. (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
  321. submatrix contains the remainder.
  322. */
  323. void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  324. /**
  325. Partition a dense Matrix along the y dimension, with a
  326. shadow border <c>filter_arg_ptr</c>, thus getting
  327. (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
  328. divide y-2*shadow, the last submatrix contains the remainder.
  329. <b>IMPORTANT</b>: This can only be used for read-only access, as no
  330. coherency is enforced for the shadowed parts. A usage example is
  331. available in examples/filters/shadow2d.c
  332. */
  333. void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  334. /** @} */
  335. /**
  336. @name Predefined Vector Filter Functions
  337. Predefined partitioning functions for vector
  338. data. Examples on how to use them are shown in \ref
  339. PartitioningData.
  340. @{
  341. */
  342. /**
  343. Return in \p child_interface the \p id th element of the vector
  344. represented by \p father_interface once partitioned in \p nparts chunks of
  345. equal size.
  346. */
  347. void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  348. /**
  349. Return in \p child_interface the \p id th element of the vector
  350. represented by \p father_interface once partitioned in \p nparts chunks of
  351. equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
  352. of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
  353. of \p f must be the shadow size cast into \c void*.
  354. <b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
  355. enforced for the shadowed parts. A usage example is available in
  356. examples/filters/shadow.c
  357. */
  358. void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  359. /**
  360. Return in \p child_interface the \p id th element of the vector
  361. represented by \p father_interface once partitioned into \p nparts chunks
  362. according to the <c>filter_arg_ptr</c> field of \p f. The
  363. <c>filter_arg_ptr</c> field must point to an array of \p nparts \c long
  364. elements, each of which specifies the number of elements in each chunk
  365. of the partition.
  366. */
  367. void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  368. /**
  369. Return in \p child_interface the \p id th element of the vector
  370. represented by \p father_interface once partitioned into \p nparts chunks
  371. according to the <c>filter_arg_ptr</c> field of \p f. The
  372. <c>filter_arg_ptr</c> field must point to an array of \p nparts \c uint32_t
  373. elements, each of which specifies the number of elements in each chunk
  374. of the partition.
  375. */
  376. void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  377. /**
  378. Return in \p child_interface the \p id th element of the vector
  379. represented by \p father_interface once partitioned in <c>2</c> chunks of
  380. equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
  381. */
  382. void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  383. /** @} */
  384. /**
  385. @name Predefined Block Filter Functions
  386. Predefined partitioning functions for block data. Examples on how
  387. to use them are shown in \ref PartitioningData. An example is
  388. available in \c examples/filters/shadow3d.c
  389. @{
  390. */
  391. /**
  392. Partition a block along the X dimension, thus getting
  393. (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
  394. submatrix contains the remainder.
  395. */
  396. void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  397. /**
  398. Partition a block along the X dimension, with a
  399. shadow border <c>filter_arg_ptr</c>, thus getting
  400. ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
  401. divide x-2*shadow, the last submatrix contains the remainder.
  402. <b>IMPORTANT</b>:
  403. This can only be used for read-only access, as no coherency is
  404. enforced for the shadowed parts.
  405. */
  406. void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  407. /**
  408. Partition a block along the Y dimension, thus getting
  409. (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
  410. submatrix contains the remainder.
  411. */
  412. void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  413. /**
  414. Partition a block along the Y dimension, with a
  415. shadow border <c>filter_arg_ptr</c>, thus getting
  416. (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
  417. divide y-2*shadow, the last submatrix contains the remainder.
  418. <b>IMPORTANT</b>:
  419. This can only be used for read-only access, as no coherency is
  420. enforced for the shadowed parts.
  421. */
  422. void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  423. /**
  424. Partition a block along the Z dimension, thus getting
  425. (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
  426. submatrix contains the remainder.
  427. */
  428. void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  429. /**
  430. Partition a block along the Z dimension, with a
  431. shadow border <c>filter_arg_ptr</c>, thus getting
  432. (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
  433. divide z-2*shadow, the last submatrix contains the remainder.
  434. <b>IMPORTANT</b>:
  435. This can only be used for read-only access, as no coherency is
  436. enforced for the shadowed parts.
  437. */
  438. void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
  439. /** @} */
  440. /** @} */
  441. #ifdef __cplusplus
  442. }
  443. #endif
  444. #endif