data_partition.doxy 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014 CNRS
  5. * Copyright (C) 2011, 2012 INRIA
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Data_Partition Data Partition
  9. \struct starpu_data_filter
  10. The filter structure describes a data partitioning operation, to be
  11. given to the starpu_data_partition() function.
  12. \ingroup API_Data_Partition
  13. \var void (*starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)
  14. This function fills the child_interface structure with interface
  15. information for the id-th child of the parent father_interface (among
  16. nparts).
  17. \var unsigned starpu_data_filter::nchildren
  18. This is the number of parts to partition the data into.
  19. \var unsigned (*starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)
  20. This returns the number of children. This can be used instead of
  21. nchildren when the number of children depends on the actual data (e.g.
  22. the number of blocks in a sparse matrix).
  23. \var struct starpu_data_interface_ops *(*starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id)
  24. In case the resulting children use a different data interface, this
  25. function returns which interface is used by child number id.
  26. \var unsigned starpu_data_filter::filter_arg
  27. Allows defining an additional parameter for the filter function.
  28. \var void *starpu_data_filter::filter_arg_ptr
  29. Allows defining an additional pointer parameter for the filter
  30. function, such as the sizes of the different parts.
  31. @name Basic API
  32. \ingroup API_Data_Partition
  33. \fn void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f)
  34. \ingroup API_Data_Partition
  35. This requests partitioning one StarPU data initial_handle into
  36. several subdata according to the filter \p f.
  37. Here is an example of how to use the function.
  38. \code{.c}
  39. struct starpu_data_filter f = {
  40. .filter_func = starpu_matrix_filter_block,
  41. .nchildren = nslicesx
  42. };
  43. starpu_data_partition(A_handle, &f);
  44. \endcode
  45. \fn void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node)
  46. \ingroup API_Data_Partition
  47. This unapplies one filter, thus unpartitioning the data. The
  48. pieces of data are collected back into one big piece in the
  49. \p gathering_node (usually STARPU_MAIN_RAM). Tasks working on the partitioned data must
  50. be already finished when calling starpu_data_unpartition().
  51. Here is an example of how to use the function.
  52. \code{.c}
  53. starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
  54. \endcode
  55. \fn int starpu_data_get_nb_children(starpu_data_handle_t handle)
  56. \ingroup API_Data_Partition
  57. This function returns the number of children.
  58. \fn starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
  59. \ingroup API_Data_Partition
  60. Return the ith child of the given \p handle, which must have been
  61. partitioned beforehand.
  62. \fn starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... )
  63. \ingroup API_Data_Partition
  64. After partitioning a StarPU data by applying a filter,
  65. starpu_data_get_sub_data() can be used to get handles for each of the
  66. data portions. \p root_data is the parent data that was partitioned.
  67. \p depth is the number of filters to traverse (in case several filters
  68. have been applied, to e.g. partition in row blocks, and then in column
  69. blocks), and the subsequent parameters are the indexes. The function
  70. returns a handle to the subdata.
  71. Here is an example of how to use the function.
  72. \code{.c}
  73. h = starpu_data_get_sub_data(A_handle, 1, taskx);
  74. \endcode
  75. \fn starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa)
  76. \ingroup API_Data_Partition
  77. This function is similar to starpu_data_get_sub_data() but uses a
  78. va_list for the parameter list.
  79. \fn void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...)
  80. \ingroup API_Data_Partition
  81. Applies \p nfilters filters to the handle designated by
  82. \p root_data recursively. \p nfilters pointers to variables of the type
  83. starpu_data_filter should be given.
  84. \fn void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa)
  85. \ingroup API_Data_Partition
  86. Applies \p nfilters filters to the handle designated by
  87. \p root_data recursively. It uses a va_list of pointers to variables of
  88. the type starpu_data_filter.
  89. @name Asynchronous API
  90. \ingroup API_Data_Partition
  91. \fn void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children)
  92. \ingroup API_Data_Partition
  93. This plans for partitioning one StarPU data handle \p initial_handle into
  94. several subdata according to the filter \p f. The handles are returned into
  95. the \p children array, which has to be the same size as the number of parts
  96. described in \p f. These handles are not immediately usable,
  97. starpu_data_partition_submit has to be called to submit the actual partitioning.
  98. Here is an example of how to use the function:
  99. \code{.c}
  100. starpu_data_handle_t children[nslicesx];
  101. struct starpu_data_filter f = {
  102. .filter_func = starpu_matrix_filter_block,
  103. .nchildren = nslicesx
  104. };
  105. starpu_data_partition_plan(A_handle, &f, children);
  106. \endcode
  107. \fn void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
  108. \ingroup API_Data_Partition
  109. This submits the actual partitioning of \p initial_handle into the \p nparts
  110. \p children handles. This call is asynchronous, it only submits that the
  111. partitioning should be done, so that the \p children handles can now be used to
  112. submit tasks, and \p initial_handle can not be used to submit tasks any more (to
  113. guarantee coherency).
  114. For instance,
  115. \code{.c}
  116. starpu_data_partition_submit(A_handle, nslicesx, children);
  117. \endcode
  118. \fn void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
  119. \ingroup API_Data_Partition
  120. This is the same as starpu_data_partition_submit, but does not invalidate \p
  121. initial_handle. This allows continuing to use it, but the application has to be
  122. careful not to write to \p initial_handle or \p children handles, only read from
  123. them, since the coherency is otherwise not guaranteed. This thus allows
  124. submitting various tasks which concurrently read from various partitions of the data.
  125. When the application wants to write to \p initial_handle again, it should call
  126. starpu_data_unpartition_submit, which will properly add dependencies between the
  127. reads on the \p children and the writes to be submitted.
  128. If instead the application wants to write to \p children handles, it should
  129. call starpu_data_partition_readwrite_upgrade_submit, which will properly add
  130. dependencies between the reads on the \p initial_handle and the writes to be
  131. submitted.
  132. \fn void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
  133. \ingroup API_Data_Partition
  134. This assumes that a partitioning of \p initial_handle has already been submitted
  135. in readonly mode through starpu_data_partition_readonly_submit, and will upgrade
  136. that partitioning into read-write mode for the \p children, by invalidating \p
  137. initial_handle, and adding the necessary dependencies.
  138. \fn void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
  139. \ingroup API_Data_Partition
  140. This assumes that \p initial_handle is partitioned into \p children, and submits
  141. an unpartitioning of it, i.e. submitting a gathering of the pieces on the
  142. requested \p gathering_node memory node, and submitting an invalidation of the
  143. children.
  144. \p gathering_node can be set to -1 to let the runtime decide which memory node
  145. should be used to gather the pieces.
  146. \fn void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node)
  147. \ingroup API_Data_Partition
  148. This assumes that \p initial_handle is partitioned into \p children, and submits
  149. just a readonly unpartitioning of it, i.e. submitting a gathering of the pieces
  150. on the requested \p gathering_node memory node. It does not invalidate the
  151. children. This brings \p initial_handle and \p children handles to the same
  152. state as obtained with starpu_data_partition_readonly_submit.
  153. \p gathering_node can be set to -1 to let the runtime decide which memory node
  154. should be used to gather the pieces.
  155. \fn void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children)
  156. \ingroup API_Data_Partition
  157. This should be used to clear the partition planning established between \p
  158. root_data and \p children with starpu_data_partition_plan. This will notably
  159. submit an unregister of all the \p children, which can thus no longer be used
  160. afterwards.
  161. @name Predefined Vector Filter Functions
  162. \ingroup API_Data_Partition
  163. This section gives a partial list of the predefined partitioning
  164. functions for vector data. Examples on how to use them are shown in
  165. \ref PartitioningData. The complete list can be found in the file
  166. <c>starpu_data_filters.h</c>.
  167. \fn void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  168. \ingroup API_Data_Partition
  169. Return in \p child_interface the \p id th element of the vector
  170. represented by \p father_interface once partitioned in \p nparts chunks of
  171. equal size.
  172. \fn void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  173. \ingroup API_Data_Partition
  174. Return in \p child_interface the \p id th element of the vector
  175. represented by \p father_interface once partitioned in \p nparts chunks of
  176. equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
  177. of size (n-2*shadow)/nparts+2*shadow. The <c>filter_arg_ptr</c> field
  178. of \p f must be the shadow size cast to void*. <b>IMPORTANT</b>:
  179. This can only be used for read-only access, as no coherency is
  180. enforced for the shadowed parts. A usage example is available in
  181. examples/filters/shadow.c
  182. \fn void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  183. \ingroup API_Data_Partition
  184. Return in \p child_interface the \p id th element of the vector
  185. represented by \p father_interface once partitioned into \p nparts chunks
  186. according to the <c>filter_arg_ptr</c> field of \p f. The
  187. <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
  188. elements, each of which specifies the number of elements in each chunk
  189. of the partition.
  190. \fn void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  191. \ingroup API_Data_Partition
  192. Return in \p child_interface the \p id th element of the vector
  193. represented by \p father_interface once partitioned in <c>2</c> chunks of
  194. equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
  195. @name Predefined Matrix Filter Functions
  196. \ingroup API_Data_Partition
  197. This section gives a partial list of the predefined partitioning
  198. functions for matrix data. Examples on how to use them are shown in
  199. \ref PartitioningData. The complete list can be found in the file
  200. <c>starpu_data_filters.h</c>.
  201. \fn void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  202. \ingroup API_Data_Partition
  203. This partitions a dense Matrix along the x dimension, thus
  204. getting (x/\p nparts ,y) matrices. If \p nparts does not divide x, the
  205. last submatrix contains the remainder.
  206. \fn void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  207. \ingroup API_Data_Partition
  208. This partitions a dense Matrix along the x dimension, with a
  209. shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
  210. nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
  211. the last submatrix contains the remainder. <b>IMPORTANT</b>: This can
  212. only be used for read-only access, as no coherency is enforced for the
  213. shadowed parts. A usage example is available in
  214. examples/filters/shadow2d.c
  215. \fn void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  216. \ingroup API_Data_Partition
  217. This partitions a dense Matrix along the y dimension, thus
  218. getting (x,y/\p nparts) matrices. If \p nparts does not divide y, the
  219. last submatrix contains the remainder.
  220. \fn void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  221. \ingroup API_Data_Partition
  222. This partitions a dense Matrix along the y dimension, with a
  223. shadow border <c>filter_arg_ptr</c>, thus getting
  224. (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
  225. divide y-2*shadow, the last submatrix contains the remainder.
  226. <b>IMPORTANT</b>: This can only be used for read-only access, as no
  227. coherency is enforced for the shadowed parts. A usage example is
  228. available in examples/filters/shadow2d.c
  229. @name Predefined Block Filter Functions
  230. \ingroup API_Data_Partition
  231. This section gives a partial list of the predefined partitioning
  232. functions for block data. Examples on how to use them are shown in
  233. \ref PartitioningData. The complete list can be found in the file
  234. <c>starpu_data_filters.h</c>. A usage example is available in
  235. examples/filters/shadow3d.c
  236. \fn void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  237. \ingroup API_Data_Partition
  238. This partitions a block along the X dimension, thus getting
  239. (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
  240. submatrix contains the remainder.
  241. \fn void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  242. \ingroup API_Data_Partition
  243. This partitions a block along the X dimension, with a
  244. shadow border <c>filter_arg_ptr</c>, thus getting
  245. ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
  246. divide x, the last submatrix contains the remainder. <b>IMPORTANT</b>:
  247. This can only be used for read-only access, as no coherency is
  248. enforced for the shadowed parts.
  249. \fn void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  250. \ingroup API_Data_Partition
  251. This partitions a block along the Y dimension, thus getting
  252. (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
  253. submatrix contains the remainder.
  254. \fn void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  255. \ingroup API_Data_Partition
  256. This partitions a block along the Y dimension, with a
  257. shadow border <c>filter_arg_ptr</c>, thus getting
  258. (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
  259. divide y, the last submatrix contains the remainder. <b>IMPORTANT</b>:
  260. This can only be used for read-only access, as no coherency is
  261. enforced for the shadowed parts.
  262. \fn void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  263. \ingroup API_Data_Partition
  264. This partitions a block along the Z dimension, thus getting
  265. (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
  266. submatrix contains the remainder.
  267. \fn void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  268. \ingroup API_Data_Partition
  269. This partitions a block along the Z dimension, with a
  270. shadow border <c>filter_arg_ptr</c>, thus getting
  271. (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
  272. divide z, the last submatrix contains the remainder. <b>IMPORTANT</b>:
  273. This can only be used for read-only access, as no coherency is
  274. enforced for the shadowed parts.
  275. @name Predefined BCSR Filter Functions
  276. \ingroup API_Data_Partition
  277. This section gives a partial list of the predefined partitioning
  278. functions for BCSR data. Examples on how to use them are shown in
  279. \ref PartitioningData. The complete list can be found in the file
  280. <c>starpu_data_filters.h</c>.
  281. \fn void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  282. \ingroup API_Data_Partition
  283. This partitions a block-sparse matrix into dense matrices.
  284. \fn void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
  285. \ingroup API_Data_Partition
  286. This partitions a block-sparse matrix into vertical
  287. block-sparse matrices.
  288. */