starpu_data_interfaces.h 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #ifndef __STARPU_DATA_INTERFACES_H__
  17. #define __STARPU_DATA_INTERFACES_H__
  18. #include <starpu.h>
  19. #ifdef STARPU_USE_CUDA
  20. /* starpu_cudaStream_t is the stream type taken by the asynchronous CUDA copy methods: an opaque void * when STARPU_DONT_INCLUDE_CUDA_HEADERS is defined, cudaStream_t from <cuda_runtime.h> otherwise */
  21. # ifdef STARPU_DONT_INCLUDE_CUDA_HEADERS
  22. typedef void *starpu_cudaStream_t;
  23. # else
  24. # include <cuda_runtime.h>
  25. typedef cudaStream_t starpu_cudaStream_t;
  26. # endif
  27. #endif
  28. #ifdef __cplusplus
  29. extern "C"
  30. {
  31. #endif
  32. /**
  33. @defgroup API_Data_Interfaces Data Interfaces
  34. @brief Data management is done at a high-level in StarPU: rather than
  35. accessing a mere list of contiguous buffers, the tasks may manipulate
  36. data that are described by a high-level construct which we call data
  37. interface.
  38. An example of data interface is the "vector" interface which describes
  39. a contiguous data array on a specific memory node. This interface is a
  40. simple structure containing the number of elements in the array, the
  41. size of the elements, and the address of the array in the appropriate
  42. address space (this address may be invalid if there is no valid copy
  43. of the array in the memory node). More information on the data
  44. interfaces provided by StarPU are given in \ref API_Data_Interfaces.
  45. When a piece of data managed by StarPU is used by a task, the task
  46. implementation is given a pointer to an interface describing a valid
  47. copy of the data that is accessible from the current processing unit.
  48. Every worker is associated to a memory node which is a logical
  49. abstraction of the address space from which the processing unit gets
  50. its data. For instance, the memory node associated to the different
  51. CPU workers represents main memory (RAM), the memory node associated
  52. to a GPU is DRAM embedded on the device. Every memory node is
  53. identified by a logical index which is accessible from the
  54. function starpu_worker_get_memory_node(). When registering a piece of
  55. data to StarPU, the specified memory node indicates where the piece of
  56. data initially resides (we also call this memory node the home node of
  57. a piece of data).
  58. In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id()
  59. and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node
  60. numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
  61. There are several ways to register a memory region so that it can be
  62. managed by StarPU. StarPU provides data interfaces for vectors, 2D
  63. matrices, 3D matrices as well as BCSR and CSR sparse matrices.
  64. Each data interface is provided with a set of field access functions.
  65. The ones using a <c>void *</c> parameter are meant to be used in codelet
  66. implementations (see for example the code in
  67. \ref VectorScalingUsingStarPUAPI).
  68. Applications can provide their own interface as shown in \ref DefiningANewDataInterface.
  69. @{
  70. */
  71. /**
  72. Define the per-interface methods. If the
  73. starpu_data_copy_methods::any_to_any method is provided, it will be
  74. used by default if no specific method is provided. It can still be
  75. useful to provide more specific methods in case of e.g. available
  76. particular CUDA or OpenCL support.
  77. */
  78. struct starpu_data_copy_methods
  79. {
  80. /**
  81. If defined, allow the interface to declare whether it supports
  82. transferring from \p src_interface on node \p src_node to \p
  83. dst_interface on node \p dst_node, run from node \p handling_node.
  84. If not defined, it is assumed that the interface supports all
  85. transfers.
  86. */
  87. int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node);
  88. /**
  89. Define how to copy data from the \p src_interface interface on the
  90. \p src_node CPU node to the \p dst_interface interface on the \p
  91. dst_node CPU node. Return 0 on success.
  92. */
  93. int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  94. /**
  95. Define how to copy data from the \p src_interface interface on the
  96. \p src_node CPU node to the \p dst_interface interface on the \p
  97. dst_node CUDA node. Return 0 on success.
  98. */
  99. int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  100. /**
  101. Define how to copy data from the \p src_interface interface on the
  102. \p src_node CPU node to the \p dst_interface interface on the \p
  103. dst_node OpenCL node. Return 0 on success.
  104. */
  105. int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  106. /**
  107. Define how to copy data from the \p src_interface interface on the
  108. \p src_node CPU node to the \p dst_interface interface on the \p
  109. dst_node FPGA node. Return 0 on success.
  110. */
  111. int (*ram_to_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  112. /**
  113. Define how to copy data from the \p src_interface interface on the
  114. \p src_node CUDA node to the \p dst_interface interface on the \p
  115. dst_node CPU node. Return 0 on success.
  116. */
  117. int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  118. /**
  119. Define how to copy data from the \p src_interface interface on the
  120. \p src_node CUDA node to the \p dst_interface interface on the \p
  121. dst_node CUDA node. Return 0 on success.
  122. */
  123. int (*cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  124. /**
  125. Define how to copy data from the \p src_interface interface on the
  126. \p src_node OpenCL node to the \p dst_interface interface on the
  127. \p dst_node CPU node. Return 0 on success.
  128. */
  129. int (*opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  130. /**
  131. Define how to copy data from the \p src_interface interface on the
  132. \p src_node OpenCL node to the \p dst_interface interface on the
  133. \p dst_node OpenCL node. Return 0 on success.
  134. */
  135. int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  136. /**
  137. Define how to copy data from the \p src_interface interface on the
  138. \p src_node FPGA node to the \p dst_interface interface on the \p
  139. dst_node CPU node. Return 0 on success.
  140. */
  141. int (*fpga_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  142. /**
  143. Define how to copy data from the \p src_interface interface on the
  144. \p src_node CPU node to the \p dst_interface interface on the \p
  145. dst_node MPI Slave node. Return 0 on success.
  146. */
  147. int (*ram_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  148. /**
  149. Define how to copy data from the \p src_interface interface on the
  150. \p src_node MPI Slave node to the \p dst_interface interface on
  151. the \p dst_node CPU node. Return 0 on success.
  152. */
  153. int (*mpi_ms_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  154. /**
  155. Define how to copy data from the \p src_interface interface on the
  156. \p src_node MPI Slave node to the \p dst_interface interface on
  157. the \p dst_node MPI Slave node. Return 0 on success.
  158. */
  159. int (*mpi_ms_to_mpi_ms)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  160. #ifdef STARPU_USE_CUDA
  161. /**
  162. Define how to copy data from the \p src_interface interface on the
  163. \p src_node CPU node to the \p dst_interface interface on the \p
  164. dst_node CUDA node, using the given stream. Must return 0 if the
  165. transfer was actually completed completely synchronously, or
  166. <c>-EAGAIN</c> if at least some transfers are still ongoing and
  167. should be awaited for by the core.
  168. */
  169. int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
  170. /**
  171. Define how to copy data from the \p src_interface interface on the
  172. \p src_node CUDA node to the \p dst_interface interface on the \p
  173. dst_node CPU node, using the given stream. Must return 0 if the
  174. transfer was actually completed completely synchronously, or
  175. <c>-EAGAIN</c> if at least some transfers are still ongoing and
  176. should be awaited for by the core.
  177. */
  178. int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
  179. /**
  180. Define how to copy data from the \p src_interface interface on the
  181. \p src_node CUDA node to the \p dst_interface interface on the \p
  182. dst_node CUDA node, using the given stream. Must return 0 if the
  183. transfer was actually completed completely synchronously, or
  184. <c>-EAGAIN</c> if at least some transfers are still ongoing and
  185. should be awaited for by the core.
  186. */
  187. int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream);
  188. #else
  189. int (*ram_to_cuda_async)(void);
  190. int (*cuda_to_ram_async)(void);
  191. int (*cuda_to_cuda_async)(void);
  192. #endif
  193. #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
  194. /**
  195. Define how to copy data from the \p src_interface interface on the
  196. \p src_node CPU node to the \p dst_interface interface on the \p
  197. dst_node OpenCL node, by recording in \p event, a pointer to a
  198. <c>cl_event</c>, the event of the last submitted transfer. Must
  199. return 0 if the transfer was actually completed completely
  200. synchronously, or <c>-EAGAIN</c> if at least some transfers are
  201. still ongoing and should be awaited for by the core.
  202. */
  203. int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
  204. /**
  205. Define how to copy data from the \p src_interface interface on the
  206. \p src_node OpenCL node to the \p dst_interface interface on the
  207. \p dst_node CPU node, by recording in \p event, a pointer to a
  208. <c>cl_event</c>, the event of the last submitted transfer. Must
  209. return 0 if the transfer was actually completed completely
  210. synchronously, or <c>-EAGAIN</c> if at least some transfers are
  211. still ongoing and should be awaited for by the core.
  212. */
  213. int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
  214. /**
  215. Define how to copy data from the \p src_interface interface on the
  216. \p src_node OpenCL node to the \p dst_interface interface on the
  217. \p dst_node OpenCL node, by recording in \p event, a pointer to a
  218. <c>cl_event</c>, the event of the last submitted transfer. Must
  219. return 0 if the transfer was actually completed completely
  220. synchronously, or <c>-EAGAIN</c> if at least some transfers are
  221. still ongoing and should be awaited for by the core.
  222. */
  223. int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
  224. #else
  225. int (*ram_to_opencl_async)(void);
  226. int (*opencl_to_ram_async)(void);
  227. int (*opencl_to_opencl_async)(void);
  228. #endif
  229. /**
  230. Define how to copy data from the \p src_interface interface on the
  231. \p src_node CPU node to the \p dst_interface interface on the \p
  232. dst_node FPGA node. Must return 0 if the transfer was actually
  233. completed completely synchronously, or <c>-EAGAIN</c> if at least
  234. some transfers are still ongoing and should be awaited for by the
  235. core.
  236. */
  237. int (*ram_to_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  238. /**
  239. Define how to copy data from the \p src_interface interface on the
  240. \p src_node FPGA node to the \p dst_interface interface on the \p
  241. dst_node CPU node. Must return 0 if the transfer was actually
  242. completed completely synchronously, or <c>-EAGAIN</c> if at least
  243. some transfers are still ongoing and should be awaited for by the
  244. core.
  245. */
  246. int (*fpga_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  247. /**
  248. Define how to copy data from the \p src_interface interface on the
  249. \p src_node CPU node to the \p dst_interface interface on the \p
  250. dst_node MPI Slave node, with the given event. Must return 0 if the
  251. transfer was actually completed completely synchronously, or
  252. <c>-EAGAIN</c> if at least some transfers are still ongoing and
  253. should be awaited for by the core.
  254. */
  255. int (*ram_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *event);
  256. /**
  257. Define how to copy data from the \p src_interface interface on the
  258. \p src_node MPI Slave node to the \p dst_interface interface on
  259. the \p dst_node CPU node, with the given event. Must return 0 if
  260. the transfer was actually completed completely synchronously, or
  261. <c>-EAGAIN</c> if at least some transfers are still ongoing and
  262. should be awaited for by the core.
  263. */
  264. int (*mpi_ms_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *event);
  265. /**
  266. Define how to copy data from the \p src_interface interface on the
  267. \p src_node MPI Slave node to the \p dst_interface interface on
  268. the \p dst_node MPI Slave node, with the given event. Must
  269. return 0 if the transfer was actually completed completely
  270. synchronously, or <c>-EAGAIN</c> if at least some transfers are
  271. still ongoing and should be awaited for by the core.
  272. */
  273. int (*mpi_ms_to_mpi_ms_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *event);
  274. /**
  275. Define how to copy data from the \p src_interface interface on the
  276. \p src_node node to the \p dst_interface interface on the \p
  277. dst_node node. This is meant to be implemented through the
  278. starpu_interface_copy() helper, to which async_data should be
  279. passed as such, and will be used to manage asynchronicity. This
  280. must return <c>-EAGAIN</c> if any of the starpu_interface_copy()
  281. calls has returned <c>-EAGAIN</c> (i.e. at least some transfer is
  282. still ongoing), and return 0 otherwise.
  283. This can only be implemented if the interface has ready-to-send
  284. data blocks. If the interface is more involved than
  285. this, i.e. it needs to collect pieces of data before
  286. transferring, starpu_data_interface_ops::pack_data and
  287. starpu_data_interface_ops::peek_data should be implemented instead,
  288. and the core will just transfer the resulting data buffer.
  289. */
  290. int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
  291. };
  292. /**
  293. Identifier for all predefined StarPU data interfaces.
  294. */
  295. enum starpu_data_interface_id
  296. {
  297. STARPU_UNKNOWN_INTERFACE_ID = -1, /**< Unknown interface */
  298. STARPU_MATRIX_INTERFACE_ID=0, /**< Identifier for the matrix data interface */
  299. STARPU_BLOCK_INTERFACE_ID=1, /**< Identifier for the block data interface */
  300. STARPU_VECTOR_INTERFACE_ID=2, /**< Identifier for the vector data interface */
  301. STARPU_CSR_INTERFACE_ID=3, /**< Identifier for the CSR data interface */
  302. STARPU_BCSR_INTERFACE_ID=4, /**< Identifier for the BCSR data interface */
  303. STARPU_VARIABLE_INTERFACE_ID=5, /**< Identifier for the variable data interface */
  304. STARPU_VOID_INTERFACE_ID=6, /**< Identifier for the void data interface */
  305. STARPU_MULTIFORMAT_INTERFACE_ID=7, /**< Identifier for the multiformat data interface */
  306. STARPU_COO_INTERFACE_ID=8, /**< Identifier for the COO data interface */
  307. STARPU_TENSOR_INTERFACE_ID=9, /**< Identifier for the tensor data interface */
  308. STARPU_MAX_INTERFACE_ID=10 /**< Maximum number of data interfaces */
  309. };
  310. /**
  311. Per-interface data management methods.
  312. */
  313. struct starpu_data_interface_ops
  314. {
  315. /**
  316. Register an existing interface into a data handle.
  317. This iterates over all memory nodes to initialize all fields of the data
  318. interface on each of them. Since data is not allocated yet except on the
  319. home node, pointers should be left as NULL except on the \p home_node, for
  320. which the pointers should be copied from the given \p data_interface, which
  321. was filled with the application's pointers.
  322. This method is mandatory.
  323. */
  324. void (*register_data_handle) (starpu_data_handle_t handle, unsigned home_node, void *data_interface);
  325. /**
  326. Unregister a data handle.
  327. This iterates over all memory nodes to free any pointer in the data
  328. interface on each of them.
  329. At this point, free_data_on_node has been already called on each of them.
  330. This just clears anything that would still be left.
  331. */
  332. void (*unregister_data_handle) (starpu_data_handle_t handle);
  333. /**
  334. Allocate data for the interface on a given node. This should use
  335. starpu_malloc_on_node() to perform the allocation(s), and fill the pointers
  336. in the data interface. It should return the size of the allocated memory, or
  337. -ENOMEM if memory could not be allocated.
  338. Note that the memory node can be CPU memory, GPU memory, or even disk
  339. area. The result returned by starpu_malloc_on_node() should be just
  340. stored as uintptr_t without trying to interpret it since it may be a
  341. GPU pointer, a disk descriptor, etc.
  342. This method is mandatory to be able to support memory nodes.
  343. */
  344. starpu_ssize_t (*allocate_data_on_node) (void *data_interface, unsigned node);
  345. /**
  346. Free data of the interface on a given node.
  347. This method is mandatory to be able to support memory nodes.
  348. */
  349. void (*free_data_on_node) (void *data_interface, unsigned node);
  350. /**
  351. Initialize the interface.
  352. This method is optional. It is called when initializing the
  353. handler on all the memory nodes.
  354. */
  355. void (*init) (void *data_interface);
  356. /**
  357. Struct with pointers to functions for performing ram/cuda/opencl synchronous and asynchronous transfers.
  358. This field is mandatory to be able to support memory
  359. nodes, except disk nodes which can be supported by just
  360. implementing starpu_data_interface_ops::pack_data and
  361. starpu_data_interface_ops::unpack_data.
  362. */
  363. const struct starpu_data_copy_methods *copy_methods;
  364. /**
  365. @deprecated
  366. Use starpu_data_interface_ops::to_pointer instead.
  367. Return the current pointer (if any) for the handle on the given node.
  368. This method is only required if starpu_data_interface_ops::to_pointer
  369. is not implemented.
  370. */
  371. void * (*handle_to_pointer) (starpu_data_handle_t handle, unsigned node);
  372. /**
  373. Return the current pointer (if any) for the given interface on the given node.
  374. This method is only required for starpu_data_handle_to_pointer()
  375. and starpu_data_get_local_ptr(), and for disk support.
  376. */
  377. void * (*to_pointer) (void *data_interface, unsigned node);
  378. /**
  379. Return whether the given \p ptr is within the data for the given interface on the given node.
  380. This method is optional, as it is only used for coherency checks.
  381. */
  382. int (*pointer_is_inside) (void *data_interface, unsigned node, void *ptr);
  383. /**
  384. Return an estimation of the size of data, for performance models and tracing feedback.
  385. */
  386. size_t (*get_size) (starpu_data_handle_t handle);
  387. /**
  388. Return an estimation of the size of allocated data, for allocation
  389. management.
  390. If not specified, the starpu_data_interface_ops::get_size method is
  391. used instead.
  392. */
  393. size_t (*get_alloc_size) (starpu_data_handle_t handle);
  394. /**
  395. Return the maximum size that the data may need to increase to. For
  396. instance, in the case of compressed matrix tiles this is the size
  397. when the block is fully dense.
  398. This is currently only used for feedback tools.
  399. */
  400. size_t (*get_max_size) (starpu_data_handle_t handle);
  401. /**
  402. Return a 32bit footprint which characterizes the data size and layout (nx, ny, ld, elemsize, etc.), required for indexing performance models.
  403. starpu_hash_crc32c_be() and alike can be used to produce this 32bit value from various types of values.
  404. */
  405. uint32_t (*footprint) (starpu_data_handle_t handle);
  406. /**
  407. Return a 32bit footprint which characterizes the data allocation, to be used
  408. for indexing allocation cache.
  409. If not specified, the starpu_data_interface_ops::footprint method is
  410. used instead.
  411. */
  412. uint32_t (*alloc_footprint) (starpu_data_handle_t handle);
  413. /**
  414. Compare the data size and layout of two interfaces (nx, ny, ld, elemsize,
  415. etc.), to be used for indexing performance models. It should return 1 if
  416. the two interfaces size and layout match computation-wise, and 0 otherwise.
  417. It does *not* compare the actual content of the interfaces.
  418. */
  419. int (*compare) (void *data_interface_a, void *data_interface_b);
  420. /**
  421. Compare the data allocation of two interfaces, to be used for indexing
  422. allocation cache. It should return
  423. 1 if the two interfaces are allocation-compatible, i.e. basically have the same alloc_size, and 0 otherwise.
  424. If not specified, the starpu_data_interface_ops::compare method is
  425. used instead.
  426. */
  427. int (*alloc_compare) (void *data_interface_a, void *data_interface_b);
  428. /**
  429. Dump the sizes of a handle to a file.
  430. This is required for performance models.
  431. */
  432. void (*display) (starpu_data_handle_t handle, FILE *f);
  433. /**
  434. Describe the data into a string in a brief way, such as one
  435. letter to describe the type of data, and the data
  436. dimensions.
  437. This is required for tracing feedback.
  438. */
  439. starpu_ssize_t (*describe) (void *data_interface, char *buf, size_t size);
  440. /**
  441. An identifier that is unique to each interface.
  442. */
  443. enum starpu_data_interface_id interfaceid;
  444. /**
  445. Size of the interface data descriptor.
  446. */
  447. size_t interface_size;
  448. /**
  449. NOTE(review): undocumented in this header; presumably non-zero when this is the multiformat interface -- confirm. */
  450. char is_multiformat;
  451. /**
  452. If set to non-zero, StarPU will never try to reuse an allocated
  453. buffer for a different handle. This can be notably useful for
  454. application-defined interfaces which have a dynamic size, and for
  455. which it thus does not make sense to reuse the buffer since it will
  456. probably not have the proper size.
  457. */
  458. char dontcache;
  459. /**
  460. NOTE(review): undocumented in this header; presumably returns the multiformat operations for \p data_interface -- confirm. */
  461. struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
  462. /**
  463. Pack the data handle into a contiguous buffer at the address
  464. allocated with <c>starpu_malloc_flags(ptr, size, 0)</c> (and thus
  465. returned in \p ptr) and set the size of the newly created buffer
  466. in \p count. If \p ptr is <c>NULL</c>, the function should not
  467. copy the data in the buffer but just set count to the size of the
  468. buffer which would have been allocated. The special value -1
  469. indicates the size is yet unknown.
  470. This method (and starpu_data_interface_ops::unpack_data) is required
  471. for disk support if the starpu_data_copy_methods::any_to_any method
  472. is not implemented (because the in-memory data layout is too
  473. complex).
  474. This is also required for MPI support if there is no registered MPI data type.
  475. */
  476. int (*pack_data) (starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
  477. /**
  478. Read the data handle from the contiguous buffer at the address
  479. \p ptr of size \p count.
  480. */
  481. int (*peek_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
  482. /**
  483. Unpack the data handle from the contiguous buffer at the address
  484. \p ptr of size \p count.
  485. The memory at the address \p ptr should be freed after the data unpacking operation.
  486. */
  487. int (*unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
  488. /**
  489. Name of the interface
  490. */
  491. char *name;
  492. };
  493. /**
  494. @name Basic API
  495. @{
  496. */
  497. /**
  498. Register a piece of data into the handle located at the
  499. \p handleptr address. The \p data_interface buffer contains the initial
  500. description of the data in the \p home_node. The \p ops argument is a
  501. pointer to a structure describing the different methods used to
  502. manipulate this type of interface. See starpu_data_interface_ops for
  503. more details on this structure.
  504. If \p home_node is -1, StarPU will automatically allocate the memory when
  505. it is used for the first time in write-only mode. Once such data
  506. handle has been automatically allocated, it is possible to access it
  507. using any access mode.
  508. Note that StarPU supplies a set of predefined types of interface (e.g.
  509. vector or matrix) which can be registered by the means of helper
  510. functions (e.g. starpu_vector_data_register() or
  511. starpu_matrix_data_register()).
  512. */
  513. void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops);
  514. /**
  515. Register that a buffer for \p handle on \p node will be set. This is typically
  516. used by starpu_*_ptr_register helpers before setting the interface pointers for
  517. this node, to tell the core that it is now allocated.
  518. */
  519. void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
  520. /**
  521. Register a new piece of data into the handle \p handledst with the
  522. same interface as the handle \p handlesrc.
  523. */
  524. void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
  525. /**
  526. Return the pointer associated with \p handle on node \p node or <c>NULL</c>
  527. if handle’s interface does not support this operation or data for this
  528. \p handle is not allocated on that \p node.
  529. */
  530. void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
  531. /**
  532. Return whether the given \p ptr is within the data for \p handle on node \p
  533. node (1) or not (0). If the handle interface does not support this operation,
  534. and thus the result is unknown, -1 is returned.
  535. */
  536. int starpu_data_pointer_is_inside(starpu_data_handle_t handle, unsigned node, void *ptr);
  537. /**
  538. Return the local pointer associated with \p handle or <c>NULL</c> if
  539. \p handle’s interface does not have any data allocated locally.
  540. */
  541. void *starpu_data_get_local_ptr(starpu_data_handle_t handle);
  542. /**
  543. Return the interface associated with \p handle on \p memory_node.
  544. */
  545. void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node);
  546. /**
  547. Return the unique identifier of the interface associated with
  548. the given \p handle.
  549. */
  550. enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle);
  551. /**
  552. Execute the packing operation of the interface of the data
  553. registered at \p handle (see starpu_data_interface_ops). This
  554. packing operation must allocate a buffer large enough at \p ptr on node \p node and copy
  555. into the newly allocated buffer the data associated to \p handle. \p count
  556. will be set to the size of the allocated buffer. If \p ptr is <c>NULL</c>, the
  557. function should not copy the data in the buffer but just set \p count to
  558. the size of the buffer which would have been allocated. The special
  559. value -1 indicates the size is yet unknown.
  560. */
  561. int starpu_data_pack_node(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count);
  562. /**
  563. Like starpu_data_pack_node(), but for the local memory node.
  564. */
  565. int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count);
  566. /**
  567. Read in handle's \p node replicate the data located at \p ptr
  568. of size \p count as described by the interface of the data. The interface
  569. registered at \p handle must define a peeking operation (see
  570. starpu_data_interface_ops).
  571. */
  572. int starpu_data_peek_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
  573. /**
  574. Read in handle's local replicate the data located at \p ptr
  575. of size \p count as described by the interface of the data. The interface
  576. registered at \p handle must define a peeking operation (see
  577. starpu_data_interface_ops).
  578. */
  579. int starpu_data_peek(starpu_data_handle_t handle, void *ptr, size_t count);
  580. /**
  581. Unpack in handle the data located at \p ptr of size \p count allocated
  582. on node \p node as described by the interface of the data. The interface
  583. registered at \p handle must define an unpacking operation (see
  584. starpu_data_interface_ops).
  585. */
  586. int starpu_data_unpack_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
  587. /**
  588. Unpack in handle the data located at \p ptr of size \p count as
  589. described by the interface of the data. The interface registered at
  590. \p handle must define an unpacking operation (see
  591. starpu_data_interface_ops).
  592. */
  593. int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count);
  594. /**
  595. Return the size of the data associated with \p handle.
  596. */
  597. size_t starpu_data_get_size(starpu_data_handle_t handle);
  598. /**
  599. Return the size of the allocated data associated with \p handle.
  600. */
  601. size_t starpu_data_get_alloc_size(starpu_data_handle_t handle);
  602. /**
  603. Return the maximum size that the \p handle data may need to increase to.
  604. */
  605. starpu_ssize_t starpu_data_get_max_size(starpu_data_handle_t handle);
  606. /**
  607. Return the handle corresponding to the data pointed to by the \p ptr host pointer.
  608. */
  609. starpu_data_handle_t starpu_data_lookup(const void *ptr);
  /**
  Return the home node of \p handle.
  NOTE(review): presumably this is the \p home_node value given when the
  data was registered (see starpu_data_register()), which may be -1 -- to be
  confirmed against the implementation.
  */
  610. int starpu_data_get_home_node(starpu_data_handle_t handle);
  611. /**
  612. Print basic information about \p handle on \p node.
  613. */
  614. void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream);
  615. /**
  616. Return the next available id for a newly created data interface
  617. (\ref DefiningANewDataInterface).
  618. */
  619. int starpu_data_interface_get_next_id(void);
  620. /**
  621. Copy \p size bytes from byte offset \p src_offset of \p src on \p src_node
  622. to byte offset \p dst_offset of \p dst on \p dst_node. This is to be used in
  623. the starpu_data_copy_methods::any_to_any copy method, which is provided with \p async_data to
  624. be passed to starpu_interface_copy(). This returns <c>-EAGAIN</c> if the
  625. transfer is still ongoing, or 0 if the transfer is already completed.
  626. */
  627. int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node,
  628. uintptr_t dst, size_t dst_offset, unsigned dst_node,
  629. size_t size, void *async_data);
  630. /**
  631. Copy \p numblocks blocks of \p blocksize bytes from byte offset \p src_offset
  632. of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p
  633. dst_node.
  634. The blocks start at addresses which are \p ld_src (resp. \p ld_dst) bytes apart in
  635. the source (resp. destination) interface.
  636. If \p blocksize == \p ld_src == \p ld_dst, the transfer is optimized into a single
  637. starpu_interface_copy() call.
  638. This is to be used in the starpu_data_copy_methods::any_to_any copy
  639. method for 2D data, which is provided with \p async_data to be passed to
  640. starpu_interface_copy(). This returns <c>-EAGAIN</c> if the transfer is still
  641. ongoing, or 0 if the transfer is already completed.
  642. */
  643. int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node,
  644. uintptr_t dst, size_t dst_offset, unsigned dst_node,
  645. size_t blocksize,
  646. size_t numblocks, size_t ld_src, size_t ld_dst,
  647. void *async_data);
  648. /**
  649. Copy \p numblocks1 * \p numblocks2 blocks of \p blocksize bytes from byte
  650. offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of
  651. \p dst on \p dst_node.
  652. The blocks are grouped by \p numblocks1 blocks whose start addresses are
  653. \p ld1_src (resp. \p ld1_dst) bytes apart in the source (resp. destination)
  654. interface.
  655. Such groups are grouped by \p numblocks2 groups whose start addresses are
  656. \p ld2_src (resp. \p ld2_dst) bytes apart in the source (resp. destination)
  657. interface.
  658. If the blocks are contiguous, the transfers will be optimized.
  659. This is to be used in the starpu_data_copy_methods::any_to_any copy
  660. method for 3D data, which is provided with \p async_data to be passed to
  661. starpu_interface_copy(). This returns <c>-EAGAIN</c> if the transfer is still
  662. ongoing, or 0 if the transfer is already completed.
  663. */
  664. int starpu_interface_copy3d(uintptr_t src, size_t src_offset, unsigned src_node,
  665. uintptr_t dst, size_t dst_offset, unsigned dst_node,
  666. size_t blocksize,
  667. size_t numblocks1, size_t ld1_src, size_t ld1_dst,
  668. size_t numblocks2, size_t ld2_src, size_t ld2_dst,
  669. void *async_data);
  670. /**
  671. Copy \p numblocks1 * \p numblocks2 * \p numblocks3 blocks of \p blocksize
  672. bytes from byte offset \p src_offset of \p src on \p src_node to byte offset
  673. \p dst_offset of \p dst on \p dst_node.
  674. The blocks are grouped by \p numblocks1 blocks whose start addresses are
  675. \p ld1_src (resp. \p ld1_dst) bytes apart in the source (resp. destination)
  676. interface.
  677. Such groups are grouped by \p numblocks2 groups whose start addresses are
  678. \p ld2_src (resp. \p ld2_dst) bytes apart in the source (resp. destination)
  679. interface.
  680. Such groups are grouped by \p numblocks3 groups whose start addresses are
  681. \p ld3_src (resp. \p ld3_dst) bytes apart in the source (resp. destination)
  682. interface.
  683. If the blocks are contiguous, the transfers will be optimized.
  684. This is to be used in the starpu_data_copy_methods::any_to_any copy
  685. method for 4D data, which is provided with \p async_data to be passed to
  686. starpu_interface_copy(). This returns <c>-EAGAIN</c> if the transfer is still
  687. ongoing, or 0 if the transfer is already completed.
  688. */
  689. int starpu_interface_copy4d(uintptr_t src, size_t src_offset, unsigned src_node,
  690. uintptr_t dst, size_t dst_offset, unsigned dst_node,
  691. size_t blocksize,
  692. size_t numblocks1, size_t ld1_src, size_t ld1_dst,
  693. size_t numblocks2, size_t ld2_src, size_t ld2_dst,
  694. size_t numblocks3, size_t ld3_src, size_t ld3_dst,
  695. void *async_data);
  696. /**
  697. When an asynchronous implementation of the data transfer is implemented, the call
  698. to the underlying CUDA, OpenCL, etc. call should be surrounded
  699. by calls to starpu_interface_start_driver_copy_async() and
  700. starpu_interface_end_driver_copy_async(), so that it is recorded in offline
  701. execution traces, and the timing of the submission is checked. \p start must
  702. point to a variable whose value will be passed unchanged to
  703. starpu_interface_end_driver_copy_async().
  704. */
  705. void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start);
  706. /**
  707. See starpu_interface_start_driver_copy_async().
  708. */
  709. void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start);
  710. /**
  711. Record in offline execution traces the copy of \p size bytes from
  712. node \p src_node to node \p dst_node
  713. */
  714. void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size);
  715. /**
  716. Allocate \p size bytes on node \p dst_node with the given allocation \p flags. This returns 0 if
  717. allocation failed, the allocation method should then return <c>-ENOMEM</c> as
  718. allocated size. Deallocation must be done with starpu_free_on_node_flags().
  719. */
  720. uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags);
  721. /**
  722. Allocate \p size bytes on node \p dst_node with the default allocation flags. This returns 0 if
  723. allocation failed, the allocation method should then return <c>-ENOMEM</c> as
  724. allocated size. Deallocation must be done with starpu_free_on_node().
  725. */
  726. uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size);
  727. /**
  728. Free \p addr of \p size bytes on node \p dst_node which was previously allocated
  729. with starpu_malloc_on_node_flags() with the given allocation \p flags.
  730. */
  731. void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags);
  732. /**
  733. Free \p addr of \p size bytes on node \p dst_node which was previously allocated
  734. with starpu_malloc_on_node().
  735. */
  736. void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size);
  737. /**
  738. Define the default flags for allocations performed by starpu_malloc_on_node() and
  739. starpu_free_on_node(). The default is \ref STARPU_MALLOC_PINNED | \ref STARPU_MALLOC_COUNT.
  740. */
  741. void starpu_malloc_on_node_set_default_flags(unsigned node, int flags);
  742. /** @} */
  743. /**
  744. @name Accessing Matrix Data Interfaces
  745. @{
  746. */
  747. extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
  748. /**
  749. Matrix interface for dense matrices.
  750. */
  751. struct starpu_matrix_interface
  752. {
  753. enum starpu_data_interface_id id; /**< Identifier of the interface */
  754. uintptr_t ptr; /**< local pointer of the matrix */
  755. uintptr_t dev_handle; /**< device handle of the matrix */
  756. size_t offset; /**< offset in the matrix */
  757. uint32_t nx; /**< number of elements on the x-axis of the matrix */
  758. uint32_t ny; /**< number of elements on the y-axis of the matrix */
  759. uint32_t ld; /**< number of elements between each row of the
  760. matrix. May be equal to starpu_matrix_interface::nx
  761. when there is no padding.
  762. */
  763. size_t elemsize; /**< size of the elements of the matrix */
  764. size_t allocsize; /**< size actually currently allocated */
  765. };
  766. /**
  767. Register the \p nx x \p ny 2D matrix of \p elemsize-byte elements pointed
  768. by \p ptr and initialize \p handle to represent it. \p ld specifies the number
  769. of elements between rows. A value greater than \p nx adds padding, which
  770. can be useful for alignment purposes.
  771. Here is an example of how to use the function.
  772. \code{.c}
  773. float *matrix;
  774. starpu_data_handle_t matrix_handle;
  775. matrix = (float*)malloc(width * height * sizeof(float));
  776. starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
  777. \endcode
  778. */
  779. void starpu_matrix_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize);
  780. /**
  781. Similar to starpu_matrix_data_register, but additionally specifies which
  782. allocation size should be used instead of the initial nx*ny*elemsize.
  783. */
  784. void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize, size_t allocsize);
  785. /**
  786. Register into the \p handle that to store data on node \p node it should use the
  787. buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
  788. (for OpenCL, notably), with \p ld elements between rows.
  789. */
  790. void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld);
  791. /**
  792. Return the number of elements on the x-axis of the matrix
  793. designated by \p handle.
  794. */
  795. uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle);
  796. /**
  797. Return the number of elements on the y-axis of the matrix
  798. designated by \p handle.
  799. */
  800. uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle);
  801. /**
  802. Return the number of elements between each row of the matrix
  803. designated by \p handle. May be equal to nx when there is no padding.
  804. */
  805. uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle);
  806. /**
  807. Return the local pointer associated with \p handle.
  808. */
  809. uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle);
  810. /**
  811. Return the size of the elements registered into the matrix
  812. designated by \p handle.
  813. */
  814. size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle);
  815. /**
  816. Return the allocated size of the matrix designated by \p handle.
  817. */
  818. size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle);
  819. #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG)
  820. #define STARPU_MATRIX_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_matrix_interface *)(interface))->id) == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix.")
  821. #define STARPU_MATRIX_GET_PTR(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->ptr) ; })
  822. #define STARPU_MATRIX_GET_DEV_HANDLE(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->dev_handle) ; })
  823. #define STARPU_MATRIX_GET_OFFSET(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->offset) ; })
  824. #define STARPU_MATRIX_GET_NX(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->nx) ; })
  825. #define STARPU_MATRIX_GET_NY(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->ny) ; })
  826. #define STARPU_MATRIX_GET_LD(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->ld) ; })
  827. #define STARPU_MATRIX_GET_ELEMSIZE(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->elemsize) ; })
  828. #define STARPU_MATRIX_GET_ALLOCSIZE(interface) ({ STARPU_MATRIX_CHECK(interface); (((struct starpu_matrix_interface *)(interface))->allocsize) ; })
  829. #else
  830. /**
  831. Return a pointer to the matrix designated by \p interface, valid
  832. on CPUs and CUDA devices only. For OpenCL devices, the device handle
  833. and offset need to be used instead.
  834. */
  835. #define STARPU_MATRIX_GET_PTR(interface) (((struct starpu_matrix_interface *)(interface))->ptr)
  836. /**
  837. Return a device handle for the matrix designated by \p interface,
  838. to be used with OpenCL. The offset returned by
  839. ::STARPU_MATRIX_GET_OFFSET has to be used in
  840. addition to this.
  841. */
  842. #define STARPU_MATRIX_GET_DEV_HANDLE(interface) (((struct starpu_matrix_interface *)(interface))->dev_handle)
  843. /**
  844. Return the offset in the matrix designated by \p interface, to be
  845. used with the device handle.
  846. */
  847. #define STARPU_MATRIX_GET_OFFSET(interface) (((struct starpu_matrix_interface *)(interface))->offset)
  848. /**
  849. Return the number of elements on the x-axis of the matrix
  850. designated by \p interface.
  851. */
  852. #define STARPU_MATRIX_GET_NX(interface) (((struct starpu_matrix_interface *)(interface))->nx)
  853. /**
  854. Return the number of elements on the y-axis of the matrix
  855. designated by \p interface.
  856. */
  857. #define STARPU_MATRIX_GET_NY(interface) (((struct starpu_matrix_interface *)(interface))->ny)
  858. /**
  859. Return the number of elements between each row of the matrix
  860. designated by \p interface. May be equal to nx when there is no padding.
  861. */
  862. #define STARPU_MATRIX_GET_LD(interface) (((struct starpu_matrix_interface *)(interface))->ld)
  863. /**
  864. Return the size of the elements registered into the matrix
  865. designated by \p interface.
  866. */
  867. #define STARPU_MATRIX_GET_ELEMSIZE(interface) (((struct starpu_matrix_interface *)(interface))->elemsize)
  868. /**
  869. Return the allocated size of the matrix designated by \p interface.
  870. */
  871. #define STARPU_MATRIX_GET_ALLOCSIZE(interface) (((struct starpu_matrix_interface *)(interface))->allocsize)
  872. #endif
  /*
  STARPU_MATRIX_CHECK is only defined above when both
  STARPU_HAVE_STATEMENT_EXPRESSIONS and STARPU_DEBUG are defined. Provide a
  no-op fallback so that the setters below also build in the other
  configurations, where the getters perform no interface check either.
  */
  #ifndef STARPU_MATRIX_CHECK
  #define STARPU_MATRIX_CHECK(interface) ((void)0)
  #endif
  /**
  Set the number of elements on the x-axis of the matrix
  designated by \p interface to \p newnx.
  */
  #define STARPU_MATRIX_SET_NX(interface, newnx) do { \
  	STARPU_MATRIX_CHECK(interface); \
  	(((struct starpu_matrix_interface *)(interface))->nx) = (newnx); \
  } while (0)
  /**
  Set the number of elements on the y-axis of the matrix
  designated by \p interface to \p newny.
  */
  #define STARPU_MATRIX_SET_NY(interface, newny) do { \
  	STARPU_MATRIX_CHECK(interface); \
  	(((struct starpu_matrix_interface *)(interface))->ny) = (newny); \
  } while (0)
  /**
  Set the number of elements between each row of the matrix
  designated by \p interface to \p newld. May be set to the same value as nx
  when there is no padding.
  */
  #define STARPU_MATRIX_SET_LD(interface, newld) do { \
  	STARPU_MATRIX_CHECK(interface); \
  	(((struct starpu_matrix_interface *)(interface))->ld) = (newld); \
  } while (0)
  898. /** @} */
  899. /**
  900. @name Accessing COO Data Interfaces
  901. @{
  902. */
  903. extern struct starpu_data_interface_ops starpu_interface_coo_ops;
  904. /**
  905. COO Matrices
  906. */
  907. struct starpu_coo_interface
  908. {
  909. enum starpu_data_interface_id id; /**< identifier of the interface */
  910. uint32_t *columns; /**< column array of the matrix */
  911. uint32_t *rows; /**< row array of the matrix */
  912. uintptr_t values; /**< values of the matrix */
  913. uint32_t nx; /**< number of elements on the x-axis of the matrix */
  914. uint32_t ny; /**< number of elements on the y-axis of the matrix */
  915. uint32_t n_values; /**< number of values registered in the matrix */
  916. size_t elemsize; /**< size of the elements of the matrix */
  917. };
  918. /**
  919. Register the \p nx x \p ny 2D matrix given in the COO format, using the
  920. \p columns, \p rows, \p values arrays, which must have \p n_values elements of
  921. size \p elemsize. Initialize \p handleptr.
  922. */
  923. void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nx, uint32_t ny, uint32_t n_values, uint32_t *columns, uint32_t *rows, uintptr_t values, size_t elemsize);
  924. /**
  925. Return a pointer to the column array of the matrix designated
  926. by \p interface.
  927. */
  928. #define STARPU_COO_GET_COLUMNS(interface) (((struct starpu_coo_interface *)(interface))->columns)
  929. /**
  930. Return a device handle for the column array of the matrix
  931. designated by \p interface, to be used with OpenCL. The offset
  932. returned by ::STARPU_COO_GET_OFFSET has to be used in addition to
  933. this.
  934. */
  935. #define STARPU_COO_GET_COLUMNS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->columns)
  936. /**
  937. Return a pointer to the rows array of the matrix designated by
  938. \p interface.
  939. */
  940. #define STARPU_COO_GET_ROWS(interface) (((struct starpu_coo_interface *)(interface))->rows)
  941. /**
  942. Return a device handle for the row array of the matrix
  943. designated by \p interface, to be used on OpenCL. The offset returned
  944. by ::STARPU_COO_GET_OFFSET has to be used in addition to this.
  945. */
  946. #define STARPU_COO_GET_ROWS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->rows)
  947. /**
  948. Return a pointer to the values array of the matrix designated
  949. by \p interface.
  950. */
  951. #define STARPU_COO_GET_VALUES(interface) (((struct starpu_coo_interface *)(interface))->values)
  952. /**
  953. Return a device handle for the value array of the matrix
  954. designated by \p interface, to be used on OpenCL. The offset returned
  955. by ::STARPU_COO_GET_OFFSET has to be used in addition to this.
  956. */
  957. #define STARPU_COO_GET_VALUES_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->values)
  958. /**
  959. Offset in the arrays of the COO matrix, to be used with the device handles.
  960. This is an object-like macro taking no argument; it always expands to 0.
  961. */
  962. #define STARPU_COO_GET_OFFSET 0
  963. /**
  964. Return the number of elements on the x-axis of the matrix
  965. designated by \p interface.
  966. */
  967. #define STARPU_COO_GET_NX(interface) (((struct starpu_coo_interface *)(interface))->nx)
  968. /**
  969. Return the number of elements on the y-axis of the matrix
  970. designated by \p interface.
  971. */
  972. #define STARPU_COO_GET_NY(interface) (((struct starpu_coo_interface *)(interface))->ny)
  973. /**
  974. Return the number of values registered in the matrix designated
  975. by \p interface.
  976. */
  977. #define STARPU_COO_GET_NVALUES(interface) (((struct starpu_coo_interface *)(interface))->n_values)
  978. /**
  979. Return the size of the elements registered into the matrix
  980. designated by \p interface.
  981. */
  982. #define STARPU_COO_GET_ELEMSIZE(interface) (((struct starpu_coo_interface *)(interface))->elemsize)
  983. /** @} */
  984. /**
  985. @name Block Data Interface
  986. @{
  987. */
  988. extern struct starpu_data_interface_ops starpu_interface_block_ops;
  989. /* TODO: rename to 3dmatrix? */
  990. /* TODO: add allocsize support */
  991. /**
  992. Block interface for 3D dense blocks
  993. */
  994. struct starpu_block_interface
  995. {
  996. enum starpu_data_interface_id id; /**< identifier of the interface */
  997. uintptr_t ptr; /**< local pointer of the block */
  998. uintptr_t dev_handle; /**< device handle of the block. */
  999. size_t offset; /**< offset in the block. */
  1000. uint32_t nx; /**< number of elements on the x-axis of the block. */
  1001. uint32_t ny; /**< number of elements on the y-axis of the block. */
  1002. uint32_t nz; /**< number of elements on the z-axis of the block. */
  1003. uint32_t ldy; /**< number of elements between two lines */
  1004. uint32_t ldz; /**< number of elements between two planes */
  1005. size_t elemsize; /**< size of the elements of the block. */
  1006. };
  1007. /**
  1008. Register the \p nx x \p ny x \p nz 3D matrix of \p elemsize byte elements
  1009. pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy and
  1010. \p ldz specify the number of elements between rows and between z planes.
  1011. Here is an example of how to use the function.
  1012. \code{.c}
  1013. float *block;
  1014. starpu_data_handle_t block_handle;
  1015. block = (float*)malloc(nx*ny*nz*sizeof(float));
  1016. starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
  1017. \endcode
  1018. */
  1019. void starpu_block_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize);
  1020. /**
  1021. Register into the \p handle that to store data on node \p node it should use the
  1022. buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
  1023. (for OpenCL, notably), with \p ldy elements between rows and \p ldz
  1024. elements between z planes.
  1025. */
  1026. void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz);
  1027. /**
  1028. Return the number of elements on the x-axis of the block
  1029. designated by \p handle.
  1030. */
  1031. uint32_t starpu_block_get_nx(starpu_data_handle_t handle);
  1032. /**
  1033. Return the number of elements on the y-axis of the block
  1034. designated by \p handle.
  1035. */
  1036. uint32_t starpu_block_get_ny(starpu_data_handle_t handle);
  1037. /**
  1038. Return the number of elements on the z-axis of the block
  1039. designated by \p handle.
  1040. */
  1041. uint32_t starpu_block_get_nz(starpu_data_handle_t handle);
  1042. /**
  1043. Return the number of elements between each row of the block
  1044. designated by \p handle, in the format of the current memory node.
  1045. */
  1046. uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle);
  1047. /**
  1048. Return the number of elements between each z plane of the block
  1049. designated by \p handle, in the format of the current memory node.
  1050. */
  1051. uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle);
  1052. /**
  1053. Return the local pointer associated with \p handle.
  1054. */
  1055. uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle);
  1056. /**
  1057. Return the size of the elements of the block designated by
  1058. \p handle.
  1059. */
  1060. size_t starpu_block_get_elemsize(starpu_data_handle_t handle);
  1061. #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG)
  1062. #define STARPU_BLOCK_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_block_interface *)(interface))->id) == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block.")
  1063. #define STARPU_BLOCK_GET_PTR(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->ptr) ; })
  1064. #define STARPU_BLOCK_GET_DEV_HANDLE(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->dev_handle) ; })
  1065. #define STARPU_BLOCK_GET_OFFSET(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->offset) ; })
  1066. #define STARPU_BLOCK_GET_NX(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->nx) ; })
  1067. #define STARPU_BLOCK_GET_NY(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->ny) ; })
  1068. #define STARPU_BLOCK_GET_NZ(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->nz) ; })
  1069. #define STARPU_BLOCK_GET_LDY(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->ldy) ; })
  1070. #define STARPU_BLOCK_GET_LDZ(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->ldz) ; })
  1071. #define STARPU_BLOCK_GET_ELEMSIZE(interface) ({ STARPU_BLOCK_CHECK(interface); (((struct starpu_block_interface *)(interface))->elemsize) ; })
  1072. #else
  1073. /**
  1074. Return a pointer to the block designated by \p interface.
  1075. */
  1076. #define STARPU_BLOCK_GET_PTR(interface) (((struct starpu_block_interface *)(interface))->ptr)
  1077. /**
  1078. Return a device handle for the block designated by \p interface,
  1079. to be used on OpenCL. The offset returned by
  1080. ::STARPU_BLOCK_GET_OFFSET has to be used in
  1081. addition to this.
  1082. */
  1083. #define STARPU_BLOCK_GET_DEV_HANDLE(interface) (((struct starpu_block_interface *)(interface))->dev_handle)
  1084. /**
  1085. Return the offset in the block designated by \p interface, to be
  1086. used with the device handle.
  1087. */
  1088. #define STARPU_BLOCK_GET_OFFSET(interface) (((struct starpu_block_interface *)(interface))->offset)
  1089. /**
  1090. Return the number of elements on the x-axis of the block
  1091. designated by \p interface.
  1092. */
  1093. #define STARPU_BLOCK_GET_NX(interface) (((struct starpu_block_interface *)(interface))->nx)
  1094. /**
  1095. Return the number of elements on the y-axis of the block
  1096. designated by \p interface.
  1097. */
  1098. #define STARPU_BLOCK_GET_NY(interface) (((struct starpu_block_interface *)(interface))->ny)
  1099. /**
  1100. Return the number of elements on the z-axis of the block
  1101. designated by \p interface.
  1102. */
  1103. #define STARPU_BLOCK_GET_NZ(interface) (((struct starpu_block_interface *)(interface))->nz)
  1104. /**
  1105. Return the number of elements between each row of the block
  1106. designated by \p interface. May be equal to nx when there is no padding.
  1107. */
  1108. #define STARPU_BLOCK_GET_LDY(interface) (((struct starpu_block_interface *)(interface))->ldy)
  1109. /**
  1110. Return the number of elements between each z plane of the block
  1111. designated by \p interface. May be equal to nx*ny when there is no
  1112. padding.
  1113. */
  1114. #define STARPU_BLOCK_GET_LDZ(interface) (((struct starpu_block_interface *)(interface))->ldz)
  1115. /**
  1116. Return the size of the elements of the block designated by
  1117. \p interface.
  1118. */
  1119. #define STARPU_BLOCK_GET_ELEMSIZE(interface) (((struct starpu_block_interface *)(interface))->elemsize)
  1120. #endif
  1121. /** @} */
  1122. /**
  1123. @name Tensor Data Interface
  1124. @{
  1125. */
  1126. extern struct starpu_data_interface_ops starpu_interface_tensor_ops;
  1127. /* TODO: rename to 4dtensor? */
  1128. /* TODO: add allocsize support */
  1129. /**
  1130. Tensor interface for 4D dense tensors
  1131. */
  1132. struct starpu_tensor_interface
  1133. {
  1134. enum starpu_data_interface_id id; /**< identifier of the interface */
  1135. uintptr_t ptr; /**< local pointer of the tensor */
  1136. uintptr_t dev_handle; /**< device handle of the tensor. */
  1137. size_t offset; /**< offset in the tensor. */
  1138. uint32_t nx; /**< number of elements on the x-axis of the tensor. */
  1139. uint32_t ny; /**< number of elements on the y-axis of the tensor. */
  1140. uint32_t nz; /**< number of elements on the z-axis of the tensor. */
  1141. uint32_t nt; /**< number of elements on the t-axis of the tensor. */
  1142. uint32_t ldy; /**< number of elements between two lines */
  1143. uint32_t ldz; /**< number of elements between two planes */
  1144. uint32_t ldt; /**< number of elements between two cubes */
  1145. size_t elemsize; /**< size of the elements of the tensor. */
  1146. };
  1147. /**
  1148. Register the \p nx x \p ny x \p nz x \p nt 4D tensor of \p elemsize byte elements
  1149. pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy,
  1150. \p ldz, and \p ldt specify the number of elements between rows, between z planes and between t cubes.
  1151. Here is an example of how to use the function.
  1152. \code{.c}
  1153. float *tensor;
  1154. starpu_data_handle_t tensor_handle;
  1155. tensor = (float*)malloc(nx*ny*nz*nt*sizeof(float));
  1156. starpu_tensor_data_register(&tensor_handle, STARPU_MAIN_RAM, (uintptr_t)tensor, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float));
  1157. \endcode
  1158. */
  1159. void starpu_tensor_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize);
  1160. /**
  1161. Register into the \p handle that to store data on node \p node it should use the
  1162. buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
  1163. (for OpenCL, notably), with \p ldy elements between rows, and \p ldz
  1164. elements between z planes, and \p ldt elements between t cubes.
  1165. */
  1166. void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt);
  1167. /**
  1168. Return the number of elements on the x-axis of the tensor
  1169. designated by \p handle.
  1170. */
  1171. uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle);
  1172. /**
  1173. Return the number of elements on the y-axis of the tensor
  1174. designated by \p handle.
  1175. */
  1176. uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle);
  1177. /**
  1178. Return the number of elements on the z-axis of the tensor
  1179. designated by \p handle.
  1180. */
  1181. uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle);
  1182. /**
  1183. Return the number of elements on the t-axis of the tensor
  1184. designated by \p handle.
  1185. */
  1186. uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle);
  1187. /**
  1188. Return the number of elements between each row of the tensor
  1189. designated by \p handle, in the format of the current memory node.
  1190. */
  1191. uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle);
  1192. /**
  1193. Return the number of elements between each z plane of the tensor
  1194. designated by \p handle, in the format of the current memory node.
  1195. */
  1196. uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle);
  1197. /**
  1198. Return the number of elements between two t cubes of the tensor
  1199. designated by \p handle, in the format of the current memory node.
  1200. */
  1201. uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle);
  1202. /**
  1203. Return the local pointer associated with \p handle.
  1204. */
  1205. uintptr_t starpu_tensor_get_local_ptr(starpu_data_handle_t handle);
  1206. /**
  1207. Return the size of the elements of the tensor designated by
  1208. \p handle.
  1209. */
  1210. size_t starpu_tensor_get_elemsize(starpu_data_handle_t handle);
  1211. #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG)
  1212. #define STARPU_TENSOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_tensor_interface *)(interface))->id) == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor.")
  1213. #define STARPU_TENSOR_GET_PTR(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->ptr) ; })
  1214. #define STARPU_TENSOR_GET_DEV_HANDLE(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->dev_handle) ; })
  1215. #define STARPU_TENSOR_GET_OFFSET(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->offset) ; })
  1216. #define STARPU_TENSOR_GET_NX(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->nx) ; })
  1217. #define STARPU_TENSOR_GET_NY(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->ny) ; })
  1218. #define STARPU_TENSOR_GET_NZ(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->nz) ; })
  1219. #define STARPU_TENSOR_GET_NT(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->nt) ; })
  1220. #define STARPU_TENSOR_GET_LDY(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->ldy) ; })
  1221. #define STARPU_TENSOR_GET_LDZ(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->ldz) ; })
  1222. #define STARPU_TENSOR_GET_LDT(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->ldt) ; })
  1223. #define STARPU_TENSOR_GET_ELEMSIZE(interface) ({ STARPU_TENSOR_CHECK(interface); (((struct starpu_tensor_interface *)(interface))->elemsize) ; })
  1224. #else
  1225. /**
  1226. Return a pointer to the tensor designated by \p interface.
  1227. */
  1228. #define STARPU_TENSOR_GET_PTR(interface) (((struct starpu_tensor_interface *)(interface))->ptr)
  1229. /**
  1230. Return a device handle for the tensor designated by \p interface,
  1231. to be used on OpenCL. The offset returned by
  1232. ::STARPU_TENSOR_GET_OFFSET has to be used in
  1233. addition to this.
  1234. */
  1235. #define STARPU_TENSOR_GET_DEV_HANDLE(interface) (((struct starpu_tensor_interface *)(interface))->dev_handle)
  1236. /**
  1237. Return the offset in the tensor designated by \p interface, to be
  1238. used with the device handle.
  1239. */
  1240. #define STARPU_TENSOR_GET_OFFSET(interface) (((struct starpu_tensor_interface *)(interface))->offset)
  1241. /**
  1242. Return the number of elements on the x-axis of the tensor
  1243. designated by \p interface.
  1244. */
  1245. #define STARPU_TENSOR_GET_NX(interface) (((struct starpu_tensor_interface *)(interface))->nx)
  1246. /**
  1247. Return the number of elements on the y-axis of the tensor
  1248. designated by \p interface.
  1249. */
  1250. #define STARPU_TENSOR_GET_NY(interface) (((struct starpu_tensor_interface *)(interface))->ny)
  1251. /**
  1252. Return the number of elements on the z-axis of the tensor
  1253. designated by \p interface.
  1254. */
  1255. #define STARPU_TENSOR_GET_NZ(interface) (((struct starpu_tensor_interface *)(interface))->nz)
  1256. /**
  1257. Return the number of elements on the t-axis of the tensor
  1258. designated by \p interface.
  1259. */
  1260. #define STARPU_TENSOR_GET_NT(interface) (((struct starpu_tensor_interface *)(interface))->nt)
  1261. /**
  1262. Return the number of elements between each row of the tensor
  1263. designated by \p interface. May be equal to nx when there is no padding.
  1264. */
  1265. #define STARPU_TENSOR_GET_LDY(interface) (((struct starpu_tensor_interface *)(interface))->ldy)
  1266. /**
  1267. Return the number of elements between each z plane of the tensor
  1268. designated by \p interface. May be equal to nx*ny when there is no
  1269. padding.
  1270. */
  1271. #define STARPU_TENSOR_GET_LDZ(interface) (((struct starpu_tensor_interface *)(interface))->ldz)
  1272. /**
  1273. Return the number of elements between each t cubes of the tensor
  1274. designated by \p interface. May be equal to nx*ny*nz when there is no
  1275. padding.
  1276. */
  1277. #define STARPU_TENSOR_GET_LDT(interface) (((struct starpu_tensor_interface *)(interface))->ldt)
  1278. /**
  1279. Return the size of the elements of the tensor designated by
  1280. \p interface.
  1281. */
  1282. #define STARPU_TENSOR_GET_ELEMSIZE(interface) (((struct starpu_tensor_interface *)(interface))->elemsize)
  1283. #endif
  1284. /** @} */
  1285. /**
  1286. @name Vector Data Interface
  1287. @{
  1288. */
  1289. extern struct starpu_data_interface_ops starpu_interface_vector_ops;
  1290. /**
  1291. */
  1292. struct starpu_vector_interface
  1293. {
  1294. enum starpu_data_interface_id id; /**< Identifier of the interface */
  1295. uintptr_t ptr; /**< local pointer of the vector */
  1296. uintptr_t dev_handle; /**< device handle of the vector. */
  1297. size_t offset; /**< offset in the vector */
  1298. uint32_t nx; /**< number of elements on the x-axis of the vector */
  1299. size_t elemsize; /**< size of the elements of the vector */
  1300. uint32_t slice_base; /**< vector slice base, used by the StarPU OpenMP runtime support */
  1301. size_t allocsize; /**< size actually currently allocated */
  1302. };
  1303. /**
  1304. Register the \p nx \p elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it.
  1305. Here is an example of how to use the function.
  1306. \code{.c}
  1307. float vector[NX];
  1308. starpu_data_handle_t vector_handle;
  1309. starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
  1310. \endcode
  1311. */
  1312. void starpu_vector_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
  1313. /**
  1314. Similar to starpu_vector_data_register(), but additionally specifies which
  1315. allocation size should be used instead of the initial nx*elemsize.
  1316. */
  1317. void starpu_vector_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize);
  1318. /**
  1319. Register into the \p handle that to store data on node \p node it should use the
  1320. buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
  1321. (for OpenCL, notably)
  1322. */
  1323. void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
  1324. /**
  1325. Return the number of elements registered into the array designated by \p handle.
  1326. */
  1327. uint32_t starpu_vector_get_nx(starpu_data_handle_t handle);
  1328. /**
  1329. Return the size of each element of the array designated by \p handle.
  1330. */
  1331. size_t starpu_vector_get_elemsize(starpu_data_handle_t handle);
  1332. /**
  1333. Return the allocated size of the array designated by \p handle.
  1334. */
  1335. size_t starpu_vector_get_allocsize(starpu_data_handle_t handle);
  1336. /**
  1337. Return the local pointer associated with \p handle.
  1338. */
  1339. uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
  1340. #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG)
  1341. #define STARPU_VECTOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_vector_interface *)(interface))->id) == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector.")
  1342. #define STARPU_VECTOR_GET_PTR(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->ptr); })
  1343. #define STARPU_VECTOR_GET_DEV_HANDLE(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->dev_handle); })
  1344. #define STARPU_VECTOR_GET_OFFSET(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->offset); })
  1345. #define STARPU_VECTOR_GET_NX(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->nx); })
  1346. #define STARPU_VECTOR_GET_ELEMSIZE(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->elemsize); })
  1347. #define STARPU_VECTOR_GET_ALLOCSIZE(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->allocsize); })
  1348. #define STARPU_VECTOR_GET_SLICE_BASE(interface) ({ STARPU_VECTOR_CHECK(interface); (((struct starpu_vector_interface *)(interface))->slice_base); })
  1349. #else
  1350. /**
  1351. Return a pointer to the array designated by \p interface, valid on
  1352. CPUs and CUDA only. For OpenCL, the device handle and offset need to
  1353. be used instead.
  1354. */
  1355. #define STARPU_VECTOR_GET_PTR(interface) (((struct starpu_vector_interface *)(interface))->ptr)
  1356. /**
  1357. Return a device handle for the array designated by \p interface,
  1358. to be used with OpenCL. the offset returned by ::STARPU_VECTOR_GET_OFFSET has to be used in
  1359. addition to this.
  1360. */
  1361. #define STARPU_VECTOR_GET_DEV_HANDLE(interface) (((struct starpu_vector_interface *)(interface))->dev_handle)
  1362. /**
  1363. Return the offset in the array designated by \p interface, to be
  1364. used with the device handle.
  1365. */
  1366. #define STARPU_VECTOR_GET_OFFSET(interface) (((struct starpu_vector_interface *)(interface))->offset)
  1367. /**
  1368. Return the number of elements registered into the array
  1369. designated by \p interface.
  1370. */
  1371. #define STARPU_VECTOR_GET_NX(interface) (((struct starpu_vector_interface *)(interface))->nx)
  1372. /**
  1373. Return the size of each element of the array designated by
  1374. \p interface.
  1375. */
  1376. #define STARPU_VECTOR_GET_ELEMSIZE(interface) (((struct starpu_vector_interface *)(interface))->elemsize)
  1377. /**
  1378. Return the size of each element of the array designated by
  1379. \p interface.
  1380. */
  1381. #define STARPU_VECTOR_GET_ALLOCSIZE(interface) (((struct starpu_vector_interface *)(interface))->allocsize)
  1382. /**
  1383. Return the OpenMP slice base annotation of each element of the array designated by
  1384. \p interface.
  1385. */
  1386. #define STARPU_VECTOR_GET_SLICE_BASE(interface) (((struct starpu_vector_interface *)(interface))->slice_base)
  1387. #endif
  1388. /**
  1389. Set the number of elements registered into the array designated by \p
  1390. interface.
  1391. */
  1392. #define STARPU_VECTOR_SET_NX(interface, newnx) do { \
  1393. STARPU_VECTOR_CHECK(interface); \
  1394. (((struct starpu_vector_interface *)(interface))->nx) = (newnx); \
  1395. } while(0)
  1396. /** @} */
  1397. /**
  1398. @name Variable Data Interface
  1399. @{
  1400. */
  1401. extern struct starpu_data_interface_ops starpu_interface_variable_ops;
  1402. /**
  1403. Variable interface for a single data (not a vector, a matrix, a list,
  1404. ...)
  1405. */
  1406. struct starpu_variable_interface
  1407. {
  1408. enum starpu_data_interface_id id; /**< Identifier of the interface */
  1409. uintptr_t ptr; /**< local pointer of the variable */
  1410. uintptr_t dev_handle; /**< device handle of the variable. */
  1411. size_t offset; /**< offset in the variable */
  1412. size_t elemsize; /**< size of the variable */
  1413. };
  1414. /**
  1415. Register the \p size byte element pointed to by \p ptr, which is
  1416. typically a scalar, and initialize \p handle to represent this data item.
  1417. Here is an example of how to use the function.
  1418. \code{.c}
  1419. float var = 42.0;
  1420. starpu_data_handle_t var_handle;
  1421. starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
  1422. \endcode
  1423. */
  1424. void starpu_variable_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, size_t size);
  1425. /**
  1426. Register into the \p handle that to store data on node \p node it should use the
  1427. buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
  1428. (for OpenCL, notably)
  1429. */
  1430. void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
  1431. /**
  1432. Return the size of the variable designated by \p handle.
  1433. */
  1434. size_t starpu_variable_get_elemsize(starpu_data_handle_t handle);
  1435. /**
  1436. Return a pointer to the variable designated by \p handle.
  1437. */
  1438. uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
  1439. #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG)
  1440. #define STARPU_VARIABLE_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_variable_interface *)(interface))->id) == STARPU_VARIABLE_INTERFACE_ID, "Error. The given data is not a variable.")
  1441. #define STARPU_VARIABLE_GET_PTR(interface) ({ STARPU_VARIABLE_CHECK(interface); (((struct starpu_variable_interface *)(interface))->ptr) ; })
  1442. #define STARPU_VARIABLE_GET_OFFSET(interface) ({ STARPU_VARIABLE_CHECK(interface); (((struct starpu_variable_interface *)(interface))->offset) ; })
  1443. #define STARPU_VARIABLE_GET_ELEMSIZE(interface) ({ STARPU_VARIABLE_CHECK(interface); (((struct starpu_variable_interface *)(interface))->elemsize) ; })
  1444. #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) ({ STARPU_VARIABLE_CHECK(interface); (((struct starpu_variable_interface *)(interface))->ptr) ; })
  1445. #else
  1446. /**
  1447. Return a pointer to the variable designated by \p interface.
  1448. */
  1449. #define STARPU_VARIABLE_GET_PTR(interface) (((struct starpu_variable_interface *)(interface))->ptr)
  1450. /**
  1451. Return the offset in the variable designated by \p interface, to
  1452. be used with the device handle.
  1453. */
  1454. #define STARPU_VARIABLE_GET_OFFSET(interface) (((struct starpu_variable_interface *)(interface))->offset)
  1455. /**
  1456. Return the size of the variable designated by \p interface.
  1457. */
  1458. #define STARPU_VARIABLE_GET_ELEMSIZE(interface) (((struct starpu_variable_interface *)(interface))->elemsize)
  1459. /**
  1460. Return a device handle for the variable designated by
  1461. \p interface, to be used with OpenCL. The offset returned by
  1462. ::STARPU_VARIABLE_GET_OFFSET has to be
  1463. used in addition to this.
  1464. */
  1465. #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) (((struct starpu_variable_interface *)(interface))->ptr)
  1466. #endif
  1467. /** @} */
  1468. /**
  1469. @name Void Data Interface
  1470. @{
  1471. */
  1472. extern struct starpu_data_interface_ops starpu_interface_void_ops;
  1473. /**
  1474. Register a void interface. No data is actually associated
  1475. with that interface, but it may be used as a synchronization mechanism.
  1476. It also makes it possible to express an abstract piece of data that is
  1477. managed by the application internally, and thereby to forbid the
  1478. concurrent execution of different tasks accessing the same <c>void</c>
  1479. data in read-write mode.
  1480. */
  1481. void starpu_void_data_register(starpu_data_handle_t *handle);
  1482. /** @} */
  1483. /**
  1484. @name CSR Data Interface
  1485. @{
  1486. */
  1487. extern struct starpu_data_interface_ops starpu_interface_csr_ops;
  1488. /**
  1489. CSR interface for sparse matrices (compressed sparse row
  1490. representation)
  1491. */
  1492. struct starpu_csr_interface
  1493. {
  1494. enum starpu_data_interface_id id; /**< Identifier of the interface */
  1495. uint32_t nnz; /**< number of non-zero entries */
  1496. uint32_t nrow; /**< number of rows */
  1497. uintptr_t nzval; /**< non-zero values */
  1498. uint32_t *colind; /**< position of non-zero entries on the row */
  1499. uint32_t *rowptr; /**< index (in nzval) of the first entry of the row */
  1500. uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). also useful when partitionning the matrix. */
  1501. size_t elemsize; /**< size of the elements of the matrix */
  1502. };
  1503. /**
  1504. Register a CSR (Compressed Sparse Row Representation) sparse matrix.
  1505. */
  1506. void starpu_csr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize);
  1507. /**
  1508. Return the number of non-zero values in the matrix designated
  1509. by \p handle.
  1510. */
  1511. uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle);
  1512. /**
  1513. Return the size of the row pointer array of the matrix
  1514. designated by \p handle.
  1515. */
  1516. uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle);
  1517. /**
  1518. Return the index at which all arrays (the column indexes, the
  1519. row pointers...) of the matrix designated by \p handle start.
  1520. */
  1521. uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle);
  1522. /**
  1523. Return a local pointer to the non-zero values of the matrix
  1524. designated by \p handle.
  1525. */
  1526. uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle);
  1527. /**
  1528. Return a local pointer to the column index of the matrix
  1529. designated by \p handle.
  1530. */
  1531. uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle);
  1532. /**
  1533. Return a local pointer to the row pointer array of the matrix
  1534. designated by \p handle.
  1535. */
  1536. uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle);
  1537. /**
  1538. Return the size of the elements registered into the matrix
  1539. designated by \p handle.
  1540. */
  1541. size_t starpu_csr_get_elemsize(starpu_data_handle_t handle);
  1542. /**
  1543. Return the number of non-zero values in the matrix designated
  1544. by \p interface.
  1545. */
  1546. #define STARPU_CSR_GET_NNZ(interface) (((struct starpu_csr_interface *)(interface))->nnz)
  1547. /**
  1548. Return the size of the row pointer array of the matrix
  1549. designated by \p interface.
  1550. */
  1551. #define STARPU_CSR_GET_NROW(interface) (((struct starpu_csr_interface *)(interface))->nrow)
  1552. /**
  1553. Return a pointer to the non-zero values of the matrix
  1554. designated by \p interface.
  1555. */
  1556. #define STARPU_CSR_GET_NZVAL(interface) (((struct starpu_csr_interface *)(interface))->nzval)
  1557. /**
  1558. Return a device handle for the array of non-zero values in the
  1559. matrix designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET
  1560. has to used in addition to this.
  1561. */
  1562. #define STARPU_CSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->nnz)
  1563. /**
  1564. Return a pointer to the column index of the matrix designated
  1565. by \p interface.
  1566. */
  1567. #define STARPU_CSR_GET_COLIND(interface) (((struct starpu_csr_interface *)(interface))->colind)
  1568. /**
  1569. Return a device handle for the column index of the matrix
  1570. designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in
  1571. addition to this.
  1572. */
  1573. #define STARPU_CSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->colind)
  1574. /**
  1575. Return a pointer to the row pointer array of the matrix
  1576. designated by \p interface.
  1577. */
  1578. #define STARPU_CSR_GET_ROWPTR(interface) (((struct starpu_csr_interface *)(interface))->rowptr)
  1579. /**
  1580. Return a device handle for the row pointer array of the matrix
  1581. designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in
  1582. addition to this.
  1583. */
  1584. #define STARPU_CSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->rowptr)
  1585. /**
  1586. Return the offset in the arrays (colind, rowptr, nzval) of the
  1587. matrix designated by \p interface, to be used with the device handles.
  1588. */
  1589. #define STARPU_CSR_GET_OFFSET 0
  1590. /**
  1591. Return the index at which all arrays (the column indexes, the
  1592. row pointers...) of the \p interface start.
  1593. */
  1594. #define STARPU_CSR_GET_FIRSTENTRY(interface) (((struct starpu_csr_interface *)(interface))->firstentry)
  1595. /**
  1596. Return the size of the elements registered into the matrix
  1597. designated by \p interface.
  1598. */
  1599. #define STARPU_CSR_GET_ELEMSIZE(interface) (((struct starpu_csr_interface *)(interface))->elemsize)
  1600. /** @} */
  1601. /**
  1602. @name BCSR Data Interface
  1603. @{
  1604. */
  1605. extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
  1606. /**
  1607. BCSR interface for sparse matrices (blocked compressed sparse
  1608. row representation)
  1609. Note: when a BCSR matrix is partitioned, nzval, colind, and rowptr point into
  1610. the corresponding father arrays. The rowptr content is thus the same as the
  1611. father's. Firstentry is used to offset this so it becomes valid for the child
  1612. arrays.
  1613. */
  1614. struct starpu_bcsr_interface
  1615. {
  1616. enum starpu_data_interface_id id; /**< Identifier of the interface */
  1617. uint32_t nnz; /**< number of non-zero BLOCKS */
  1618. uint32_t nrow; /**< number of rows (in terms of BLOCKS) */
  1619. uintptr_t nzval; /**< non-zero values: nnz blocks of r*c elements */
  1620. uint32_t *colind; /**< array of nnz elements, colind[i] is the block-column index for block i in nzval */
  1621. uint32_t *rowptr; /**< array of nrow+1
  1622. * elements, rowptr[i] is
  1623. * the block-index (in
  1624. * nzval) of the first block
  1625. * of row i. By convention,
  1626. * rowptr[nrow] is the
  1627. * number of blocks, this
  1628. * allows an easier access
  1629. * of the matrix's elements
  1630. * for the kernels. */
  1631. uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). Also useful when partitionning the matrix. */
  1632. uint32_t r; /**< height of the blocks */
  1633. uint32_t c; /**< width of the blocks */
  1634. size_t elemsize; /**< size of the elements of the matrix */
  1635. };
  1636. /**
  1637. This variant of starpu_data_register() uses the BCSR (Blocked
  1638. Compressed Sparse Row Representation) sparse matrix interface.
  1639. Register the sparse matrix made of \p nnz non-zero blocks of elements of
  1640. size \p elemsize stored in \p nzval and initializes \p handle to represent it.
  1641. Blocks have size \p r * \p c. \p nrow is the number of rows (in terms of
  1642. blocks), \p colind is an array of nnz elements, colind[i] is the block-column index for block i in \p nzval,
  1643. \p rowptr is an array of nrow+1 elements, rowptr[i] is the block-index (in \p nzval) of the first block of row i. By convention, rowptr[nrow] is the number of blocks, this allows an easier access of the matrix's elements for the kernels.
  1644. \p firstentry is the index of the first entry of the given arrays
  1645. (usually 0 or 1).
  1646. Here an example with the following matrix:
  1647. \code | 0 1 0 0 | \endcode
  1648. \code | 2 3 0 0 | \endcode
  1649. \code | 4 5 8 9 | \endcode
  1650. \code | 6 7 10 11 | \endcode
  1651. \code nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] \endcode
  1652. \code colind = [0, 0, 1] \endcode
  1653. \code rowptr = [0, 1, 3] \endcode
  1654. \code r = c = 2 \endcode
  1655. which translates into the following code
  1656. \code{.c}
  1657. enum { R = 2 }; // Size of the blocks
  1658. enum { C = 2 };
  1659. enum { NROWS = 2 };
  1660. enum { NNZ_BLOCKS = 3 }; // out of 4
  1661. enum { NZVAL_SIZE = R*C*NNZ_BLOCKS }; // constants, so the arrays below can be initialized
  1662. int nzval[NZVAL_SIZE] =
  1663. {
  1664. 0, 1, 2, 3, // First block
  1665. 4, 5, 6, 7, // Second block
  1666. 8, 9, 10, 11 // Third block
  1667. };
  1668. uint32_t colind[NNZ_BLOCKS] =
  1669. {
  1670. 0, // block-column index for first block in nzval
  1671. 0, // block-column index for second block in nzval
  1672. 1 // block-column index for third block in nzval
  1673. };
  1674. uint32_t rowptr[NROWS+1] =
  1675. {
  1676. 0, // block-index in nzval of the first block of the first row.
  1677. 1, // block-index in nzval of the first block of the second row.
  1678. NNZ_BLOCKS // number of blocks, to allow an easier element's access for the kernels
  1679. };
  1680. starpu_data_handle_t bcsr_handle;
  1681. starpu_bcsr_data_register(&bcsr_handle,
  1682. STARPU_MAIN_RAM,
  1683. NNZ_BLOCKS,
  1684. NROWS,
  1685. (uintptr_t) nzval,
  1686. colind,
  1687. rowptr,
  1688. 0, // firstentry
  1689. R,
  1690. C,
  1691. sizeof(nzval[0]));
  1692. \endcode
  1693. */
  1694. void starpu_bcsr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize);
  1695. /**
  1696. Return the number of non-zero elements in the matrix designated by \p handle.
  1697. Note that the example of starpu_bcsr_data_register() passes \c nnz as a number of non-zero blocks.
  1698. */
  1699. uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle);
  1700. /**
  1701. Return the number of block rows, i.e. rows counted in blocks of
  1702. size r*c rather than in scalar rows, of the matrix designated by \p handle.
  1703. */
  1704. uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle);
  1705. /**
  1706. Return the index of the first entry (usually 0 or 1) of all arrays (the
  1707. column indexes, the row pointers...) of the matrix designated by \p handle.
  1708. */
  1709. uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle);
  1710. /**
  1711. Return, as a \c uintptr_t, a pointer to the array of non-zero values
  1712. of the matrix designated by \p handle.
  1713. */
  1714. uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle);
  1715. /**
  1716. Return a pointer to the column index array of the matrix designated by \p handle.
  1717. In the example of starpu_bcsr_data_register(), it holds one block-column index per non-zero block.
  1718. */
  1719. uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle);
  1720. /**
  1721. Return the row pointer array of the matrix designated by \p handle. As described for
  1722. starpu_bcsr_data_register(), rowptr[i] is the block-index of the first block of row i.
  1723. */
  1724. uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle);
  1725. /**
  1726. Return the number of rows in a block (\c r) of the matrix designated by \p handle.
  1727. */
  1728. uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle);
  1729. /**
  1730. Return the number of columns in a block (\c c) of the matrix designated by \p handle.
  1731. */
  1732. uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle);
  1733. /**
  1734. Return the size of the elements in the matrix designated by \p handle
  1735. (cf. the \c elemsize parameter of starpu_bcsr_data_register(), sizeof(nzval[0]) in its example).
  1736. */
  1737. size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle);
  1738. /**
  1739. Return the \c nnz field (number of non-zero values) of the matrix designated
  1740. by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1741. */
  1742. #define STARPU_BCSR_GET_NNZ(interface) (((struct starpu_bcsr_interface *)(interface))->nnz)
  1743. /**
  1744. Return the \c nrow field (number of block rows) of the matrix designated
  1745. by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1746. */
  1747. #define STARPU_BCSR_GET_NROW(interface) (((struct starpu_bcsr_interface *)(interface))->nrow)
  1748. /**
  1749. Return the \c nzval field (pointer to the non-zero values) of the matrix
  1750. designated by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1751. */
  1752. #define STARPU_BCSR_GET_NZVAL(interface) (((struct starpu_bcsr_interface *)(interface))->nzval)
  1753. /**
  1754. Return a device handle for the array of non-zero values in the
  1755. matrix designated by \p interface (its \c nzval field, not the \c nnz count). The offset
  1756. returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this.
  1757. */
  1758. #define STARPU_BCSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->nzval)
  1759. /**
  1760. Return the \c colind field (pointer to the column index) of the matrix designated
  1761. by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1762. */
  1763. #define STARPU_BCSR_GET_COLIND(interface) (((struct starpu_bcsr_interface *)(interface))->colind)
  1764. /**
  1765. Return a device handle for the column index of the matrix
  1766. designated by \p interface; this reads the same \c colind field as ::STARPU_BCSR_GET_COLIND.
  1767. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this.
  1768. */
  1769. #define STARPU_BCSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->colind)
  1770. /**
  1771. Return the \c rowptr field (pointer to the row pointer array) of the matrix
  1772. designated by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1773. */
  1774. #define STARPU_BCSR_GET_ROWPTR(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr)
  1775. /**
  1776. Return a device handle for the row pointer array of the matrix
  1777. designated by \p interface; this reads the same \c rowptr field as ::STARPU_BCSR_GET_ROWPTR.
  1778. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this.
  1779. */
  1780. #define STARPU_BCSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr)
  1781. /**
  1782. Return the \c firstentry field, i.e. the base of the indexing (0 or 1 usually), of the matrix
  1783. designated by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1784. */
  1785. #define STARPU_BCSR_GET_FIRSTENTRY(interface) (((struct starpu_bcsr_interface *)(interface))->firstentry)
  1786. /**
  1787. Return the \c r field, i.e. the height of blocks, of the matrix designated
  1788. by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1789. */
  1790. #define STARPU_BCSR_GET_R(interface) (((struct starpu_bcsr_interface *)(interface))->r)
  1791. /**
  1792. Return the \c c field, i.e. the width of blocks, of the matrix designated
  1793. by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1794. */
  1795. #define STARPU_BCSR_GET_C(interface) (((struct starpu_bcsr_interface *)(interface))->c)
  1796. /**
  1797. Return the \c elemsize field (size of elements) of the matrix designated by \p interface, which is cast to a struct starpu_bcsr_interface pointer.
  1798. */
  1799. #define STARPU_BCSR_GET_ELEMSIZE(interface) (((struct starpu_bcsr_interface *)(interface))->elemsize)
  1800. /**
  1801. Return the offset in the arrays (colind, rowptr, nzval) of the matrix, to be used
  1802. with the device handles. This is an object-like macro (it takes no argument) whose value is 0.
  1803. */
  1804. #define STARPU_BCSR_GET_OFFSET 0
  1805. /** @} */
  1806. /**
  1807. @name Multiformat Data Interface
  1808. @{
  1809. */
  1810. /**
  1811. Multiformat operations: per-device element sizes, and the codelets converting between the CPU format and the OpenCL and CUDA formats
  1812. */
  1813. struct starpu_multiformat_data_interface_ops
  1814. {
  1815. size_t cpu_elemsize; /**< size of each element on CPUs */
  1816. size_t opencl_elemsize; /**< size of each element on OpenCL devices */
  1817. struct starpu_codelet *cpu_to_opencl_cl; /**< pointer to a codelet which converts from CPU to OpenCL */
  1818. struct starpu_codelet *opencl_to_cpu_cl; /**< pointer to a codelet which converts from OpenCL to CPU */
  1819. size_t cuda_elemsize; /**< size of each element on CUDA devices */
  1820. struct starpu_codelet *cpu_to_cuda_cl; /**< pointer to a codelet which converts from CPU to CUDA */
  1821. struct starpu_codelet *cuda_to_cpu_cl; /**< pointer to a codelet which converts from CUDA to CPU */
  1822. };
  1823. struct starpu_multiformat_interface
  1824. {
  1825. enum starpu_data_interface_id id; /**< data interface identifier */
  1826. void *cpu_ptr; /**< local pointer to the data with CPU format, read by ::STARPU_MULTIFORMAT_GET_CPU_PTR */
  1827. void *cuda_ptr; /**< local pointer to the data with CUDA format, read by ::STARPU_MULTIFORMAT_GET_CUDA_PTR */
  1828. void *opencl_ptr; /**< local pointer to the data with OpenCL format, read by ::STARPU_MULTIFORMAT_GET_OPENCL_PTR */
  1829. uint32_t nx; /**< number of elements in the data, read by ::STARPU_MULTIFORMAT_GET_NX */
  1830. struct starpu_multiformat_data_interface_ops *ops; /**< multiformat operations (element sizes and conversion codelets) */
  1831. };
  1832. /**
  1833. Register a piece of data that can be represented in different
  1834. ways, depending upon the processing unit that manipulates it. This
  1835. allows the programmer, for instance, to use an array of structures
  1836. when working on a CPU, and a structure of arrays when working on a GPU.
  1837. \p nobjects is the number of elements in the data. \p format_ops describes the format (see struct starpu_multiformat_data_interface_ops).
  1838. NOTE(review): \p ptr and \p home_node are presumably the data buffer and the memory node holding it -- to be confirmed against the implementation.
  1839. */
  1840. void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops);
  1841. /**
  1842. Return the \c cpu_ptr field, i.e. the local pointer to the data with CPU format, of \p interface (cast to a struct starpu_multiformat_interface pointer).
  1843. */
  1844. #define STARPU_MULTIFORMAT_GET_CPU_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cpu_ptr)
  1845. /**
  1846. Return the \c cuda_ptr field, i.e. the local pointer to the data with CUDA format, of \p interface (cast to a struct starpu_multiformat_interface pointer).
  1847. */
  1848. #define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cuda_ptr)
  1849. /**
  1850. Return the \c opencl_ptr field, i.e. the local pointer to the data with OpenCL format, of \p interface (cast to a struct starpu_multiformat_interface pointer).
  1851. */
  1852. #define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->opencl_ptr)
  1853. /**
  1854. Return the \c nx field, i.e. the number of elements in the data, of \p interface (cast to a struct starpu_multiformat_interface pointer).
  1855. */
  1856. #define STARPU_MULTIFORMAT_GET_NX(interface) (((struct starpu_multiformat_interface *)(interface))->nx)
  1857. /** @} */
  1858. /** @} */
  1859. #ifdef __cplusplus
  1860. }
  1861. #endif
  1862. #endif /* __STARPU_DATA_INTERFACES_H__ */