starpu_data.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2019 Université de Bordeaux
  4. * Copyright (C) 2011-2013,2016,2017 Inria
  5. * Copyright (C) 2010-2015,2017,2019 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #ifndef __STARPU_DATA_H__
  19. #define __STARPU_DATA_H__
  20. /** @defgroup API_Data_Management Data Management
  21. @brief Data management facilities provided by StarPU. We show how
  22. to use existing data interfaces in \ref API_Data_Interfaces, but
  23. developers can design their own data interfaces if required.
  24. @{
  25. */
  26. #include <starpu.h>
  27. #ifdef __cplusplus
  28. extern "C"
  29. {
  30. #endif
  31. /**
  32. This macro is used when the RAM memory node is specified.
  33. */
  34. #define STARPU_MAIN_RAM 0
  35. struct _starpu_data_state;
  36. /**
  37. StarPU uses ::starpu_data_handle_t as an opaque handle to manage a
  38. piece of data. Once a piece of data has been registered to StarPU,
  39. it is associated to a ::starpu_data_handle_t which keeps track of
  40. the state of the piece of data over the entire machine, so that we
  41. can maintain data consistency and locate data replicates for
  42. instance.
  43. */
  44. typedef struct _starpu_data_state* starpu_data_handle_t;
  45. /**
  46. Describe a StarPU data access mode
  47. Note: when adding a flag here, update
  48. _starpu_detect_implicit_data_deps_with_handle
  49. Note: other STARPU_* values in include/starpu_task_util.h
  50. */
  51. enum starpu_data_access_mode
  52. {
  53. STARPU_NONE=0, /**< todo */
  54. STARPU_R=(1<<0), /**< read-only mode */
  55. STARPU_W=(1<<1), /**< write-only mode */
  56. STARPU_RW=(STARPU_R|STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W */
  57. STARPU_SCRATCH=(1<<2), /**< A temporary buffer is allocated
  58. for the task, but StarPU does not
  59. enforce data consistency---i.e. each
  60. device has its own buffer,
  61. independently from each other (even
  62. for CPUs), and no data transfer is
  63. ever performed. This is useful for
  64. temporary variables to avoid
  65. allocating/freeing buffers inside
  66. each task. Currently, no behavior is
  67. defined concerning the relation with
  68. the ::STARPU_R and ::STARPU_W modes
  69. and the value provided at
  70. registration --- i.e., the value of
  71. the scratch buffer is undefined at
  72. entry of the codelet function. It
  73. is being considered for future
  74. extensions at least to define the
  75. initial value. For now, data to be
  76. used in ::STARPU_SCRATCH mode should
  77. be registered with node -1 and a
  78. <c>NULL</c> pointer, since the value
  79. of the provided buffer is simply
  80. ignored for now.
  81. */
  82. STARPU_REDUX=(1<<3), /**< todo */
  83. STARPU_COMMUTE=(1<<4), /**< ::STARPU_COMMUTE can be passed
  84. along ::STARPU_W or ::STARPU_RW to
  85. express that StarPU can let tasks
  86. commute, which is useful e.g. when
  87. bringing a contribution into some
  88. data, which can be done in any order
  89. (but still require sequential
  90. consistency against reads or
  91. non-commutative writes).
  92. */
  93. STARPU_SSEND=(1<<5), /**< used in starpu_mpi_insert_task() to
  94. specify the data has to be sent using
  95. a synchronous and non-blocking mode
  96. (see starpu_mpi_issend())
  97. */
  98. STARPU_LOCALITY=(1<<6), /**< used to tell the scheduler which
  99. data is the most important for the
  100. task, and should thus be used to
  101. try to group tasks on the same core
  102. or cache, etc. For now only the ws
  103. and lws schedulers take this flag
  104. into account, and only when rebuild
  105. with \c USE_LOCALITY flag defined in
  106. the
  107. src/sched_policies/work_stealing_policy.c
  108. source code.
  109. */
  110. STARPU_ACCESS_MODE_MAX=(1<<7) /**< todo */
  111. };
  112. /**
  113. Describe a data handle along with an access mode.
  114. */
  115. struct starpu_data_descr
  116. {
  117. starpu_data_handle_t handle; /**< data */
  118. enum starpu_data_access_mode mode; /**< access mode */
  119. };
  120. struct starpu_data_interface_ops;
  121. /** Set the name of the data, to be shown in various profiling tools. */
  122. void starpu_data_set_name(starpu_data_handle_t handle, const char *name);
  123. /**
  124. Set the coordinates of the data, to be shown in various profiling
  125. tools. \p dimensions is the size of the \p dims array. This can be
  126. for instance the tile coordinates within a big matrix.
  127. */
  128. void starpu_data_set_coordinates_array(starpu_data_handle_t handle, int dimensions, int dims[]);
  129. /**
  130. Set the coordinates of the data, to be shown in various profiling
  131. tools. \p dimensions is the number of subsequent \c int parameters.
  132. This can be for instance the tile coordinates within a big matrix.
  133. */
  134. void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...);
  135. /**
  136. Unregister a data \p handle from StarPU. If the data was
  137. automatically allocated by StarPU because the home node was -1, all
  138. automatically allocated buffers are freed. Otherwise, a valid copy
  139. of the data is put back into the home node in the buffer that was
  140. initially registered. Using a data handle that has been
  141. unregistered from StarPU results in undefined behaviour. In case
  142. we do not need to update the value of the data in the home node, we
  143. can use the function starpu_data_unregister_no_coherency() instead.
  144. */
  145. void starpu_data_unregister(starpu_data_handle_t handle);
  146. /**
  147. Similar to starpu_data_unregister(), except that StarPU does not
  148. put back a valid copy into the home node, in the buffer that was
  149. initially registered.
  150. */
  151. void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
  152. /**
  153. Destroy the data \p handle once it is no longer needed by any
  154. submitted task. No coherency is assumed.
  155. */
  156. void starpu_data_unregister_submit(starpu_data_handle_t handle);
  157. /**
  158. Destroy all replicates of the data \p handle immediately. After
  159. data invalidation, the first access to \p handle must be performed
  160. in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R
  161. mode results in undefined behaviour.
  162. */
  163. void starpu_data_invalidate(starpu_data_handle_t handle);
  164. /**
  165. Submit invalidation of the data \p handle after completion of
  166. previously submitted tasks.
  167. */
  168. void starpu_data_invalidate_submit(starpu_data_handle_t handle);
  169. /**
  170. Specify that the data \p handle can be discarded without impacting
  171. the application.
  172. */
  173. void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
  174. /** @name Access registered data from the application
  175. * @{
  176. */
  177. /**
  178. This macro can be used to acquire data, but not require it to be
  179. available on a given node, only enforce R/W dependencies. This can
  180. for instance be used to wait for tasks which produce the data, but
  181. without requesting a fetch to the main memory.
  182. */
  183. #define STARPU_ACQUIRE_NO_NODE -1
  184. /**
  185. Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all
  186. nodes, preventing them from being evicted for instance. This is
  187. mostly useful inside StarPU only.
  188. */
  189. #define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2
  190. /**
  191. The application must call this function prior to accessing
  192. registered data from main memory outside tasks. StarPU ensures that
  193. the application will get an up-to-date copy of \p handle in main
  194. memory located where the data was originally registered, and that
  195. all concurrent accesses (e.g. from tasks) will be consistent with
  196. the access mode specified with \p mode. starpu_data_release() must
  197. be called once the application no longer needs to access the piece
  198. of data. Note that implicit data dependencies are also enforced by
  199. starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
  200. tasks scheduled to work on the data, unless they have been disabled
  201. explicitly by calling
  202. starpu_data_set_default_sequential_consistency_flag() or
  203. starpu_data_set_sequential_consistency_flag().
  204. starpu_data_acquire() is a blocking call, so that it cannot be
  205. called from tasks or from their callbacks (in that case,
  206. starpu_data_acquire() returns <c>-EDEADLK</c>). Upon successful
  207. completion, this function returns 0.
  208. */
  209. int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
  210. /**
  211. Similar to starpu_data_acquire(), except that the data will be
  212. available on the given memory node instead of main memory.
  213. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can
  214. be used instead of an explicit node number.
  215. */
  216. int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
  217. /**
  218. Asynchronous equivalent of starpu_data_acquire(). When the data
  219. specified in \p handle is available in the access \p mode, the \p
  220. callback function is executed. The application may access
  221. the requested data during the execution of \p callback. The \p callback
  222. function must call starpu_data_release() once the application no longer
  223. needs to access the piece of data. Note that implicit data
  224. dependencies are also enforced by starpu_data_acquire_cb() in case they
  225. are not disabled. Contrary to starpu_data_acquire(), this function is
  226. non-blocking and may be called from task callbacks. Upon successful
  227. completion, this function returns 0.
  228. */
  229. int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
  230. /**
  231. Similar to starpu_data_acquire_cb(), except that the
  232. data will be available on the given memory node instead of main
  233. memory.
  234. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
  235. used instead of an explicit node number.
  236. */
  237. int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
  238. /**
  239. Similar to starpu_data_acquire_cb() with the possibility of
  240. enabling or disabling data dependencies.
  241. When the data specified in \p handle is available in the access
  242. \p mode, the \p callback function is executed. The application may access
  243. the requested data during the execution of this \p callback. The \p callback
  244. function must call starpu_data_release() once the application no longer
  245. needs to access the piece of data. Note that implicit data
  246. dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
  247. are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
  248. Similarly to starpu_data_acquire_cb(), this function is
  249. non-blocking and may be called from task callbacks. Upon successful
  250. completion, this function returns 0.
  251. */
  252. int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
  253. /**
  254. Similar to starpu_data_acquire_cb_sequential_consistency(), except that the
  255. data will be available on the given memory node instead of main
  256. memory.
  257. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
  258. explicit node number.
  259. */
  260. int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
  /**
  Variant of starpu_data_acquire_on_node_cb_sequential_consistency()
  taking one additional parameter, \p quick.
  NOTE(review): the meaning of \p quick is not described anywhere in
  this header --- confirm against the implementation before relying on it.
  */
  261. int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick);
  262. /**
  263. Similar to starpu_data_acquire_on_node_cb_sequential_consistency(),
  264. except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
  265. to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
  266. just before the acquisition, and \e post_sync_jobid happens just after the
  267. release.
  268. */
  269. int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid);
  270. /**
  271. The application can call this function instead of starpu_data_acquire() so as to
  272. acquire the data like starpu_data_acquire(), but only if all
  273. previously-submitted tasks have completed, in which case starpu_data_acquire_try()
  274. returns 0. StarPU will have ensured that the application will get an up-to-date
  275. copy of \p handle in main memory located where the data was originally
  276. registered. starpu_data_release() must be called once the application no longer
  277. needs to access the piece of data.
  278. */
  279. int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
  280. /**
  281. Similar to starpu_data_acquire_try(), except that the
  282. data will be available on the given memory node instead of main
  283. memory.
  284. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
  285. explicit node number.
  286. */
  287. int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
  288. #ifdef __GCC__
  289. /**
  290. STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
  291. except that the code to be executed in a callback is directly provided
  292. as a macro parameter, and the data \p handle is automatically released
  293. after it. This permits to easily execute code which depends on the
  294. value of some registered data. This is non-blocking too and may be
  295. called from task callbacks.
  296. */
  297. # define STARPU_DATA_ACQUIRE_CB(handle, mode, code) do \
  298. { \ \
  299. void callback(void *arg) \
  300. { \
  301. code; \
  302. starpu_data_release(handle); \
  303. } \
  304. starpu_data_acquire_cb(handle, mode, callback, NULL); \
  305. } \
  306. while(0)
  307. #endif
  308. /**
  309. Release the piece of data acquired by the
  310. application either by starpu_data_acquire() or by
  311. starpu_data_acquire_cb().
  312. */
  313. void starpu_data_release(starpu_data_handle_t handle);
  314. /**
  315. Similar to starpu_data_release(), except that the data
  316. will be available on the given memory \p node instead of main memory.
  317. The \p node parameter must be exactly the same as the corresponding \c
  318. starpu_data_acquire_on_node* call.
  319. */
  320. void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
  321. /** @} */
  322. /**
  323. This is an arbiter, which implements an advanced but centralized
  324. management of concurrent data accesses, see \ref
  325. ConcurrentDataAccess for the details.
  326. */
  327. typedef struct starpu_arbiter *starpu_arbiter_t;
  328. /**
  329. Create a data access arbiter, see \ref ConcurrentDataAccess for the
  330. details
  331. */
  332. starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC;
  333. /**
  334. Make access to \p handle managed by \p arbiter
  335. */
  336. void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
  337. /**
  338. Destroy the \p arbiter . This must only be called after all data
  339. assigned to it have been unregistered.
  340. */
  341. void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
  342. /**
  343. Explicitly ask StarPU to allocate room for a piece of data on
  344. the specified memory \p node.
  345. */
  346. int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
  347. /**
  348. Issue a fetch request for the data \p handle to \p node, i.e.
  349. requests that the data be replicated to the given node as soon as possible, so that it is
  350. available there for tasks. If \p async is 0, the call will
  351. block until the transfer is achieved, else the call will return immediately,
  352. after having just queued the request. In the latter case, the request will
  353. asynchronously wait for the completion of any task writing on the
  354. data.
  355. */
  356. int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
  357. /**
  358. Issue a prefetch request for the data \p handle to \p node, i.e.
  359. requests that the data be replicated to \p node when there is room for it, so that it is
  360. available there for tasks. If \p async is 0, the call will
  361. block until the transfer is achieved, else the call will return immediately,
  362. after having just queued the request. In the latter case, the request will
  363. asynchronously wait for the completion of any task writing on the
  364. data.
  365. */
  366. int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
  /**
  Variant of starpu_data_prefetch_on_node() taking one additional
  parameter, \p prio.
  NOTE(review): presumably the priority given to the prefetch request;
  this header does not say --- confirm against the implementation.
  */
  367. int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
  368. /**
  369. Issue an idle prefetch request for the data \p handle to \p node, i.e.
  370. requests that the data be replicated to \p node, so that it is
  371. available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
  372. block until the transfer is achieved, else the call will return immediately,
  373. after having just queued the request. In the latter case, the request will
  374. asynchronously wait for the completion of any task writing on the data.
  375. */
  376. int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
  /**
  Variant of starpu_data_idle_prefetch_on_node() taking one additional
  parameter, \p prio.
  NOTE(review): presumably the priority given to the idle prefetch
  request; this header does not say --- confirm against the implementation.
  */
  377. int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
  378. /**
  379. Check whether a valid copy of \p handle is currently available on
  380. memory node \p node.
  381. */
  382. unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
  383. /**
  384. Advise StarPU that \p handle will not be used in the near future, and is
  385. thus a good candidate for eviction from GPUs. StarPU will thus write its value
  386. back to its home node when the bus is idle, and select this data in priority
  387. for eviction when memory gets low.
  388. */
  389. void starpu_data_wont_use(starpu_data_handle_t handle);
  390. /**
  391. Set the write-through mask of the data \p handle (and
  392. its children), i.e. a bitmask of nodes where the data should be always
  393. replicated after modification. It also prevents the data from being
  394. evicted from these nodes when memory gets scarce. When the data is
  395. modified, it is automatically transferred into those memory nodes. For
  396. instance a <c>1<<0</c> write-through mask means that the CUDA workers
  397. will commit their changes in main memory (node 0).
  398. */
  399. void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
  400. /** @name Implicit Data Dependencies
  401. In this section, we describe how StarPU makes it possible to
  402. insert implicit task dependencies in order to enforce sequential data
  403. consistency. When this data consistency is enabled on a specific data
  404. handle, any data access will appear as sequentially consistent from
  405. the application. For instance, if the application submits two tasks
  406. that access the same piece of data in read-only mode, and then a third
  407. task that accesses it in write mode, dependencies will be added between
  408. the first two tasks and the third one. Implicit data dependencies are
  409. also inserted in the case of data accesses from the application.
  410. @{
  411. */
  412. /**
  413. Set the data consistency mode associated to a data handle. The
  414. consistency mode set using this function has the priority over the
  415. default mode which can be set with
  416. starpu_data_set_default_sequential_consistency_flag().
  417. */
  418. void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag);
  419. /**
  420. Get the data consistency mode associated to the data handle \p handle
  421. */
  422. unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle);
  423. /**
  424. Return the default sequential consistency flag
  425. */
  426. unsigned starpu_data_get_default_sequential_consistency_flag(void);
  427. /**
  428. Set the default sequential consistency flag. If a non-zero
  429. value is passed, a sequential data consistency will be enforced for
  430. all handles registered after this function call, otherwise it is
  431. disabled. By default, StarPU enables sequential data consistency. It
  432. is also possible to select the data consistency mode of a specific
  433. data handle with the function
  434. starpu_data_set_sequential_consistency_flag().
  435. */
  436. void starpu_data_set_default_sequential_consistency_flag(unsigned flag);
  437. /** @} */
  438. /**
  439. Set whether this data should be eligible to be evicted to disk
  440. storage (1) or not (0). The default is 1.
  441. */
  442. void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag);
  443. /**
  444. Get whether this data was set to be eligible to be evicted to disk
  445. storage (1) or not (0).
  446. */
  447. unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle);
  448. /**
  449. Query the status of \p handle on the specified \p memory_node.
  450. */
  451. void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested);
  452. struct starpu_codelet;
  453. /**
  454. Set the codelets to be used for \p handle when it is accessed in the
  455. mode ::STARPU_REDUX. Per-worker buffers will be initialized with
  456. the codelet \p init_cl, and reduction between per-worker buffers will be
  457. done with the codelet \p redux_cl.
  458. */
  459. void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
  /**
  Return a pointer to a struct starpu_data_interface_ops for \p handle.
  NOTE(review): presumably the interface operations the data was
  registered with; this header does not say --- confirm against the
  implementation.
  */
  460. struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle);
  /**
  NOTE(review): undocumented in this header. Judging by the name only,
  presumably returns non-zero when \p handle has a buffer allocated on
  \p memory_node --- confirm against the implementation.
  */
  461. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node);
  /**
  NOTE(review): undocumented in this header. Judging by the name only,
  presumably asks StarPU to tidy up the memory chunks of
  \p memory_node --- confirm against the implementation.
  */
  462. void starpu_memchunk_tidy(unsigned memory_node);
  463. /**
  464. Set the field \c user_data for the \p handle to \p user_data . It can
  465. then be retrieved with starpu_data_get_user_data(). \p user_data can be any
  466. application-defined value, for instance a pointer to an object-oriented
  467. container for the data.
  468. */
  469. void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
  470. /**
  471. Retrieve the field \c user_data previously set for the \p handle.
  472. */
  473. void *starpu_data_get_user_data(starpu_data_handle_t handle);
  474. /**
  475. Copy the content of \p src_handle into \p dst_handle. The parameter \p
  476. asynchronous indicates whether the function should block or not. In
  477. the case of an asynchronous call, it is possible to synchronize with
  478. the termination of this operation either by the means of implicit
  479. dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
  480. \p callback_func is not <c>NULL</c>, this callback function is executed after
  481. the handle has been copied, and it is given the pointer \p
  482. callback_arg as argument.
  483. */
  484. int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
  485. #ifdef __cplusplus
  486. }
  487. #endif
  488. /** @} */
  489. #endif /* __STARPU_DATA_H__ */