/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2018 CNRS
 * Copyright (C) 2011-2012,2016,2017 Inria
 * Copyright (C) 2009-2011,2014-2018 Université de Bordeaux
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
/*! \defgroup API_MPI_Support MPI Support
@name Initialisation
\ingroup API_MPI_Support
\def STARPU_USE_MPI
\ingroup API_MPI_Support
Defined when StarPU has been installed with MPI support. It should be
used in your code to detect the availability of MPI.
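For instance, a minimal sketch of such a guard (assuming the StarPU-MPI header is
named <c>starpu_mpi.h</c>, as in the StarPU distribution):
\code{.c}
#include <starpu.h>
#ifdef STARPU_USE_MPI
/* Only pull in the StarPU-MPI API when MPI support was built in */
#include <starpu_mpi.h>
#endif
\endcode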
\fn int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
\ingroup API_MPI_Support
Initialize the starpumpi library with the given communicator \p comm.
\p initialize_mpi indicates whether MPI should be initialized by StarPU.
If the value is not 0, MPI will be initialized by calling
<c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, ...)</c>.
starpu_init() must be called before starpu_mpi_init_comm().
\fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
\ingroup API_MPI_Support
Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD.
\fn int starpu_mpi_init_with_driver(int *argc, char ***argv, int initialize_mpi, struct starpu_conf *conf)
\ingroup API_MPI_Support
Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD,
and reserve CPU driver 0 for the MPI thread. The MPI thread will run this
driver, and thus execute tasks, whenever it has no communication requests to
handle. starpu_mpi_init_with_driver() also calls starpu_init() internally.
\fn int starpu_mpi_initialize(void)
\deprecated
\ingroup API_MPI_Support
This function is deprecated. One should instead use the function
starpu_mpi_init(). This function does not call \c MPI_Init(), which
should therefore be called beforehand.
\fn int starpu_mpi_initialize_extended(int *rank, int *world_size)
\deprecated
\ingroup API_MPI_Support
This function is deprecated. One should instead use the function
starpu_mpi_init(). MPI will be initialized by starpumpi by
calling <c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED,
...)</c>.
\fn int starpu_mpi_shutdown(void)
\ingroup API_MPI_Support
Clean the starpumpi library. This must be called after the last call to a
\c starpu_mpi function and before starpu_shutdown(). \c MPI_Finalize() will be
called if StarPU-MPI has been initialized by starpu_mpi_init().
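A minimal sketch of the expected initialisation and shutdown sequence (error
handling is kept terse; StarPU is asked to initialize MPI itself by passing a
non-zero \c initialize_mpi):
\code{.c}
#include <starpu.h>
#include <starpu_mpi.h>

int main(int argc, char **argv)
{
	int ret = starpu_init(NULL);            /* start StarPU first */
	if (ret != 0) return 1;
	ret = starpu_mpi_init(&argc, &argv, 1); /* then StarPU-MPI, letting it call MPI_Init_thread() */
	if (ret != 0) return 1;

	/* ... data registrations, communications, task submissions ... */

	starpu_mpi_shutdown();                  /* after the last starpu_mpi_* call */
	starpu_shutdown();
	return 0;
}
\endcode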
\fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
\ingroup API_MPI_Support
Retrieve the current amount of communications from the current node in
the array \p comm_amounts which must have a size greater than or equal to
the world size. Communications statistics must be enabled (see
\ref STARPU_COMM_STATS).
\fn int starpu_mpi_comm_size(MPI_Comm comm, int *size)
\ingroup API_MPI_Support
Return in \p size the size of the communicator \p comm.
\fn int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
\ingroup API_MPI_Support
Return in \p rank the rank of the calling process in the communicator \p comm.
\fn int starpu_mpi_world_rank(void)
\ingroup API_MPI_Support
Return the rank of the calling process in the communicator \c MPI_COMM_WORLD.
\fn int starpu_mpi_world_size(void)
\ingroup API_MPI_Support
Return the size of the communicator \c MPI_COMM_WORLD.
@name Communication
\anchor MPIPtpCommunication
\ingroup API_MPI_Support
\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Perform a standard-mode, blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm.
\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
\ingroup API_MPI_Support
Similar to starpu_mpi_send(), but takes a priority \p prio.
\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status)
\ingroup API_MPI_Support
Perform a standard-mode, blocking receive in \p data_handle from the
node \p source using the message tag \p data_tag within the
communicator \p comm.
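A minimal sketch of a blocking exchange between two ranks (the vector
registration, the tag value, and passing \c MPI_STATUS_IGNORE when the status
is not needed are illustrative assumptions):
\code{.c}
float vector[256];
starpu_data_handle_t handle;
int rank;

starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, 256, sizeof(vector[0]));
starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0)
	starpu_mpi_send(handle, 1, 42 /* tag */, MPI_COMM_WORLD);
else if (rank == 1)
	starpu_mpi_recv(handle, 0, 42 /* tag */, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
\endcode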
\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Post a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm. After the call, the pointer to the request \p req can be used to
test or to wait for the completion of the communication.
\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
\ingroup API_MPI_Support
Similar to starpu_mpi_isend(), but takes a priority \p prio.
\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Post a non-blocking receive in \p data_handle from the node \p source
using the message tag \p data_tag within the communicator \p comm.
After the call, the pointer to the request \p req can be used to test
or to wait for the completion of the communication.
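A minimal sketch of a non-blocking exchange followed by an explicit wait
(exactly two ranks are assumed, \c handle is assumed to be already registered,
and the tag value is illustrative):
\code{.c}
starpu_mpi_req req;
int rank;

starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0)
	starpu_mpi_isend(handle, &req, 1, 7 /* tag */, MPI_COMM_WORLD);
else
	starpu_mpi_irecv(handle, &req, 0, 7 /* tag */, MPI_COMM_WORLD);

/* ... overlap other work here ... */

starpu_mpi_wait(&req, MPI_STATUS_IGNORE); /* block until the posted request completes */
\endcode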
\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Post a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm. On completion, the \p callback function is called with the
argument \p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.
\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Similar to starpu_mpi_isend_detached(), but takes a priority \p prio.
\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Post a non-blocking receive in \p data_handle from the node \p source
using the message tag \p data_tag within the communicator \p comm. On
completion, the \p callback function is called with the argument \p
arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.
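A minimal sketch of a detached receive with a completion callback (the callback
body, the helper function <c>post_receive()</c>, the source rank and the tag are
illustrative assumptions):
\code{.c}
void recv_done(void *arg)
{
	/* Called by StarPU-MPI once the detached receive has completed */
	int *flag = (int *)arg;
	*flag = 1;
}

void post_receive(starpu_data_handle_t handle)
{
	static int received = 0;
	starpu_mpi_irecv_detached(handle, 0 /* source */, 7 /* tag */, MPI_COMM_WORLD, recv_done, &received);
	/* No starpu_mpi_wait()/starpu_mpi_test() is needed: the request is detached,
	 * and its resources are released automatically once recv_done() has run. */
}
\endcode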
\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
\ingroup API_MPI_Support
Post a non-blocking receive in \p data_handle from the node \p source
using the message tag \p data_tag within the communicator \p comm. On
completion, the \p callback function is called with the argument \p
arg.
The parameter \p sequential_consistency allows enabling or disabling
the sequential consistency for \p data_handle (sequential consistency
will be enabled or disabled based on the value of the parameter \p
sequential_consistency and the value of the sequential consistency
defined for \p data_handle).
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.
\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Perform a synchronous-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm.
\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
\ingroup API_MPI_Support
Similar to starpu_mpi_issend(), but takes a priority \p prio.
\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Perform a synchronous-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm. On completion, the \p callback function is called with the argument \p
arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.
\fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
\ingroup API_MPI_Support
Return when the operation identified by request \p req is complete.
\fn int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status)
\ingroup API_MPI_Support
If the operation identified by \p req is complete, set \p flag to 1.
The \p status object is set to contain information on the completed
operation.
\fn int starpu_mpi_barrier(MPI_Comm comm)
\ingroup API_MPI_Support
Block the caller until all group members of the communicator \p comm
have called it.
\fn int starpu_mpi_wait_for_all(MPI_Comm comm)
\ingroup API_MPI_Support
Wait until all StarPU tasks and communications for the given communicator are completed.
\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Post a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p data_tag within the communicator \p
comm. On completion, \p tag is unlocked.
\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Similar to starpu_mpi_isend_detached_unlock_tag(), but takes a priority \p prio.
\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Post a non-blocking receive in \p data_handle from the node \p source
using the message tag \p data_tag within the communicator \p comm. On
completion, \p tag is unlocked.
\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Post \p array_size standard-mode, non-blocking sends. The n-th post sends
the n-th data of the array \p data_handle to the n-th node of the
array \p dest using the n-th message tag of the array \p data_tag
within the n-th communicator of the array \p comm. On completion of
all the requests, \p tag is unlocked.
\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Similar to starpu_mpi_isend_array_detached_unlock_tag(), but takes an array of priorities \p prio.
\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Post \p array_size non-blocking receives. The n-th post receives in the n-th
data of the array \p data_handle from the n-th node of the array \p
source using the n-th message tag of the array \p data_tag within the
n-th communicator of the array \p comm. On completion of all the
requests, \p tag is unlocked.
\fn int starpu_mpi_get_communication_tag(void)
\ingroup API_MPI_Support
todo
\fn void starpu_mpi_set_communication_tag(int tag)
\ingroup API_MPI_Support
todo
\fn int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func)
\ingroup API_MPI_Support
Register functions to create and free an MPI datatype for the given handle.
The function must be called before any communication takes place for data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example.
\fn int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
\ingroup API_MPI_Support
Unregister the MPI datatype functions stored for the interface of the given handle.
\def STARPU_MPI_TAG_UB
\ingroup API_MPI_Support
When given to the function starpu_mpi_comm_get_attr(), retrieve the
value of the upper bound for tag values.
\fn int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
\ingroup API_MPI_Support
Retrieve an attribute value by key, similarly to the MPI function \c MPI_Comm_get_attr(), except that the value is a pointer to int64_t instead of int.
If an attribute is attached on \p comm to \p keyval, then the call
returns \p flag equal to \c 1, and the attribute value in \p
attribute_val. Otherwise, \p flag is set to \c 0.
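A minimal sketch of querying the tag upper bound (variable names are
illustrative):
\code{.c}
int64_t *tag_ub;
int flag;

starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &tag_ub, &flag);
if (flag)
	printf("maximum usable tag value: %lld\n", (long long)*tag_ub);
\endcode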
@name Communication Cache
\ingroup API_MPI_Support
\fn int starpu_mpi_cache_is_enabled()
\ingroup API_MPI_Support
Return 1 if the communication cache is enabled, 0 otherwise.
\fn int starpu_mpi_cache_set(int enabled)
\ingroup API_MPI_Support
If \p enabled is 1, enable the communication cache. Otherwise, clean the cache if it was enabled and disable it.
\fn void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Clear the send and receive communication cache for the data
\p data_handle and invalidate the value. The function has to be called at the
same point of task graph submission by all the MPI nodes on which the handle was
registered. The function does nothing if the cache mechanism is
disabled (see \ref STARPU_MPI_CACHE).
\fn void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
\ingroup API_MPI_Support
Clear the send and receive communication cache for all data and invalidate their values. The
function has to be called at the same point of task graph submission by all the MPI nodes. The
function does nothing if the cache mechanism is disabled (see
\ref STARPU_MPI_CACHE).
\fn int starpu_mpi_cached_receive(starpu_data_handle_t data_handle);
\ingroup API_MPI_Support
Test whether \p data_handle is cached for reception, i.e. the value was
previously received from the owner node, and not flushed since then.
\fn int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest);
\ingroup API_MPI_Support
Test whether \p data_handle is cached for emission to node \p dest, i.e. the
value was previously sent to \p dest, and not flushed since then.
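A minimal sketch of flushing the cache once a handle will no longer be
exchanged (the surrounding submission loop and the handle are assumptions; the
explicit enable check is redundant, since starpu_mpi_cache_flush() is a no-op
when the cache is disabled, and is shown only to illustrate both calls):
\code{.c}
/* After the last task reading 'handle' has been submitted on every node,
 * drop the cached copies so the memory can be reclaimed. */
if (starpu_mpi_cache_is_enabled())
	starpu_mpi_cache_flush(MPI_COMM_WORLD, handle);
\endcode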
@name MPI Insert Task
\anchor MPIInsertTask
\ingroup API_MPI_Support
\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag, rank and MPI communicator.
It also automatically clears the MPI communication cache when unregistering the data.
\def starpu_mpi_data_register(data_handle, data_tag, rank)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD.
It also automatically clears the MPI communication cache when unregistering the data.
\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag. No rank will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
\def starpu_data_set_tag
\ingroup API_MPI_Support
Symbol kept for backward compatibility. Calls starpu_mpi_data_set_tag().
\fn void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and given communicator. No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
\def starpu_mpi_data_set_rank
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
Symbol kept for backward compatibility. Calls starpu_mpi_data_set_rank_comm().
\def starpu_data_set_rank
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
Symbol kept for backward compatibility. Calls starpu_mpi_data_set_rank().
\fn int starpu_mpi_data_get_rank(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Return the rank of the given data.
\def starpu_data_get_rank
\ingroup API_MPI_Support
Return the rank of the given data.
Symbol kept for backward compatibility. Calls starpu_mpi_data_get_rank().
\fn starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Return the tag of the given data.
\def starpu_data_get_tag
\ingroup API_MPI_Support
Return the tag of the given data.
Symbol kept for backward compatibility. Calls starpu_mpi_data_get_tag().
\def STARPU_MPI_PER_NODE
\ingroup API_MPI_Support
Can be used as rank when calling starpu_mpi_data_register() and alike, to
specify that the data is per-node: each node will have its own value. Tasks
writing to such data will be replicated on all nodes (and all parameters then
have to be per-node). Tasks not writing to such data will just take the
node-local value without any MPI communication.
\fn void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank)
\ingroup API_MPI_Support
Submit the migration of the data onto the \p new_rank MPI node. This means both submitting the transfer of
the data to node \p new_rank if it hasn't been submitted already, and setting
the home node of the data to the new node. Further data transfers submitted by
starpu_mpi_task_insert() will be done from that new node. This function thus
needs to be called on all nodes which have registered the data, at the same point of task submission. This also
flushes the cache for this data, to avoid incoherency.
\def STARPU_EXECUTE_ON_NODE
\ingroup API_MPI_Support
Used when calling starpu_mpi_task_insert(), must be
followed by an integer value which specifies the node on which to
execute the codelet.
\def STARPU_EXECUTE_ON_DATA
\ingroup API_MPI_Support
Used when calling starpu_mpi_task_insert(), must be
followed by a data handle to specify that the node owning the given
data will execute the codelet.
\def STARPU_NODE_SELECTION_POLICY
\ingroup API_MPI_Support
Used when calling starpu_mpi_task_insert(), must be
followed by the identifier of a node selection policy. This is needed when several
nodes own data in ::STARPU_W mode.
\fn int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Call starpu_mpi_task_insert(). Symbol kept for backward compatibility.
\fn int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create and submit a task corresponding to \p codelet with the following
arguments. The argument list must be zero-terminated.
The arguments following the codelet are the same types as for the
function starpu_task_insert(). Access modes for data can also be set
with ::STARPU_SSEND to specify that the data has to be sent using a
synchronous and non-blocking mode (see starpu_mpi_issend()).
The extra argument
::STARPU_EXECUTE_ON_NODE followed by an integer allows specifying the
MPI node to execute the codelet. It is also possible to specify that
the node owning a specific data will execute the codelet, by using
::STARPU_EXECUTE_ON_DATA followed by a data handle.
The internal algorithm is as follows:
<ol>
<li>
Find out which MPI node is going to execute the codelet.
<ul>
<li>If only one node owns data in ::STARPU_W mode, it will be selected;
<li>If several nodes own data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_current_policy());
<li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
<li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
</ul>
</li>
<li>
Send and receive data as requested. Nodes owning data which need to be read by the task send them to the MPI node which will execute it. The latter receives them.
</li>
<li>
Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
</li>
<li>
If several MPI nodes own data to be written to, send the written data back to their owners.
</li>
</ol>
The algorithm also includes a communication cache mechanism that
avoids sending data twice to the same MPI node, unless the data
has been modified. The cache can be disabled (see \ref STARPU_MPI_CACHE).
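As a minimal sketch (the codelet \c my_codelet, the handles \c handle_A and
\c handle_B, and the tag values are illustrative assumptions), registering
distributed data and submitting a task on it could look like:
\code{.c}
/* Each handle is owned by one MPI rank and identified by a unique tag */
starpu_mpi_data_register(handle_A, 10 /* tag */, 0 /* owner rank */);
starpu_mpi_data_register(handle_B, 11 /* tag */, 1 /* owner rank */);

/* Every node calls this at the same point of submission; StarPU-MPI
 * inserts the needed sends/receives and runs the task on the node
 * selected by the algorithm above (here, the owner of the STARPU_W data). */
starpu_mpi_task_insert(MPI_COMM_WORLD, &my_codelet,
                       STARPU_R, handle_A,
                       STARPU_W, handle_B,
                       0);
\endcode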
\fn struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create a task corresponding to \p codelet with the given arguments.
The argument list must be zero-terminated. The function performs the
first two steps of the function starpu_mpi_task_insert(), i.e. submitting the
MPI communications needed before the execution of the task, and the creation of
the task on one node. Only the MPI
node selected in the first step of the algorithm will return a valid
task structure which can then be submitted; the others will return <c>NULL</c>. The function
starpu_mpi_task_post_build() MUST be called after that on all nodes, and after the submission of
the task on the node which creates it, with the SAME list of arguments.
\fn int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
MUST be called after a call to starpu_mpi_task_build(),
with the SAME list of arguments. Perform the fourth -- last -- step of
the algorithm described in starpu_mpi_task_insert().
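A minimal sketch of the build/submit/post-build sequence (the codelet and
handles are the same illustrative assumptions as in the previous example):
\code{.c}
struct starpu_task *task;

task = starpu_mpi_task_build(MPI_COMM_WORLD, &my_codelet,
                             STARPU_R, handle_A,
                             STARPU_W, handle_B,
                             0);
if (task)
	starpu_task_submit(task); /* only the selected node gets a non-NULL task */

/* All nodes, with the SAME argument list */
starpu_mpi_task_post_build(MPI_COMM_WORLD, &my_codelet,
                           STARPU_R, handle_A,
                           STARPU_W, handle_B,
                           0);
\endcode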
\fn void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function.
\fn void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function. On reception, the \p callback function is called with
the argument \p arg.
\fn void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Transfer data \p data_handle to all MPI nodes, sending it from its
owner if needed. All nodes have to call the function.
@name Node Selection Policy
\anchor MPINodeSelectionPolicy
\ingroup API_MPI_Support
\def STARPU_MPI_NODE_SELECTION_CURRENT_POLICY
\ingroup API_MPI_Support
todo
\def STARPU_MPI_NODE_SELECTION_MOST_R_DATA
\ingroup API_MPI_Support
todo
\fn int starpu_mpi_node_selection_get_current_policy()
\ingroup API_MPI_Support
Return the current policy used to select the node which will execute the codelet.
\fn int starpu_mpi_node_selection_set_current_policy(int policy)
\ingroup API_MPI_Support
Set the current policy used to select the node which will
execute the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the
node having the most data in ::STARPU_R mode, so as to minimize the amount of
data to be transferred.
\fn int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func)
\ingroup API_MPI_Support
Register a new policy which can then be used when several nodes own data in ::STARPU_W mode.
Here is an example of a function defining a node selection policy.
The codelet will be executed on the node owning the first data larger than 1 MB, or on node
0 if no data is that large.
\code{.c}
int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
{
	// me is the current MPI rank
	// nb_nodes is the number of MPI nodes
	// descr is the description of the data specified when calling starpu_mpi_task_insert
	// nb_data is the number of data in descr
	int i;
	for(i = 0 ; i < nb_data ; i++)
	{
		starpu_data_handle_t data = descr[i].handle;
		enum starpu_data_access_mode mode = descr[i].mode;
		if (mode & STARPU_R)
		{
			int rank = starpu_data_get_rank(data);
			size_t size = starpu_data_get_size(data);
			if (size > 1024*1024)
				return rank;
		}
	}
	return 0;
}
\endcode
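Registering and activating such a policy could then look like this (a minimal
sketch; error checking omitted):
\code{.c}
int policy = starpu_mpi_node_selection_register_policy(my_node_selection_policy);
starpu_mpi_node_selection_set_current_policy(policy);
\endcode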
\fn int starpu_mpi_node_selection_unregister_policy(int policy)
\ingroup API_MPI_Support
Unregister a previously registered policy.
@name Collective Operations
\anchor MPICollectiveOperations
\ingroup API_MPI_Support
\fn void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Perform a reduction on the given data \p data_handle. All nodes send their
contribution to the owner node, which performs the reduction.
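A minimal sketch of a reduction (the reduction and initialization codelets
\c redux_cl and \c init_cl, and the handle, are assumed to be defined elsewhere,
as for any StarPU reduction):
\code{.c}
/* Declare how per-node contributions are combined and initialized */
starpu_data_set_reduction_methods(handle, &redux_cl, &init_cl);

/* ... every node submits tasks accessing 'handle' in STARPU_REDUX mode ... */

/* All nodes call this at the same point of submission; contributions are
 * sent to the owner of 'handle', which reduces them. */
starpu_mpi_redux_data(MPI_COMM_WORLD, handle);
\endcode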
\fn void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio)
\ingroup API_MPI_Support
Similar to starpu_mpi_redux_data(), but takes a priority \p prio.
\fn int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Scatter data among processes of the communicator based on the
ownership of the data. For each data of the array \p data_handles, the
process \p root sends the data to the process owning this data. Processes
receiving data must have valid data handles to receive them. On
completion of the collective communication, the \p scallback function is
called with the argument \p sarg on the process \p root, while the \p
rcallback function is called with the argument \p rarg on any other
process.
\fn int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Gather data from the different processes of the communicator onto the
process \p root. Each process owning a data handle in the array
\p data_handles will send it to the process \p root. The process \p
root must have valid data handles to receive the data. On completion
of the collective communication, the \p rcallback function is called
with the argument \p rarg on the process \p root, while the \p scallback
function is called with the argument \p sarg on any other process.
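A minimal sketch of a scatter followed by a gather over an array of handles
(the array \c handles, its size \c n, and passing <c>NULL</c> callbacks when no
completion notification is needed are illustrative assumptions):
\code{.c}
/* 'handles' holds 'n' data handles, each registered with starpu_mpi_data_register()
 * so that every piece has an owner rank; rank 0 holds the initial values. */
starpu_mpi_scatter_detached(handles, n, 0 /* root */, MPI_COMM_WORLD,
                            NULL, NULL,  /* no send callback */
                            NULL, NULL); /* no receive callback */

/* ... each node works on the pieces it owns ... */

starpu_mpi_gather_detached(handles, n, 0 /* root */, MPI_COMM_WORLD,
                           NULL, NULL, NULL, NULL);
\endcode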
@name MPI Master Slave
\anchor MPIMasterSlaveSupport
\ingroup API_MPI_Support
\def STARPU_USE_MPI_MASTER_SLAVE
\ingroup API_MPI_Support
Defined when StarPU has been installed with MPI Master Slave
support. It should be used in your code to detect the availability of
MPI Master Slave.
*/