/*
 * This file is part of the StarPU Handbook.
 * Copyright (C) 2009--2011 Universit@'e de Bordeaux
 * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
 * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
 * See the file version.doxy for copying conditions.
 */

/*! \defgroup API_MPI_Support MPI Support

@name Initialisation
\ingroup API_MPI_Support

\def STARPU_USE_MPI
\ingroup API_MPI_Support
This macro is defined when StarPU has been installed with MPI
support. It should be used in your code to detect the availability of
MPI.

\fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
\ingroup API_MPI_Support
Initializes the starpumpi library. \p initialize_mpi indicates if MPI
should be initialized or not by StarPU. If the value is not 0, MPI
will be initialized by calling <c>MPI_Init_thread(argc, argv,
MPI_THREAD_SERIALIZED, ...)</c>.

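As an illustration, a minimal sketch of an application initializing
and shutting down StarPU and StarPU-MPI (error handling is omitted):
\code
int main(int argc, char **argv)
{
	/* Initialize StarPU itself first. */
	int ret = starpu_init(NULL);
	if (ret != 0) return 1;
	/* Then StarPU-MPI; with initialize_mpi set to 1, StarPU also
	 * initializes MPI on our behalf. */
	ret = starpu_mpi_init(&argc, &argv, 1);
	if (ret != 0) return 1;

	/* ... register data, submit tasks and communications ... */

	starpu_mpi_shutdown();
	starpu_shutdown();
	return 0;
}
\endcode
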
\fn int starpu_mpi_initialize(void)
\deprecated
\ingroup API_MPI_Support
This function has been made deprecated. One should use the function
starpu_mpi_init() instead. This function does not call MPI_Init();
MPI_Init() must therefore be called beforehand.

\fn int starpu_mpi_initialize_extended(int *rank, int *world_size)
\deprecated
\ingroup API_MPI_Support
This function has been made deprecated. One should use the function
starpu_mpi_init() instead. MPI will be initialized by starpumpi by
calling <c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED,
...)</c>.

\fn int starpu_mpi_shutdown(void)
\ingroup API_MPI_Support
Cleans up the starpumpi library. This must be called after the last
call to the starpu_mpi functions and before starpu_shutdown().
MPI_Finalize() will be called if StarPU-MPI has been initialized by
starpu_mpi_init().

\fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
\ingroup API_MPI_Support
Retrieve the current amount of communications from the current node
into the array \p comm_amounts, which must have a size greater than
or equal to the world size. Communication statistics must be enabled
(see \ref STARPU_COMM_STATS).

@name Communication
\anchor MPIPtpCommunication
\ingroup API_MPI_Support

\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Performs a standard-mode, blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator \p
comm.

\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
\ingroup API_MPI_Support
Performs a standard-mode, blocking receive in \p data_handle from the
node \p source using the message tag \p mpi_tag within the
communicator \p comm.

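As an illustration, a sketch of a blocking exchange of a registered
variable between ranks 0 and 1 (the tag 42, the STARPU_MAIN_RAM home
node and the registration code are illustrative choices):
\code
float value = 3.14f;
starpu_data_handle_t handle;
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
starpu_variable_data_register(&handle, STARPU_MAIN_RAM,
                              (uintptr_t)&value, sizeof(value));
if (rank == 0)
{
	/* Blocking send to rank 1. */
	starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD);
}
else if (rank == 1)
{
	/* Blocking receive from rank 0. */
	MPI_Status status;
	starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, &status);
}
\endcode
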
\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Posts a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator \p
comm. After the call, the pointer to the request \p req can be used to
test or to wait for the completion of the communication.

\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Posts a non-blocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm.
After the call, the pointer to the request \p req can be used to test
or to wait for the completion of the communication.

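For example, a sketch posting a non-blocking send and later waiting
for it with starpu_mpi_wait() (reusing the \c handle from the previous
sketch):
\code
starpu_mpi_req req;
MPI_Status status;
/* Post the send; the call returns immediately. */
starpu_mpi_isend(handle, &req, 1, 42, MPI_COMM_WORLD);
/* ... overlap the communication with other work ... */
/* Block until the send has completed. */
starpu_mpi_wait(&req, &status);
\endcode
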
\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Posts a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator \p
comm. On completion, the \p callback function is called with the
argument \p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

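A sketch of a detached send; the callback function and its argument
are illustrative:
\code
void send_done(void *arg)
{
	fprintf(stderr, "send completed: %s\n", (char *)arg);
}

/* Post the send and forget about it: there is no request to test or
 * wait on, the callback is called on completion. */
starpu_mpi_isend_detached(handle, 1, 42, MPI_COMM_WORLD,
                          send_done, (void *)"my data");
\endcode
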
\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Posts a non-blocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm. On
completion, the \p callback function is called with the argument \p
arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
\ingroup API_MPI_Support
Posts a non-blocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm. On
completion, the \p callback function is called with the argument \p
arg.
The parameter \p sequential_consistency allows enabling or disabling
the sequential consistency for \p data_handle (sequential consistency
will be enabled or disabled based on the value of the parameter \p
sequential_consistency and the value of the sequential consistency
defined for \p data_handle).
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Performs a synchronous-mode, non-blocking send of \p data_handle to
the node \p dest using the message tag \p mpi_tag within the
communicator \p comm.

\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Performs a synchronous-mode, non-blocking send of \p data_handle to
the node \p dest using the message tag \p mpi_tag within the
communicator \p comm. On completion, the \p callback function is
called with the argument \p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
\ingroup API_MPI_Support
Returns when the operation identified by request \p req is complete.

\fn int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status)
\ingroup API_MPI_Support
If the operation identified by \p req is complete, sets \p flag to 1.
The \p status object is set to contain information on the completed
operation.

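For example, a sketch polling a pending request \c req while doing
other work (\c do_local_work() is a hypothetical application
function):
\code
int flag = 0;
MPI_Status status;
while (!flag)
{
	starpu_mpi_test(&req, &flag, &status);
	if (!flag)
		do_local_work(); /* hypothetical: make progress elsewhere */
}
\endcode
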
\fn int starpu_mpi_barrier(MPI_Comm comm)
\ingroup API_MPI_Support
Blocks the caller until all group members of the communicator \p comm
have called it.

\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts a standard-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator \p
comm. On completion, \p tag is unlocked.

\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts a non-blocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm. On
completion, \p tag is unlocked.

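A sketch combining a detached receive with a StarPU tag; the
application can then wait on the tag with starpu_tag_wait() from the
StarPU task API, or make tasks depend on it (the tag value 0x42 is
arbitrary):
\code
starpu_mpi_irecv_detached_unlock_tag(handle, 0, 42, MPI_COMM_WORLD,
                                     (starpu_tag_t)0x42);
/* ... */
/* Block until the receive has completed and unlocked the tag. */
starpu_tag_wait((starpu_tag_t)0x42);
\endcode
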
\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts \p array_size standard-mode, non-blocking sends. Each post sends
the n-th data of the array \p data_handle to the n-th node of the
array \p dest using the n-th message tag of the array \p mpi_tag
within the n-th communicator of the array \p comm. On completion of
all the requests, \p tag is unlocked.

\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts \p array_size non-blocking receives. Each post receives in the
n-th data of the array \p data_handle from the n-th node of the array
\p source using the n-th message tag of the array \p mpi_tag within
the n-th communicator of the array \p comm. On completion of all the
requests, \p tag is unlocked.

\fn int starpu_mpi_get_communication_tag(void)
\ingroup API_MPI_Support
todo

\fn void starpu_mpi_set_communication_tag(int tag)
\ingroup API_MPI_Support
todo

@name Communication Cache
\ingroup API_MPI_Support

\fn void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Clear the send and receive communication cache for the data
\p data_handle and invalidate the value. The function has to be called
synchronously by all the MPI nodes. The function does nothing if the
cache mechanism is disabled (see \ref STARPU_MPI_CACHE).

\fn void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
\ingroup API_MPI_Support
Clear the send and receive communication cache for all data and
invalidate their values. The function has to be called synchronously
by all the MPI nodes. The function does nothing if the cache mechanism
is disabled (see \ref STARPU_MPI_CACHE).

@name MPI Insert Task
\anchor MPIInsertTask
\ingroup API_MPI_Support

\fn int starpu_data_set_tag(starpu_data_handle_t handle, int tag)
\ingroup API_MPI_Support
Tell StarPU-MPI which MPI tag to use when exchanging the data.

\fn int starpu_data_get_tag(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Returns the MPI tag to be used when exchanging the data.

\fn void starpu_mpi_data_register(starpu_data_handle_t data_handle, int tag, int rank)
\ingroup API_MPI_Support
Calling this function should be preferred to calling both
starpu_data_set_rank() and starpu_data_set_tag(), as it also allows
the MPI communication cache to be automatically cleared when
unregistering the data.

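A sketch registering a vector with StarPU and then declaring its MPI
tag and owner to StarPU-MPI (\c vector and \c NX are illustrative, as
are the tag 42 and the owner rank 0):
\code
starpu_data_handle_t handle;
starpu_vector_data_register(&handle, STARPU_MAIN_RAM,
                            (uintptr_t)vector, NX, sizeof(vector[0]));
/* Tag 42 will be used for transfers of this data; rank 0 owns it. */
starpu_mpi_data_register(handle, 42, 0);
\endcode
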
\fn int starpu_data_set_rank(starpu_data_handle_t handle, int rank)
\ingroup API_MPI_Support
Tell StarPU-MPI which MPI node "owns" a given data, that is, the node
which will always keep an up-to-date value, and will by default
execute tasks which write to it.

\fn int starpu_data_get_rank(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Returns the last value set by starpu_data_set_rank().

\def STARPU_EXECUTE_ON_NODE
\ingroup API_MPI_Support
This macro is used when calling starpu_mpi_task_insert(), and must be
followed by an integer value specifying the node on which to execute
the codelet.

\def STARPU_EXECUTE_ON_DATA
\ingroup API_MPI_Support
This macro is used when calling starpu_mpi_task_insert(), and must be
followed by a data handle to specify that the node owning the given
data will execute the codelet.

\fn int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
This function does the same as the function starpu_mpi_task_insert().
It has been kept to avoid breaking old code.

\fn int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create and submit a task corresponding to \p codelet with the
following arguments. The argument list must be zero-terminated.
The arguments following the codelet are the same types as for the
function starpu_task_insert(). Access modes for data can also be set
with ::STARPU_SSEND to specify that the data has to be sent using a
synchronous and non-blocking mode (see starpu_mpi_issend()).
The extra argument ::STARPU_EXECUTE_ON_NODE followed by an integer
allows specifying the MPI node to execute the codelet. It is also
possible to specify that the node owning a specific data will execute
the codelet, by using ::STARPU_EXECUTE_ON_DATA followed by a data
handle.
The internal algorithm is as follows:
<ol>
<li>
Find out which MPI node is going to execute the codelet.
<ul>
<li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
<li>If several nodes own data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_default_policy());
<li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
<li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
</ul>
</li>
<li>
Send and receive data as requested. Nodes owning data which need to be read by the task send them to the MPI node which will execute it. The latter receives them.
</li>
<li>
Execute the codelet. This is done by the MPI node selected in the first step of the algorithm.
</li>
<li>
If several MPI nodes own data to be written to, send the written data back to their owners.
</li>
</ol>
The algorithm also includes a communication cache mechanism that
avoids sending data twice to the same MPI node, unless the data has
been modified. The cache can be disabled (see \ref STARPU_MPI_CACHE).

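As an illustration, a sketch in which every node calls
starpu_mpi_task_insert() with the same arguments (\c cl is a
hypothetical codelet writing to \c handle; the needed transfers are
inferred from the registered owner of \c handle):
\code
starpu_mpi_task_insert(MPI_COMM_WORLD, &cl,
                       STARPU_RW, handle,
                       0);
/* Wait for the task (and any implied communications) to complete. */
starpu_task_wait_for_all();
\endcode
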
\fn struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create a task corresponding to \p codelet with the following
arguments. The argument list must be zero-terminated. The function
performs the first two steps of the function starpu_mpi_task_insert().
Only the MPI node selected in the first step of the algorithm will
return a valid task structure which can then be submitted. The
function starpu_mpi_task_post_build() MUST be called after the
submission of the task, with the SAME list of arguments.

\fn int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
This function MUST be called after a call to starpu_mpi_task_build(),
with the SAME list of arguments. It performs the fourth and last step
of the algorithm described in starpu_mpi_task_insert().

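A sketch of the build/submit/post-build sequence (\c cl and \c handle
are hypothetical; note that the argument lists passed to
starpu_mpi_task_build() and starpu_mpi_task_post_build() are
identical):
\code
struct starpu_task *task;
task = starpu_mpi_task_build(MPI_COMM_WORLD, &cl, STARPU_RW, handle, 0);
/* Only the node selected to execute the codelet gets a non-NULL task. */
if (task)
	starpu_task_submit(task);
/* Every node runs the final step with the SAME argument list. */
starpu_mpi_task_post_build(MPI_COMM_WORLD, &cl, STARPU_RW, handle, 0);
\endcode
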
\fn void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function.

\fn void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function. On reception, the \p callback function is called with
the argument \p arg.

\fn char *starpu_mpi_node_selection_get_default_policy()
\ingroup API_MPI_Support
Return the current default policy used to select the node which will
execute the codelet.

\fn int starpu_mpi_node_selection_set_default_policy(char *policy)
\ingroup API_MPI_Support
Set the current default policy used to select the node which will
execute the codelet. The policy "node_with_most_R_data" selects the
node having the most data in R mode so as to minimize the amount of
data to be transferred.

@name Collective Operations
\anchor MPICollectiveOperations
\ingroup API_MPI_Support

\fn void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Perform a reduction on the given data. All nodes send the data to its
owner node, which performs the reduction.

\fn int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Scatter data among processes of the communicator based on the
ownership of the data. For each data of the array \p data_handles, the
process \p root sends the data to the process owning this data.
Processes receiving data must have valid data handles to receive them.
On completion of the collective communication, the \p scallback
function is called with the argument \p sarg on the process \p root,
and the \p rcallback function is called with the argument \p rarg on
any other process.

\fn int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Gather data from the different processes of the communicator onto the
process \p root. Each process owning a data handle in the array
\p data_handles will send it to the process \p root. The process \p
root must have valid data handles to receive the data. On completion
of the collective communication, the \p rcallback function is called
with the argument \p rarg on the process \p root, and the \p scallback
function is called with the argument \p sarg on any other process.

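As an illustration, a sketch in which the root scatters one handle per
process and later gathers them back (\c data_handles and \c
nb_processes are illustrative; passing NULL callbacks, assumed here to
be accepted, simply skips the notifications):
\code
starpu_mpi_scatter_detached(data_handles, nb_processes, 0, MPI_COMM_WORLD,
                            NULL, NULL, NULL, NULL);
/* ... each process works on the piece it owns ... */
starpu_mpi_gather_detached(data_handles, nb_processes, 0, MPI_COMM_WORLD,
                           NULL, NULL, NULL, NULL);
\endcode
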
*/