starpu_mpi_init.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdlib.h>
  17. #include <starpu_mpi.h>
  18. #include <starpu_mpi_datatype.h>
  19. #include <starpu_mpi_private.h>
  20. #include <starpu_mpi_cache.h>
  21. #include <starpu_profiling.h>
  22. #include <starpu_mpi_stats.h>
  23. #include <starpu_mpi_cache.h>
  24. #include <starpu_mpi_select_node.h>
  25. #include <common/config.h>
  26. #include <common/thread.h>
  27. #include <datawizard/interfaces/data_interface.h>
  28. #include <datawizard/coherency.h>
  29. #include <core/simgrid.h>
  30. #include <core/task.h>
  #ifdef STARPU_SIMGRID
  /* World size and rank of this process, recorded by _starpu_mpi_do_initialize()
   * and served back by starpu_mpi_comm_size() / starpu_mpi_comm_rank() and
   * starpu_mpi_initialize_extended() in simgrid builds. */
  static int _mpi_world_size = 0;
  static int _mpi_world_rank = 0;
  #endif
  /* Set to 1 by starpu_mpi_init_conf() once its own starpu_init() call has
   * succeeded; starpu_mpi_shutdown() then also calls starpu_shutdown(). */
  static int _mpi_initialized_starpu = 0;
  36. static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
  37. {
  38. switch (thread_level)
  39. {
  40. case MPI_THREAD_SERIALIZED:
  41. {
  42. _STARPU_DISP("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
  43. break;
  44. }
  45. case MPI_THREAD_FUNNELED:
  46. {
  47. _STARPU_DISP("MPI%s MPI_THREAD_FUNNELED; The application can safely make calls to StarPU-MPI functions, but should not call directly MPI communication functions.\n", msg);
  48. break;
  49. }
  50. case MPI_THREAD_SINGLE:
  51. {
  52. _STARPU_DISP("MPI%s MPI_THREAD_SINGLE; MPI does not have multi-thread support, this might cause problems. The application can make calls to StarPU-MPI functions, but not call directly MPI Communication functions.\n", msg);
  53. break;
  54. }
  55. case MPI_THREAD_MULTIPLE:
  56. /* no problem */
  57. break;
  58. }
  59. }
  60. void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv)
  61. {
  62. if (argc_argv->initialize_mpi)
  63. {
  64. STARPU_ASSERT_MSG(argc_argv->comm == MPI_COMM_WORLD, "It does not make sense to ask StarPU-MPI to initialize MPI while a non-world communicator was given");
  65. int thread_support;
  66. _STARPU_DEBUG("Calling MPI_Init_thread\n");
  67. if (MPI_Init_thread(argc_argv->argc, argc_argv->argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
  68. {
  69. _STARPU_ERROR("MPI_Init_thread failed\n");
  70. }
  71. _starpu_mpi_print_thread_level_support(thread_support, "_Init_thread level =");
  72. }
  73. else
  74. {
  75. int provided;
  76. MPI_Query_thread(&provided);
  77. _starpu_mpi_print_thread_level_support(provided, " has been initialized with");
  78. }
  79. MPI_Comm_rank(argc_argv->comm, &argc_argv->rank);
  80. MPI_Comm_size(argc_argv->comm, &argc_argv->world_size);
  81. MPI_Comm_set_errhandler(argc_argv->comm, MPI_ERRORS_RETURN);
  82. #ifdef STARPU_SIMGRID
  83. _mpi_world_size = argc_argv->world_size;
  84. _mpi_world_rank = argc_argv->rank;
  85. #endif
  86. #ifdef STARPU_USE_MPI_FT
  87. starpu_mpi_ft_turn_on();
  88. #endif // STARPU_USE_MPI_FT
  89. }
  90. static
  91. void _starpu_mpi_backend_check()
  92. {
  93. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_init != NULL);
  94. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_shutdown != NULL);
  95. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_reserve_core != NULL);
  96. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_init != NULL);
  97. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_fill != NULL);
  98. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_destroy != NULL);
  99. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_clear != NULL);
  100. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_register != NULL);
  101. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_comm_register != NULL);
  102. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_init != NULL);
  103. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_shutdown != NULL);
  104. #ifdef STARPU_SIMGRID
  105. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_initialization != NULL);
  106. #endif
  107. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_barrier != NULL);
  108. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_all != NULL);
  109. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait != NULL);
  110. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_test != NULL);
  111. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_isend_size_func != NULL);
  112. STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_irecv_size_func != NULL);
  113. }
  114. static
  115. int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
  116. {
  117. struct _starpu_mpi_argc_argv *argc_argv;
  118. _STARPU_MALLOC(argc_argv, sizeof(struct _starpu_mpi_argc_argv));
  119. argc_argv->initialize_mpi = initialize_mpi;
  120. argc_argv->argc = argc;
  121. argc_argv->argv = argv;
  122. argc_argv->comm = comm;
  123. _starpu_implicit_data_deps_write_hook(_starpu_mpi_data_flush);
  124. _starpu_mpi_backend_check();
  125. #ifdef STARPU_SIMGRID
  126. /* Call MPI_Init_thread as early as possible, to initialize simgrid
  127. * before working with mutexes etc. */
  128. _starpu_mpi_do_initialize(argc_argv);
  129. #endif
  130. int ret = _mpi_backend._starpu_mpi_backend_progress_init(argc_argv);
  131. if (starpu_get_env_number_default("STARPU_DISPLAY_BINDINGS", 0))
  132. {
  133. int rank, size, i;
  134. char hostname[65];
  135. starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
  136. starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
  137. gethostname(hostname, sizeof(hostname));
  138. /* We make a barrier between each node calling hwloc-ps, to avoid mixing
  139. * outputs in stdout. */
  140. for (i = 0; i < size; i++)
  141. {
  142. starpu_mpi_barrier(MPI_COMM_WORLD);
  143. if (rank == i)
  144. {
  145. fprintf(stdout, "== Binding for rank %d on node %s ==\n", rank, hostname);
  146. starpu_display_bindings();
  147. fflush(stdout);
  148. }
  149. }
  150. starpu_mpi_barrier(MPI_COMM_WORLD);
  151. if (rank == 0)
  152. {
  153. fprintf(stdout, "== End of bindings ==\n");
  154. fflush(stdout);
  155. }
  156. }
  157. return ret;
  158. }
  #ifdef STARPU_SIMGRID
  /* Entry point run before the application's main: SMPI has to be initialized
   * before MSG processes can be created to run the application's main.
   *
   * Runs the common initialization on MPI_COMM_WORLD, asking it to initialize
   * MPI itself, and returns its result.
   * NOTE(review): the addresses handed over are those of this function's own
   * parameters -- confirm nothing dereferences them after it returns. */
  int _starpu_mpi_simgrid_init(int argc, char *argv[])
  {
      int status = _starpu_mpi_initialize(&argc, &argv, 1, MPI_COMM_WORLD);

      return status;
  }
  #endif
  167. int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
  168. {
  169. #ifdef STARPU_SIMGRID
  170. (void)argc;
  171. (void)argv;
  172. (void)initialize_mpi;
  173. (void)comm;
  174. _mpi_backend._starpu_mpi_backend_wait_for_initialization();
  175. return 0;
  176. #else
  177. return _starpu_mpi_initialize(argc, argv, initialize_mpi, comm);
  178. #endif
  179. }
  180. int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
  181. {
  182. return starpu_mpi_init_comm(argc, argv, initialize_mpi, MPI_COMM_WORLD);
  183. }
  184. int starpu_mpi_initialize(void)
  185. {
  186. #ifdef STARPU_SIMGRID
  187. return 0;
  188. #else
  189. return _starpu_mpi_initialize(NULL, NULL, 0, MPI_COMM_WORLD);
  190. #endif
  191. }
  192. int starpu_mpi_initialize_extended(int *rank, int *world_size)
  193. {
  194. #ifdef STARPU_SIMGRID
  195. *world_size = _mpi_world_size;
  196. *rank = _mpi_world_rank;
  197. return 0;
  198. #else
  199. int ret;
  200. ret = _starpu_mpi_initialize(NULL, NULL, 1, MPI_COMM_WORLD);
  201. if (ret == 0)
  202. {
  203. _STARPU_DEBUG("Calling MPI_Comm_rank\n");
  204. MPI_Comm_rank(MPI_COMM_WORLD, rank);
  205. MPI_Comm_size(MPI_COMM_WORLD, world_size);
  206. }
  207. return ret;
  208. #endif
  209. }
  210. int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf)
  211. {
  212. struct starpu_conf localconf;
  213. if (!conf)
  214. {
  215. starpu_conf_init(&localconf);
  216. conf = &localconf;
  217. }
  218. _mpi_backend._starpu_mpi_backend_init(conf);
  219. /* Reserve a core only if required by the backend and if STARPU_NCPU isn't provided */
  220. if (_mpi_backend._starpu_mpi_backend_reserve_core() && conf->ncpus == -1)
  221. {
  222. /* Reserve a core for our progression thread */
  223. if (conf->reserve_ncpus == -1)
  224. conf->reserve_ncpus = 1;
  225. else
  226. conf->reserve_ncpus++;
  227. }
  228. conf->will_use_mpi = 1;
  229. int ret = starpu_init(conf);
  230. if (ret < 0)
  231. return ret;
  232. _mpi_initialized_starpu = 1;
  233. return starpu_mpi_init_comm(argc, argv, initialize_mpi, comm);
  234. }
  235. int starpu_mpi_shutdown(void)
  236. {
  237. void *value;
  238. int rank, world_size;
  239. /* Make sure we do not have MPI communications pending in the task graph
  240. * before shutting down MPI */
  241. starpu_mpi_wait_for_all(MPI_COMM_WORLD);
  242. /* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */
  243. starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
  244. starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
  245. /* kill the progression thread */
  246. _mpi_backend._starpu_mpi_backend_progress_shutdown(&value);
  247. #ifdef STARPU_USE_FXT
  248. if (starpu_fxt_is_enabled())
  249. {
  250. _STARPU_MPI_TRACE_STOP(rank, world_size);
  251. }
  252. #endif // STARPU_USE_FXT
  253. #ifdef STARPU_USE_MPI_FT
  254. starpu_mpi_ft_turn_off();
  255. #endif // STARPU_USE_MPI_FT
  256. _starpu_mpi_comm_amounts_display(stderr, rank);
  257. _starpu_mpi_comm_amounts_shutdown();
  258. _starpu_mpi_cache_shutdown(world_size);
  259. _mpi_backend._starpu_mpi_backend_shutdown();
  260. if (_mpi_initialized_starpu)
  261. starpu_shutdown();
  262. return 0;
  263. }
  264. int starpu_mpi_comm_size(MPI_Comm comm, int *size)
  265. {
  266. if (_starpu_mpi_fake_world_size != -1)
  267. {
  268. *size = _starpu_mpi_fake_world_size;
  269. return 0;
  270. }
  271. #ifdef STARPU_SIMGRID
  272. STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
  273. *size = _mpi_world_size;
  274. return 0;
  275. #else
  276. return MPI_Comm_size(comm, size);
  277. #endif
  278. }
  279. int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
  280. {
  281. if (_starpu_mpi_fake_world_rank != -1)
  282. {
  283. *rank = _starpu_mpi_fake_world_rank;
  284. return 0;
  285. }
  286. #ifdef STARPU_SIMGRID
  287. STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
  288. *rank = _mpi_world_rank;
  289. return 0;
  290. #else
  291. return MPI_Comm_rank(comm, rank);
  292. #endif
  293. }
  294. int starpu_mpi_world_size(void)
  295. {
  296. int size;
  297. starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
  298. return size;
  299. }
  300. int starpu_mpi_world_rank(void)
  301. {
  302. int rank;
  303. starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
  304. return rank;
  305. }
  306. int starpu_mpi_get_thread_cpuid(void)
  307. {
  308. return _starpu_mpi_thread_cpuid;
  309. }