simgrid.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2016,2017 Inria
  4. * Copyright (C) 2012,2013,2015-2019 CNRS
  5. * Copyright (C) 2012-2019 Université de Bordeaux
  6. * Copyright (C) 2013 Thibaut Lambert
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <starpu.h>
  20. #include <datawizard/memory_nodes.h>
  21. #include <common/config.h>
  22. #ifdef HAVE_UNISTD_H
  23. #include <unistd.h>
  24. #endif
  25. #include <core/perfmodel/perfmodel.h>
  26. #include <core/workers.h>
  27. #include <core/simgrid.h>
  28. #if defined(HAVE_SIMGRID_SIMDAG_H) && (SIMGRID_VERSION >= 31300)
  29. #include <simgrid/simdag.h>
  30. #endif
  31. #ifdef STARPU_SIMGRID
  32. #ifdef HAVE_GETRLIMIT
  33. #include <sys/resource.h>
  34. #endif
  35. #include <simgrid/simix.h>
  36. #ifdef STARPU_HAVE_SIMGRID_HOST_H
  37. #include <simgrid/host.h>
  38. #endif
  39. #include <smpi/smpi.h>
  40. #pragma weak starpu_main
  41. extern int starpu_main(int argc, char *argv[]);
  42. #if SIMGRID_VERSION < 31600
  43. #pragma weak smpi_main
  44. extern int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]);
  45. #endif
  46. #pragma weak _starpu_mpi_simgrid_init
  47. extern int _starpu_mpi_simgrid_init(int argc, char *argv[]);
  48. #pragma weak smpi_process_set_user_data
  49. #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_set_user_data)
  50. extern void smpi_process_set_user_data(void *);
  51. #endif
  52. /* 1 when MSG_init was done, 2 when initialized through redirected main, 3 when
  53. * initialized through MSG_process_attach */
  54. static int simgrid_started;
  55. static int simgrid_transfer_cost = 1;
  56. static int runners_running;
  57. starpu_pthread_queue_t _starpu_simgrid_transfer_queue[STARPU_MAXNODES];
  58. static struct transfer_runner
  59. {
  60. struct transfer *first_transfer, *last_transfer;
  61. starpu_sem_t sem;
  62. starpu_pthread_t runner;
  63. } transfer_runner[STARPU_MAXNODES][STARPU_MAXNODES];
  64. static void *transfer_execute(void *arg);
  65. starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS];
  66. static struct worker_runner
  67. {
  68. struct task *first_task, *last_task;
  69. starpu_sem_t sem;
  70. starpu_pthread_t runner;
  71. } worker_runner[STARPU_NMAXWORKERS];
  72. static void *task_execute(void *arg);
  /*
   * _starpu_simgrid_get_as_by_name() returns the simgrid zone (AS) called
   * `name', through whichever lookup interface the simgrid in use provides.
   * HAVE_STARPU_SIMGRID_GET_AS_BY_NAME gets defined whenever one of the
   * implementations below is available.
   */
  #if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name)
  #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
  sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name)
  {
      return sg_zone_get_by_name(name);
  }
  #elif defined(HAVE_MSG_ZONE_GET_BY_NAME) || defined(MSG_zone_get_by_name)
  #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
  msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
  {
      return MSG_zone_get_by_name(name);
  }
  #elif defined(HAVE_MSG_GET_AS_BY_NAME) || defined(MSG_get_as_by_name)
  #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
  msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
  {
      return MSG_get_as_by_name(name);
  }
  #elif defined(HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT) || defined(MSG_environment_as_get_routing_sons)
  #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
  /* No direct lookup available: walk the sons of `root' depth-first, and
   * return the first AS whose name is `name', or NULL if there is none */
  static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
  {
      xbt_dict_t sons = MSG_environment_as_get_routing_sons(root);
      xbt_dict_cursor_t cursor;
      const char *key;
      msg_as_t son;

      xbt_dict_foreach(sons, cursor, key, son)
      {
          msg_as_t found;

          if (strcmp(MSG_environment_as_get_name(son), name) == 0)
              return son;

          found = __starpu_simgrid_get_as_by_name(son, name);
          if (found != NULL)
              return found;
      }

      return NULL;
  }

  msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
  {
      msg_as_t root = MSG_environment_get_routing_root();

      return __starpu_simgrid_get_as_by_name(root, name);
  }
  #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
  115. int _starpu_simgrid_get_nbhosts(const char *prefix)
  116. {
  117. int ret;
  118. xbt_dynar_t hosts;
  119. unsigned i, nb;
  120. unsigned len = strlen(prefix);
  121. if (_starpu_simgrid_running_smpi())
  122. {
  123. #ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME
  124. char new_prefix[32];
  125. char name[32];
  126. STARPU_ASSERT(starpu_mpi_world_rank);
  127. snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank());
  128. #if defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts)
  129. hosts = xbt_dynar_new(sizeof(sg_host_t), NULL);
  130. # if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name)
  131. sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
  132. # else
  133. MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts);
  134. # endif
  135. #else
  136. hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name));
  137. #endif
  138. snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix);
  139. prefix = new_prefix;
  140. len = strlen(prefix);
  141. #else
  142. STARPU_ABORT_MSG("can not continue without an implementation for _starpu_simgrid_get_as_by_name");
  143. #endif /* HAVE_STARPU_SIMGRID_GET_AS_BY_NAME */
  144. }
  145. else
  146. #ifdef STARPU_HAVE_SIMGRID_HOST_H
  147. hosts = sg_hosts_as_dynar();
  148. #else
  149. hosts = MSG_hosts_as_dynar();
  150. #endif
  151. nb = xbt_dynar_length(hosts);
  152. ret = 0;
  153. for (i = 0; i < nb; i++)
  154. {
  155. const char *name;
  156. #ifdef STARPU_HAVE_SIMGRID_HOST_H
  157. name = sg_host_get_name(xbt_dynar_get_as(hosts, i, sg_host_t));
  158. #else
  159. name = MSG_host_get_name(xbt_dynar_get_as(hosts, i, msg_host_t));
  160. #endif
  161. if (!strncmp(name, prefix, len))
  162. ret++;
  163. }
  164. xbt_dynar_free(&hosts);
  165. return ret;
  166. }
  167. unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid)
  168. {
  169. char name[32];
  170. starpu_sg_host_t host;
  171. const char *memsize;
  172. snprintf(name, sizeof(name), "%s%u", prefix, devid);
  173. host = _starpu_simgrid_get_host_by_name(name);
  174. if (!host)
  175. return 0;
  176. #ifdef HAVE_SG_HOST_GET_PROPERTIES
  177. if (!sg_host_get_properties(host))
  178. #else
  179. if (!MSG_host_get_properties(host))
  180. #endif
  181. return 0;
  182. #ifdef HAVE_SG_HOST_GET_PROPERTIES
  183. memsize = sg_host_get_property_value(host, "memsize");
  184. #else
  185. memsize = MSG_host_get_property_value(host, "memsize");
  186. #endif
  187. if (!memsize)
  188. return 0;
  189. return atoll(memsize);
  190. }
  191. starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name)
  192. {
  193. if (_starpu_simgrid_running_smpi())
  194. {
  195. char mpiname[32];
  196. STARPU_ASSERT(starpu_mpi_world_rank);
  197. snprintf(mpiname, sizeof(mpiname), STARPU_MPI_AS_PREFIX"%d-%s", starpu_mpi_world_rank(), name);
  198. #ifdef STARPU_HAVE_SIMGRID_HOST_H
  199. return sg_host_by_name(mpiname);
  200. #else
  201. return MSG_get_host_by_name(mpiname);
  202. #endif
  203. }
  204. else
  205. #ifdef STARPU_HAVE_SIMGRID_HOST_H
  206. return sg_host_by_name(name);
  207. #else
  208. return MSG_get_host_by_name(name);
  209. #endif
  210. }
  211. starpu_sg_host_t _starpu_simgrid_get_host_by_worker(struct _starpu_worker *worker)
  212. {
  213. char *prefix;
  214. char name[16];
  215. starpu_sg_host_t host;
  216. switch (worker->arch)
  217. {
  218. case STARPU_CPU_WORKER:
  219. prefix = "CPU";
  220. break;
  221. case STARPU_CUDA_WORKER:
  222. prefix = "CUDA";
  223. break;
  224. case STARPU_OPENCL_WORKER:
  225. prefix = "OpenCL";
  226. break;
  227. default:
  228. STARPU_ASSERT(0);
  229. }
  230. snprintf(name, sizeof(name), "%s%u", prefix, worker->devid);
  231. host = _starpu_simgrid_get_host_by_name(name);
  232. STARPU_ASSERT_MSG(host, "Could not find host %s!", name);
  233. return host;
  234. }
  235. /* Simgrid up to 3.15 would rename main into smpi_simulated_main_, and call that
  236. * from SMPI initialization
  237. * In case the MPI application didn't use smpicc to build the file containing
  238. * main(), but included our #define main starpu_main, try to cope by calling
  239. * starpu_main */
  240. int _starpu_smpi_simulated_main_(int argc, char *argv[])
  241. {
  242. if (!starpu_main)
  243. {
  244. _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n");
  245. }
  246. return starpu_main(argc, argv);
  247. }
  248. int smpi_simulated_main_(int argc, char *argv[]) __attribute__((weak, alias("_starpu_smpi_simulated_main_")));
  249. /* This is used to start a non-MPI simgrid environment */
  250. void _starpu_start_simgrid(int *argc, char **argv)
  251. {
  252. char path[256];
  253. if (simgrid_started)
  254. return;
  255. simgrid_started = 1;
  256. MSG_init(argc, argv);
  257. /* Simgrid uses tiny stacks by default. This comes unexpected to our users. */
  258. unsigned stack_size = 8192;
  259. #ifdef HAVE_GETRLIMIT
  260. struct rlimit rlim;
  261. if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur != 0 && rlim.rlim_cur != RLIM_INFINITY)
  262. stack_size = rlim.rlim_cur / 1024;
  263. #endif
  264. #ifdef HAVE_SG_CFG_SET_INT
  265. sg_cfg_set_int("contexts/stack-size", stack_size);
  266. #elif SIMGRID_VERSION < 31300
  267. extern xbt_cfg_t _sg_cfg_set;
  268. xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", stack_size);
  269. #else
  270. xbt_cfg_set_int("contexts/stack-size", stack_size);
  271. #endif
  272. /* Load XML platform */
  273. #if SIMGRID_VERSION < 31300
  274. _starpu_simgrid_get_platform_path(3, path, sizeof(path));
  275. #else
  276. _starpu_simgrid_get_platform_path(4, path, sizeof(path));
  277. #endif
  278. MSG_create_environment(path);
  279. simgrid_transfer_cost = starpu_get_env_number_default("STARPU_SIMGRID_TRANSFER_COST", 1);
  280. }
  /* Value returned by the application's main, which main() below returns once
   * the simulation is over */
  static int main_ret;

  /*
   * Entry point of the simgrid process which runs the application: call
   * starpu_main, record what it returns into main_ret, and return it.
   */
  int do_starpu_main(int argc, char *argv[])
  {
      int ret;

      /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
      starpu_sleep(0.000001);

      ret = starpu_main(argc, argv);
      main_ret = ret;
      return ret;
  }
  289. /* We need it only when using smpi */
  290. #pragma weak smpi_process_get_user_data
  291. extern void *smpi_process_get_user_data();
  292. /* This is hopefully called before the application and simgrid */
  293. #undef main
  294. #pragma weak main
  295. int main(int argc, char **argv)
  296. {
  297. #ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP
  298. sg_config_continue_after_help();
  299. #endif
  300. if (_starpu_simgrid_running_smpi())
  301. {
  302. if (!smpi_process_get_user_data)
  303. {
  304. _STARPU_ERROR("Your version of simgrid does not provide smpi_process_get_user_data, we can not continue without it\n");
  305. }
  306. #if SIMGRID_VERSION >= 31600
  307. /* Recent versions of simgrid dlopen() us, so we don't need to
  308. * do circumvolutions, just init MPI early and run the application's main */
  309. return _starpu_mpi_simgrid_init(argc, argv);
  310. #else
  311. /* Oops, we are running old SMPI, let it start Simgrid, and we'll
  312. * take back hand in _starpu_simgrid_init from starpu_init() */
  313. return smpi_main(_starpu_mpi_simgrid_init, argc, argv);
  314. #endif
  315. }
  316. /* Already initialized? It probably has been done through a
  317. * constructor and MSG_process_attach, directly jump to real main */
  318. if (simgrid_started == 3)
  319. {
  320. return do_starpu_main(argc, argv);
  321. }
  322. /* Managed to catch application's main, initialize simgrid first */
  323. _starpu_start_simgrid(&argc, argv);
  324. simgrid_started = 2;
  325. /* Create a simgrid process for main */
  326. char **argv_cpy;
  327. _STARPU_MALLOC(argv_cpy, argc * sizeof(char*));
  328. int i;
  329. for (i = 0; i < argc; i++)
  330. argv_cpy[i] = strdup(argv[i]);
  331. /* Run the application in a separate thread */
  332. _starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy);
  333. /* And run maestro in the main thread */
  334. MSG_main();
  335. return main_ret;
  336. }
  /* This guard has to cover every configuration in which
   * _starpu_simgrid_init_early() passes maestro to SIMIX_set_maestro(), which
   * includes the case where only HAVE_SG_ACTOR_ATTACH is defined */
  #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)
  /* Body of the maestro thread, used when the application's main could not be
   * caught: just run the simulation. `data' is not used. */
  static void maestro(void *data STARPU_ATTRIBUTE_UNUSED)
  {
      MSG_main();
  }
  #endif
  343. /* This is called early from starpu_init, so thread functions etc. can work */
  344. void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv STARPU_ATTRIBUTE_UNUSED)
  345. {
  346. #ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP
  347. sg_config_continue_after_help();
  348. #endif
  349. #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)
  350. if (simgrid_started < 2 && !_starpu_simgrid_running_smpi())
  351. {
  352. /* "Cannot create_maestro with this ContextFactory.
  353. * Try using --cfg=contexts/factory:thread instead."
  354. * See https://github.com/simgrid/simgrid/issues/141 */
  355. _STARPU_DISP("Warning: In simgrid mode, the file containing the main() function of this application should to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main to avoid having to use --cfg=contexts/factory:thread which reduces performance\n");
  356. #if SIMGRID_VERSION >= 31400 /* Only recent versions of simgrid support setting sg_cfg_set_string before starting simgrid */
  357. # ifdef HAVE_SG_CFG_SET_INT
  358. sg_cfg_set_string("contexts/factory", "thread");
  359. # else
  360. xbt_cfg_set_string("contexts/factory", "thread");
  361. # endif
  362. #endif
  363. /* We didn't catch application's main. */
  364. /* Start maestro as a separate thread */
  365. SIMIX_set_maestro(maestro, NULL);
  366. /* Initialize simgrid */
  367. _starpu_start_simgrid(argc, *argv);
  368. /* And attach the main thread to the main simgrid process */
  369. void **tsd;
  370. _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
  371. #if defined(HAVE_SG_ACTOR_ATTACH) && defined (HAVE_SG_ACTOR_DATA)
  372. sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
  373. sg_actor_data_set(actor, tsd);
  374. #else
  375. MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL);
  376. #endif
  377. /* We initialized through MSG_process_attach */
  378. simgrid_started = 3;
  379. }
  380. #endif
  381. if (!simgrid_started && !starpu_main && !_starpu_simgrid_running_smpi())
  382. {
  383. /* Oops, we don't have MSG_process_attach and didn't catch the
  384. * 'main' symbol, there is no way for us */
  385. _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n");
  386. }
  387. if (_starpu_simgrid_running_smpi())
  388. {
  389. #ifndef STARPU_STATIC_ONLY
  390. _STARPU_ERROR("Simgrid currently does not support privatization for dynamically-linked libraries in SMPI. Please reconfigure and build StarPU with --disable-shared");
  391. #endif
  392. #if defined(HAVE_MSG_PROCESS_USERDATA_INIT) && !defined(HAVE_SG_ACTOR_DATA)
  393. MSG_process_userdata_init();
  394. #endif
  395. void **tsd;
  396. _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
  397. #ifdef HAVE_SG_ACTOR_DATA
  398. sg_actor_data_set(sg_actor_self(), tsd);
  399. #else
  400. smpi_process_set_user_data(tsd);
  401. #endif
  402. }
  403. unsigned i;
  404. for (i = 0; i < STARPU_MAXNODES; i++)
  405. starpu_pthread_queue_init(&_starpu_simgrid_transfer_queue[i]);
  406. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  407. starpu_pthread_queue_init(&_starpu_simgrid_task_queue[i]);
  408. }
  409. /* This is called late from starpu_init, to start task executors */
  410. void _starpu_simgrid_init(void)
  411. {
  412. unsigned i;
  413. runners_running = 1;
  414. for (i = 0; i < starpu_worker_get_count(); i++)
  415. {
  416. char s[32];
  417. snprintf(s, sizeof(s), "worker %u runner", i);
  418. starpu_sem_init(&worker_runner[i].sem, 0, 0);
  419. starpu_pthread_create_on(s, &worker_runner[i].runner, NULL, task_execute, (void*)(uintptr_t) i, _starpu_simgrid_get_host_by_worker(_starpu_get_worker_struct(i)));
  420. }
  421. }
  /*
   * Undo the attachment made by _starpu_simgrid_init_early(): when simgrid
   * was started through MSG_process_attach (simgrid_started == 3), detach the
   * calling thread and mark simgrid as not started any more. Otherwise this
   * does nothing.
   */
  void _starpu_simgrid_deinit_late(void)
  {
  #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)
      if (simgrid_started != 3)
          return;

      /* Started with MSG_process_attach, now detach */
  #ifdef HAVE_SG_ACTOR_ATTACH
      sg_actor_detach();
  #else
      MSG_process_detach();
  #endif
      simgrid_started = 0;
  #endif
  }
  437. void _starpu_simgrid_deinit(void)
  438. {
  439. unsigned i, j;
  440. runners_running = 0;
  441. for (i = 0; i < STARPU_MAXNODES; i++)
  442. {
  443. for (j = 0; j < STARPU_MAXNODES; j++)
  444. {
  445. struct transfer_runner *t = &transfer_runner[i][j];
  446. if (t->runner)
  447. {
  448. starpu_sem_post(&t->sem);
  449. #ifdef STARPU_HAVE_SIMGRID_ACTOR_H
  450. sg_actor_join(t->runner, 1000000);
  451. #elif SIMGRID_VERSION >= 31400
  452. MSG_process_join(t->runner, 1000000);
  453. #else
  454. starpu_sleep(1);
  455. #endif
  456. STARPU_ASSERT(t->first_transfer == NULL);
  457. STARPU_ASSERT(t->last_transfer == NULL);
  458. starpu_sem_destroy(&t->sem);
  459. }
  460. }
  461. /* FIXME: queue not empty at this point, needs proper unregistration */
  462. /* starpu_pthread_queue_destroy(&_starpu_simgrid_transfer_queue[i]); */
  463. }
  464. for (i = 0; i < starpu_worker_get_count(); i++)
  465. {
  466. struct worker_runner *w = &worker_runner[i];
  467. starpu_sem_post(&w->sem);
  468. #ifdef STARPU_HAVE_SIMGRID_ACTOR_H
  469. sg_actor_join(w->runner, 1000000);
  470. #elif SIMGRID_VERSION >= 31400
  471. MSG_process_join(w->runner, 1000000);
  472. #else
  473. starpu_sleep(1);
  474. #endif
  475. STARPU_ASSERT(w->first_task == NULL);
  476. STARPU_ASSERT(w->last_task == NULL);
  477. starpu_sem_destroy(&w->sem);
  478. starpu_pthread_queue_destroy(&_starpu_simgrid_task_queue[i]);
  479. }
  480. #if SIMGRID_VERSION >= 31300
  481. /* clean-atexit introduced in simgrid 3.13 */
  482. # ifdef HAVE_SG_CFG_SET_INT
  483. if ( sg_cfg_get_boolean("debug/clean-atexit"))
  484. # elif SIMGRID_VERSION >= 32300
  485. if ( xbt_cfg_get_boolean("debug/clean-atexit"))
  486. # else
  487. if ( xbt_cfg_get_boolean("clean-atexit"))
  488. # endif
  489. {
  490. _starpu_simgrid_deinit_late();
  491. }
  492. #endif
  493. }
  494. /*
  495. * Tasks
  496. */
  497. struct task
  498. {
  499. #ifdef HAVE_SG_ACTOR_SELF_EXECUTE
  500. double flops;
  501. #else
  502. msg_task_t task;
  503. #endif
  504. /* communication termination signalization */
  505. unsigned *finished;
  506. /* Next task on this worker */
  507. struct task *next;
  508. };
  509. /* Actually execute the task. */
  510. static void *task_execute(void *arg)
  511. {
  512. unsigned workerid = (uintptr_t) arg;
  513. struct worker_runner *w = &worker_runner[workerid];
  514. _STARPU_DEBUG("worker runner %u started\n", workerid);
  515. while (1)
  516. {
  517. struct task *task;
  518. starpu_sem_wait(&w->sem);
  519. if (!runners_running)
  520. break;
  521. task = w->first_task;
  522. w->first_task = task->next;
  523. if (w->last_task == task)
  524. w->last_task = NULL;
  525. _STARPU_DEBUG("task %p started\n", task);
  526. #ifdef HAVE_SG_ACTOR_SELF_EXECUTE
  527. sg_actor_self_execute(task->flops);
  528. #else
  529. MSG_task_execute(task->task);
  530. MSG_task_destroy(task->task);
  531. #endif
  532. _STARPU_DEBUG("task %p finished\n", task);
  533. *task->finished = 1;
  534. /* The worker which started this task may be sleeping out of tasks, wake it */
  535. _starpu_wake_worker_relax(workerid);
  536. free(task);
  537. }
  538. _STARPU_DEBUG("worker %u stopped\n", workerid);
  539. return 0;
  540. }
  541. /* Wait for completion of all asynchronous tasks for this worker */
  542. void _starpu_simgrid_wait_tasks(int workerid)
  543. {
  544. struct task *task = worker_runner[workerid].last_task;
  545. if (!task)
  546. return;
  547. unsigned *finished = task->finished;
  548. starpu_pthread_wait_t wait;
  549. starpu_pthread_wait_init(&wait);
  550. starpu_pthread_queue_register(&wait, &_starpu_simgrid_task_queue[workerid]);
  551. while(1)
  552. {
  553. starpu_pthread_wait_reset(&wait);
  554. if (*finished)
  555. break;
  556. starpu_pthread_wait_wait(&wait);
  557. }
  558. starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_task_queue[workerid]);
  559. starpu_pthread_wait_destroy(&wait);
  560. }
  561. /* Task execution submitted by StarPU */
  562. void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, double length, unsigned *finished)
  563. {
  564. struct starpu_task *starpu_task = j->task;
  565. double flops;
  566. #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
  567. msg_task_t simgrid_task;
  568. #endif
  569. if (j->internal)
  570. /* This is not useful to include in simulation (and probably
  571. * doesn't have a perfmodel anyway) */
  572. return;
  573. if (isnan(length))
  574. {
  575. length = starpu_task_expected_length(starpu_task, perf_arch, j->nimpl);
  576. STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
  577. "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated",
  578. _starpu_job_get_model_name(j));
  579. /* TODO: option to add variance according to performance model,
  580. * to be able to easily check scheduling robustness */
  581. }
  582. #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed)
  583. # if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self)
  584. flops = length/1000000.0*sg_host_speed(sg_host_self());
  585. # else
  586. flops = length/1000000.0*sg_host_speed(MSG_host_self());
  587. # endif
  588. #elif defined HAVE_MSG_HOST_GET_SPEED || defined(MSG_host_get_speed)
  589. flops = length/1000000.0*MSG_host_get_speed(MSG_host_self());
  590. #else
  591. flops = length/1000000.0*MSG_get_host_speed(MSG_host_self());
  592. #endif
  593. #ifndef HAVE_SG_ACTOR_SELF_EXECUTE
  594. simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL);
  595. #endif
  596. if (finished == NULL)
  597. {
  598. /* Synchronous execution */
  599. /* First wait for previous tasks */
  600. _starpu_simgrid_wait_tasks(workerid);
  601. #ifdef HAVE_SG_ACTOR_SELF_EXECUTE
  602. sg_actor_self_execute(flops);
  603. #else
  604. MSG_task_execute(simgrid_task);
  605. MSG_task_destroy(simgrid_task);
  606. #endif
  607. }
  608. else
  609. {
  610. /* Asynchronous execution */
  611. struct task *task;
  612. struct worker_runner *w = &worker_runner[workerid];
  613. _STARPU_MALLOC(task, sizeof(*task));
  614. #ifdef HAVE_SG_ACTOR_SELF_EXECUTE
  615. task->flops = flops;
  616. #else
  617. task->task = simgrid_task;
  618. #endif
  619. task->finished = finished;
  620. *finished = 0;
  621. task->next = NULL;
  622. /* Sleep 10µs for the GPU task queueing */
  623. if (_starpu_simgrid_queue_malloc_cost())
  624. starpu_sleep(0.000010);
  625. if (w->last_task)
  626. {
  627. /* Already running a task, queue */
  628. w->last_task->next = task;
  629. w->last_task = task;
  630. }
  631. else
  632. {
  633. STARPU_ASSERT(!w->first_task);
  634. w->first_task = task;
  635. w->last_task = task;
  636. }
  637. starpu_sem_post(&w->sem);
  638. }
  639. }
  640. /*
  641. * Transfers
  642. */
  643. /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers. */
  644. LIST_TYPE(transfer,
  645. #ifdef HAVE_SG_HOST_SEND_TO
  646. size_t size;
  647. #else
  648. msg_task_t task;
  649. #endif
  650. int src_node;
  651. int dst_node;
  652. int run_node;
  653. /* communication termination signalization */
  654. unsigned *finished;
  655. /* transfers which wait for this transfer */
  656. struct transfer **wake;
  657. unsigned nwake;
  658. /* Number of transfers that this transfer waits for */
  659. unsigned nwait;
  660. /* Next transfer on this stream */
  661. struct transfer *next;
  662. )
  663. struct transfer_list pending;
  664. /* Tell for two transfers whether they should be handled in sequence */
  665. static int transfers_are_sequential(struct transfer *new_transfer, struct transfer *old_transfer)
  666. {
  667. int new_is_cuda STARPU_ATTRIBUTE_UNUSED, old_is_cuda STARPU_ATTRIBUTE_UNUSED;
  668. int new_is_opencl STARPU_ATTRIBUTE_UNUSED, old_is_opencl STARPU_ATTRIBUTE_UNUSED;
  669. int new_is_gpu_gpu, old_is_gpu_gpu;
  670. new_is_cuda = starpu_node_get_kind(new_transfer->src_node) == STARPU_CUDA_RAM;
  671. new_is_cuda |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_CUDA_RAM;
  672. old_is_cuda = starpu_node_get_kind(old_transfer->src_node) == STARPU_CUDA_RAM;
  673. old_is_cuda |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_CUDA_RAM;
  674. new_is_opencl = starpu_node_get_kind(new_transfer->src_node) == STARPU_OPENCL_RAM;
  675. new_is_opencl |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_OPENCL_RAM;
  676. old_is_opencl = starpu_node_get_kind(old_transfer->src_node) == STARPU_OPENCL_RAM;
  677. old_is_opencl |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_OPENCL_RAM;
  678. new_is_gpu_gpu = new_transfer->src_node && new_transfer->dst_node;
  679. old_is_gpu_gpu = old_transfer->src_node && old_transfer->dst_node;
  680. /* We ignore cuda-opencl transfers, they can not happen */
  681. STARPU_ASSERT(!((new_is_cuda && old_is_opencl) || (old_is_cuda && new_is_opencl)));
  682. /* The following constraints have been observed with CUDA alone */
  683. /* Same source/destination, sequential */
  684. if (new_transfer->src_node == old_transfer->src_node && new_transfer->dst_node == old_transfer->dst_node)
  685. return 1;
  686. /* Crossed GPU-GPU, sequential */
  687. if (new_is_gpu_gpu
  688. && new_transfer->src_node == old_transfer->dst_node
  689. && old_transfer->src_node == new_transfer->dst_node)
  690. return 1;
  691. /* GPU-GPU transfers are sequential with any RAM->GPU transfer */
  692. if (new_is_gpu_gpu
  693. && (old_transfer->dst_node == new_transfer->src_node
  694. || old_transfer->dst_node == new_transfer->dst_node))
  695. return 1;
  696. if (old_is_gpu_gpu
  697. && (new_transfer->dst_node == old_transfer->src_node
  698. || new_transfer->dst_node == old_transfer->dst_node))
  699. return 1;
  700. /* StarPU's constraint on CUDA transfers is using one stream per
  701. * source/destination pair, which is already handled above */
  702. return 0;
  703. }
  704. static void transfer_queue(struct transfer *transfer)
  705. {
  706. unsigned src = transfer->src_node;
  707. unsigned dst = transfer->dst_node;
  708. struct transfer_runner *t = &transfer_runner[src][dst];
  709. if (!t->runner)
  710. {
  711. /* No runner yet, start it */
  712. static starpu_pthread_mutex_t mutex; /* process_create may yield */
  713. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  714. if (!t->runner)
  715. {
  716. char s[64];
  717. snprintf(s, sizeof(s), "transfer %u-%u runner", src, dst);
  718. starpu_pthread_create_on(s, &t->runner, NULL, transfer_execute, (void*)(uintptr_t)((src<<16) + dst), _starpu_simgrid_get_memnode_host(src));
  719. starpu_sem_init(&t->sem, 0, 0);
  720. }
  721. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  722. }
  723. if (t->last_transfer)
  724. {
  725. /* Already running a transfer, queue */
  726. t->last_transfer->next = transfer;
  727. t->last_transfer = transfer;
  728. }
  729. else
  730. {
  731. STARPU_ASSERT(!t->first_transfer);
  732. t->first_transfer = transfer;
  733. t->last_transfer = transfer;
  734. }
  735. starpu_sem_post(&t->sem);
  736. }
  737. /* Actually execute the transfer, and then start transfers waiting for this one. */
  738. static void *transfer_execute(void *arg)
  739. {
  740. unsigned src_dst = (uintptr_t) arg;
  741. unsigned src = src_dst >> 16;
  742. unsigned dst = src_dst & 0xffff;
  743. struct transfer_runner *t = &transfer_runner[src][dst];
  744. _STARPU_DEBUG("transfer runner %u-%u started\n", src, dst);
  745. while (1)
  746. {
  747. struct transfer *transfer;
  748. starpu_sem_wait(&t->sem);
  749. if (!runners_running)
  750. break;
  751. transfer = t->first_transfer;
  752. t->first_transfer = transfer->next;
  753. if (t->last_transfer == transfer)
  754. t->last_transfer = NULL;
  755. #ifdef HAVE_SG_HOST_SEND_TO
  756. if (transfer->size)
  757. #else
  758. if (transfer->task)
  759. #endif
  760. {
  761. _STARPU_DEBUG("transfer %p started\n", transfer);
  762. #ifdef HAVE_SG_HOST_SEND_TO
  763. sg_host_send_to(_starpu_simgrid_memory_node_get_host(transfer->src_node),
  764. _starpu_simgrid_memory_node_get_host(transfer->dst_node),
  765. transfer->size);
  766. #else
  767. MSG_task_execute(transfer->task);
  768. MSG_task_destroy(transfer->task);
  769. #endif
  770. _STARPU_DEBUG("transfer %p finished\n", transfer);
  771. }
  772. *transfer->finished = 1;
  773. transfer_list_erase(&pending, transfer);
  774. /* The workers which started this request may be sleeping out of tasks, wake it */
  775. _starpu_wake_all_blocked_workers_on_node(transfer->run_node);
  776. unsigned i;
  777. /* Wake transfers waiting for my termination */
  778. /* Note: due to possible preemption inside process_create, the array
  779. * may grow while doing this */
  780. for (i = 0; i < transfer->nwake; i++)
  781. {
  782. struct transfer *wake = transfer->wake[i];
  783. STARPU_ASSERT(wake->nwait > 0);
  784. wake->nwait--;
  785. if (!wake->nwait)
  786. {
  787. _STARPU_DEBUG("triggering transfer %p\n", wake);
  788. transfer_queue(wake);
  789. }
  790. }
  791. free(transfer->wake);
  792. free(transfer);
  793. }
  794. return 0;
  795. }
  796. /* Look for sequentialization between this transfer and pending transfers, and submit this one */
  797. static void transfer_submit(struct transfer *transfer)
  798. {
  799. struct transfer *old;
  800. for (old = transfer_list_begin(&pending);
  801. old != transfer_list_end(&pending);
  802. old = transfer_list_next(old))
  803. {
  804. if (transfers_are_sequential(transfer, old))
  805. {
  806. _STARPU_DEBUG("transfer %p(%d->%d) waits for %p(%d->%d)\n",
  807. transfer, transfer->src_node, transfer->dst_node,
  808. old, old->src_node, old->dst_node);
  809. /* Make new wait for the old */
  810. transfer->nwait++;
  811. /* Make old wake the new */
  812. _STARPU_REALLOC(old->wake, (old->nwake + 1) * sizeof(old->wake));
  813. old->wake[old->nwake] = transfer;
  814. old->nwake++;
  815. }
  816. }
  817. transfer_list_push_front(&pending, transfer);
  818. if (!transfer->nwait)
  819. {
  820. _STARPU_DEBUG("transfer %p waits for nobody, starting\n", transfer);
  821. transfer_queue(transfer);
  822. }
  823. }
  824. int _starpu_simgrid_wait_transfer_event(union _starpu_async_channel_event *event)
  825. {
  826. /* this is not associated to a request so it's synchronous */
  827. starpu_pthread_wait_t wait;
  828. starpu_pthread_wait_init(&wait);
  829. starpu_pthread_queue_register(&wait, event->queue);
  830. while(1)
  831. {
  832. starpu_pthread_wait_reset(&wait);
  833. if (event->finished)
  834. break;
  835. starpu_pthread_wait_wait(&wait);
  836. }
  837. starpu_pthread_queue_unregister(&wait, event->queue);
  838. starpu_pthread_wait_destroy(&wait);
  839. return 0;
  840. }
  841. int _starpu_simgrid_test_transfer_event(union _starpu_async_channel_event *event)
  842. {
  843. return event->finished;
  844. }
  845. /* Wait for completion of all transfers */
  846. static void _starpu_simgrid_wait_transfers(void)
  847. {
  848. unsigned finished = 0;
  849. struct transfer *sync = transfer_new();
  850. struct transfer *cur;
  851. #ifdef HAVE_SG_HOST_SEND_TO
  852. sync->size = 0;
  853. #else
  854. sync->task = NULL;
  855. #endif
  856. sync->finished = &finished;
  857. sync->src_node = STARPU_MAIN_RAM;
  858. sync->dst_node = STARPU_MAIN_RAM;
  859. sync->run_node = STARPU_MAIN_RAM;
  860. sync->wake = NULL;
  861. sync->nwake = 0;
  862. sync->nwait = 0;
  863. sync->next = NULL;
  864. for (cur = transfer_list_begin(&pending);
  865. cur != transfer_list_end(&pending);
  866. cur = transfer_list_next(cur))
  867. {
  868. sync->nwait++;
  869. _STARPU_REALLOC(cur->wake, (cur->nwake + 1) * sizeof(cur->wake));
  870. cur->wake[cur->nwake] = sync;
  871. cur->nwake++;
  872. }
  873. if (sync->nwait == 0)
  874. {
  875. /* No transfer to wait for */
  876. free(sync);
  877. return;
  878. }
  879. /* Push synchronization pseudo-transfer */
  880. transfer_list_push_front(&pending, sync);
  881. /* And wait for it */
  882. starpu_pthread_wait_t wait;
  883. starpu_pthread_wait_init(&wait);
  884. starpu_pthread_queue_register(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  885. while(1)
  886. {
  887. starpu_pthread_wait_reset(&wait);
  888. if (finished)
  889. break;
  890. starpu_pthread_wait_wait(&wait);
  891. }
  892. starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  893. starpu_pthread_wait_destroy(&wait);
  894. }
  895. /* Data transfer issued by StarPU */
  896. int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req)
  897. {
  898. /* Simgrid does not like 0-bytes transfers */
  899. if (!size)
  900. return 0;
  901. /* Explicitly disabled by user? */
  902. if (!simgrid_transfer_cost)
  903. return 0;
  904. union _starpu_async_channel_event *event, myevent;
  905. double start = 0.;
  906. struct transfer *transfer = transfer_new();
  907. _STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size);
  908. #ifdef HAVE_SG_HOST_SEND_TO
  909. transfer->size = size;
  910. #else
  911. msg_task_t task;
  912. starpu_sg_host_t *hosts;
  913. double *computation;
  914. double *communication;
  915. _STARPU_CALLOC(hosts, 2, sizeof(*hosts));
  916. _STARPU_CALLOC(computation, 2, sizeof(*computation));
  917. _STARPU_CALLOC(communication, 4, sizeof(*communication));
  918. hosts[0] = _starpu_simgrid_memory_node_get_host(src_node);
  919. hosts[1] = _starpu_simgrid_memory_node_get_host(dst_node);
  920. STARPU_ASSERT(hosts[0] != hosts[1]);
  921. communication[1] = size;
  922. task = MSG_parallel_task_create("copy", 2, hosts, computation, communication, NULL);
  923. transfer->task = task;
  924. #endif
  925. transfer->src_node = src_node;
  926. transfer->dst_node = dst_node;
  927. transfer->run_node = starpu_worker_get_local_memory_node();
  928. if (req)
  929. event = &req->async_channel.event;
  930. else
  931. event = &myevent;
  932. event->finished = 0;
  933. transfer->finished = &event->finished;
  934. event->queue = &_starpu_simgrid_transfer_queue[transfer->run_node];
  935. transfer->wake = NULL;
  936. transfer->nwake = 0;
  937. transfer->nwait = 0;
  938. transfer->next = NULL;
  939. if (req)
  940. starpu_interface_start_driver_copy_async(src_node, dst_node, &start);
  941. /* Sleep 10µs for the GPU transfer queueing */
  942. if (_starpu_simgrid_queue_malloc_cost())
  943. starpu_sleep(0.000010);
  944. transfer_submit(transfer);
  945. /* Note: from here, transfer might be already freed */
  946. if (req)
  947. {
  948. starpu_interface_end_driver_copy_async(src_node, dst_node, start);
  949. starpu_interface_data_copy(src_node, dst_node, size);
  950. return -EAGAIN;
  951. }
  952. else
  953. {
  954. /* this is not associated to a request so it's synchronous */
  955. _starpu_simgrid_wait_transfer_event(event);
  956. return 0;
  957. }
  958. }
  959. /* Sync all GPUs (used on CUDA Free, typically) */
  960. void _starpu_simgrid_sync_gpus(void)
  961. {
  962. _starpu_simgrid_wait_transfers();
  963. }
  964. int
  965. _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
  966. {
  967. void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16);
  968. void *arg = (void*) (uintptr_t) strtol(argv[1], NULL, 16);
  969. /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
  970. starpu_sleep(0.000001);
  971. /* _args is freed with process context */
  972. f(arg);
  973. return 0;
  974. }
  975. starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[])
  976. {
  977. void **tsd;
  978. _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*));
  979. #ifdef HAVE_SG_ACTOR_INIT
  980. starpu_pthread_t actor = sg_actor_init(name, host);
  981. sg_actor_data_set(actor, tsd);
  982. sg_actor_start(actor, code, argc, argv);
  983. return actor;
  984. #else
  985. return MSG_process_create_with_arguments(name, code, tsd, host, argc, argv);
  986. #endif
  987. }
  988. starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node)
  989. {
  990. const char *fmt;
  991. char name[16];
  992. switch (starpu_node_get_kind(node))
  993. {
  994. case STARPU_CPU_RAM:
  995. fmt = "RAM";
  996. break;
  997. case STARPU_CUDA_RAM:
  998. fmt = "CUDA%u";
  999. break;
  1000. case STARPU_OPENCL_RAM:
  1001. fmt = "OpenCL%u";
  1002. break;
  1003. case STARPU_DISK_RAM:
  1004. fmt = "DISK%u";
  1005. break;
  1006. default:
  1007. STARPU_ABORT();
  1008. break;
  1009. }
  1010. snprintf(name, sizeof(name), fmt, starpu_memory_node_get_devid(node));
  1011. return _starpu_simgrid_get_host_by_name(name);
  1012. }
  1013. void _starpu_simgrid_count_ngpus(void)
  1014. {
  1015. #if (defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300)
  1016. unsigned src, dst;
  1017. starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM");
  1018. /* For each pair of memory nodes, get the route */
  1019. for (src = 1; src < STARPU_MAXNODES; src++)
  1020. for (dst = 1; dst < STARPU_MAXNODES; dst++)
  1021. {
  1022. int busid;
  1023. starpu_sg_host_t srchost, dsthost;
  1024. #if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
  1025. xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
  1026. SD_link_t *route;
  1027. #else
  1028. const SD_link_t *route;
  1029. #endif
  1030. int i, routesize;
  1031. int through;
  1032. unsigned src2;
  1033. unsigned ngpus;
  1034. const char *name;
  1035. if (dst == src)
  1036. continue;
  1037. busid = starpu_bus_get_id(src, dst);
  1038. if (busid == -1)
  1039. continue;
  1040. srchost = _starpu_simgrid_get_memnode_host(src);
  1041. dsthost = _starpu_simgrid_get_memnode_host(dst);
  1042. #if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
  1043. sg_host_route(srchost, dsthost, route_dynar);
  1044. routesize = xbt_dynar_length(route_dynar);
  1045. route = xbt_dynar_to_array(route_dynar);
  1046. #else
  1047. routesize = SD_route_get_size(srchost, dsthost);
  1048. route = SD_route_get_list(srchost, dsthost);
  1049. #endif
  1050. /* If it goes through "Host", do not care, there is no
  1051. * direct transfer support */
  1052. for (i = 0; i < routesize; i++)
  1053. if (!strcmp(sg_link_name(route[i]), "Host"))
  1054. break;
  1055. if (i < routesize)
  1056. continue;
  1057. /* Get the PCI bridge between down and up links */
  1058. through = -1;
  1059. for (i = 0; i < routesize; i++)
  1060. {
  1061. name = sg_link_name(route[i]);
  1062. size_t len = strlen(name);
  1063. if (!strcmp(" through", name+len-8))
  1064. through = i;
  1065. else if (!strcmp(" up", name+len-3))
  1066. break;
  1067. }
  1068. /* Didn't find it ?! */
  1069. if (through == -1)
  1070. {
  1071. _STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
  1072. continue;
  1073. }
  1074. name = sg_link_name(route[through]);
  1075. /*
  1076. * count how many direct routes go through it between
  1077. * GPUs and RAM
  1078. */
  1079. ngpus = 0;
  1080. for (src2 = 1; src2 < STARPU_MAXNODES; src2++)
  1081. {
  1082. int numa;
  1083. int nnumas = starpu_memory_nodes_get_numa_count();
  1084. int found = 0;
  1085. for (numa = 0; numa < nnumas; numa++)
  1086. if (starpu_bus_get_id(src2, numa) != -1)
  1087. {
  1088. found = 1;
  1089. break;
  1090. }
  1091. if (!found)
  1092. continue;
  1093. starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
  1094. int routesize2;
  1095. #if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
  1096. xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
  1097. SD_link_t *route2;
  1098. sg_host_route(srchost2, ramhost, route_dynar2);
  1099. routesize2 = xbt_dynar_length(route_dynar2);
  1100. route2 = xbt_dynar_to_array(route_dynar2);
  1101. #else
  1102. const SD_link_t *route2 = SD_route_get_list(srchost2, ramhost);
  1103. routesize2 = SD_route_get_size(srchost2, ramhost);
  1104. #endif
  1105. for (i = 0; i < routesize2; i++)
  1106. if (!strcmp(name, sg_link_name(route2[i])))
  1107. {
  1108. /* This GPU goes through this PCI bridge to access RAM */
  1109. ngpus++;
  1110. break;
  1111. }
  1112. #if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
  1113. free(route2);
  1114. #endif
  1115. }
  1116. _STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
  1117. starpu_bus_set_ngpus(busid, ngpus);
  1118. #if defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
  1119. free(route);
  1120. #endif
  1121. }
  1122. #endif
  1123. }
  1124. #if 0
  1125. static size_t used;
  1126. void _starpu_simgrid_data_new(size_t size)
  1127. {
  1128. // Note: this is just declarative
  1129. //_STARPU_DISP("data new: %zd, now %zd\n", size, used);
  1130. }
  1131. void _starpu_simgrid_data_increase(size_t size)
  1132. {
  1133. used += size;
  1134. _STARPU_DISP("data increase: %zd, now %zd\n", size, used);
  1135. }
  1136. void _starpu_simgrid_data_alloc(size_t size)
  1137. {
  1138. used += size;
  1139. _STARPU_DISP("data alloc: %zd, now %zd\n", size, used);
  1140. }
  1141. void _starpu_simgrid_data_free(size_t size)
  1142. {
  1143. used -= size;
  1144. _STARPU_DISP("data free: %zd, now %zd\n", size, used);
  1145. }
  1146. void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node)
  1147. {
  1148. _STARPU_DISP("data transfer %zd from %u to %u\n", size, src_node, dst_node);
  1149. }
  1150. #endif
  1151. #endif