topology.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 INRIA
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <common/config.h>
  21. #include <core/workers.h>
  22. #include <core/debug.h>
  23. #include <core/topology.h>
  24. #include <drivers/cuda/driver_cuda.h>
  25. #include <drivers/mic/driver_mic_source.h>
  26. #include <drivers/scc/driver_scc_source.h>
  27. #include <drivers/mp_common/source_common.h>
  28. #include <drivers/opencl/driver_opencl.h>
  29. #include <profiling/profiling.h>
  30. #include <common/uthash.h>
  31. #ifdef STARPU_HAVE_HWLOC
  32. #include <hwloc.h>
  33. #ifndef HWLOC_API_VERSION
  34. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  35. #endif
  36. #endif
  37. #ifdef STARPU_HAVE_WINDOWS
  38. #include <windows.h>
  39. #endif
  40. #ifdef STARPU_SIMGRID
  41. #include <msg/msg.h>
  42. #include <core/simgrid.h>
  43. #endif
  /* Becomes 1 once _starpu_init_topology() has run to completion; later calls
   * to that function then return immediately. */
  static unsigned topology_is_initialized = 0;

  #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)

  /* Set to 1 by _starpu_initialize_workers_deviceid() when neither an
   * environment variable nor an explicit user list selected the devices, i.e.
   * when the default round-robin assignment was taken. */
  static unsigned may_bind_automatically = 0;

  /* Element of a uthash table keyed by device identifier. */
  struct handle_entry
  {
      UT_hash_handle hh;      /* uthash bookkeeping */
      unsigned gpuid;         /* hash key: device identifier */
  };

  # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /* Head of the `devices_using_cuda' hash table: one entry per device id
   * handed to a CUDA worker.  It is looked up when OpenCL device ids are
   * chosen so that those devices are not used twice. */
  static struct handle_entry *devices_using_cuda;
  # endif

  #endif /* STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_SCC || STARPU_SIMGRID */
  57. /*
  58. * Discover the topology of the machine
  59. */
  60. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  61. static void
  62. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  63. int *current, int *workers_gpuid,
  64. const char *varname, unsigned nhwgpus)
  65. {
  66. char *strval;
  67. unsigned i;
  68. *current = 0;
  69. /* conf->workers_bindid indicates the successive cpu identifier that
  70. * should be used to bind the workers. It should be either filled
  71. * according to the user's explicit parameters (from starpu_conf) or
  72. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  73. * round-robin policy is used to distributed the workers over the
  74. * cpus. */
  75. /* what do we use, explicit value, env. variable, or round-robin ? */
  76. if ((strval = getenv(varname)))
  77. {
  78. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  79. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  80. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  81. * 2". */
  82. unsigned wrap = 0;
  83. unsigned number_of_entries = 0;
  84. char *endptr;
  85. /* we use the content of the STARPU_WORKERS_CUDAID
  86. * env. variable */
  87. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  88. {
  89. if (!wrap)
  90. {
  91. long int val;
  92. val = strtol(strval, &endptr, 10);
  93. if (endptr != strval)
  94. {
  95. workers_gpuid[i] = (unsigned)val;
  96. strval = endptr;
  97. }
  98. else
  99. {
  100. /* there must be at least one entry */
  101. STARPU_ASSERT(i != 0);
  102. number_of_entries = i;
  103. /* there is no more values in the
  104. * string */
  105. wrap = 1;
  106. workers_gpuid[i] = workers_gpuid[0];
  107. }
  108. }
  109. else
  110. {
  111. workers_gpuid[i] =
  112. workers_gpuid[i % number_of_entries];
  113. }
  114. }
  115. }
  116. else if (explicit_workers_gpuid)
  117. {
  118. /* we use the explicit value from the user */
  119. memcpy(workers_gpuid,
  120. explicit_workers_gpuid,
  121. STARPU_NMAXWORKERS*sizeof(unsigned));
  122. }
  123. else
  124. {
  125. /* by default, we take a round robin policy */
  126. if (nhwgpus > 0)
  127. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  128. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  129. /* StarPU can use sampling techniques to bind threads
  130. * correctly
  131. * TODO: use a private value for each kind of device */
  132. may_bind_automatically = 1;
  133. }
  134. }
  135. #endif
  136. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  137. static void
  138. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  139. {
  140. struct _starpu_machine_topology *topology = &config->topology;
  141. struct starpu_conf *uconf = config->conf;
  142. _starpu_initialize_workers_deviceid (
  143. uconf->use_explicit_workers_cuda_gpuid == 0
  144. ? NULL
  145. : (int *)uconf->workers_cuda_gpuid,
  146. &(config->current_cuda_gpuid),
  147. (int *)topology->workers_cuda_gpuid,
  148. "STARPU_WORKERS_CUDAID",
  149. topology->nhwcudagpus);
  150. }
  151. static inline int
  152. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  153. {
  154. unsigned i =
  155. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  156. return (int)config->topology.workers_cuda_gpuid[i];
  157. }
  158. #endif
  159. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  160. static void
  161. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  162. {
  163. struct _starpu_machine_topology *topology = &config->topology;
  164. struct starpu_conf *uconf = config->conf;
  165. _starpu_initialize_workers_deviceid(
  166. uconf->use_explicit_workers_opencl_gpuid == 0
  167. ? NULL
  168. : (int *)uconf->workers_opencl_gpuid,
  169. &(config->current_opencl_gpuid),
  170. (int *)topology->workers_opencl_gpuid,
  171. "STARPU_WORKERS_OPENCLID",
  172. topology->nhwopenclgpus);
  173. #ifdef STARPU_USE_CUDA
  174. // Detect devices which are already used with CUDA
  175. {
  176. unsigned tmp[STARPU_NMAXWORKERS];
  177. unsigned nb=0;
  178. int i;
  179. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  180. {
  181. struct handle_entry *entry;
  182. int devid = config->topology.workers_opencl_gpuid[i];
  183. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  184. if (entry == NULL)
  185. {
  186. tmp[nb] = topology->workers_opencl_gpuid[i];
  187. nb++;
  188. }
  189. }
  190. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  191. tmp[i] = -1;
  192. memcpy (topology->workers_opencl_gpuid, tmp,
  193. sizeof(unsigned)*STARPU_NMAXWORKERS);
  194. }
  195. #endif /* STARPU_USE_CUDA */
  196. {
  197. // Detect identical devices
  198. struct handle_entry *devices_already_used = NULL;
  199. unsigned tmp[STARPU_NMAXWORKERS];
  200. unsigned nb=0;
  201. int i;
  202. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  203. {
  204. int devid = topology->workers_opencl_gpuid[i];
  205. struct handle_entry *entry;
  206. HASH_FIND_INT(devices_already_used, &devid, entry);
  207. if (entry == NULL)
  208. {
  209. struct handle_entry *entry2;
  210. entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
  211. STARPU_ASSERT(entry2 != NULL);
  212. entry2->gpuid = devid;
  213. HASH_ADD_INT(devices_already_used, gpuid,
  214. entry2);
  215. tmp[nb] = devid;
  216. nb ++;
  217. }
  218. }
  219. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  220. tmp[i] = -1;
  221. memcpy (topology->workers_opencl_gpuid, tmp,
  222. sizeof(unsigned)*STARPU_NMAXWORKERS);
  223. }
  224. }
  225. static inline int
  226. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  227. {
  228. unsigned i =
  229. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  230. return (int)config->topology.workers_opencl_gpuid[i];
  231. }
  232. #endif
  /* Compiled out (#if 0): MIC counterpart of the CUDA/OpenCL/SCC device-id
   * initializers.  It would fill `workers_mic_deviceid' from
   * STARPU_WORKERS_MICID, the user's explicit list, or a round-robin.
   * NOTE(review): it reads the explicit list through `config->user_conf'
   * while the flag is read through `uconf' (config->conf), and it passes
   * `topology->nhwmiccores', which is indexed as an array elsewhere in this
   * file -- presumably why it is disabled; confirm before re-enabling. */
  233. #if 0
  234. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  235. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  236. {
  237. struct _starpu_machine_topology *topology = &config->topology;
  238. struct starpu_conf *uconf = config->conf;
  239. _starpu_initialize_workers_deviceid(
  240. uconf->use_explicit_workers_mic_deviceid == 0
  241. ? NULL
  242. : (int *)config->user_conf->workers_mic_deviceid,
  243. &(config->current_mic_deviceid),
  244. (int *)topology->workers_mic_deviceid,
  245. "STARPU_WORKERS_MICID",
  246. topology->nhwmiccores);
  247. }
  248. #endif
  249. #endif
  250. #ifdef STARPU_USE_SCC
  251. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  252. {
  253. struct _starpu_machine_topology *topology = &config->topology;
  254. struct starpu_conf *uconf = config->conf;
  255. _starpu_initialize_workers_deviceid(
  256. uconf->use_explicit_workers_scc_deviceid == 0
  257. ? NULL
  258. : (int *) uconf->workers_scc_deviceid,
  259. &(config->current_scc_deviceid),
  260. (int *)topology->workers_scc_deviceid,
  261. "STARPU_WORKERS_SCCID",
  262. topology->nhwscc);
  263. }
  264. #endif /* STARPU_USE_SCC */
  /* Compiled out (#if 0): would return the next entry of
   * `workers_mic_deviceid', using `current_mic_deviceid' modulo `nmicdevices'
   * as index and then advancing that cursor.  Same pattern as the CUDA,
   * OpenCL and SCC getters of this file. */
  265. #if 0
  266. #ifdef STARPU_USE_MIC
  267. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  268. {
  269. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  270. return (int)config->topology.workers_mic_deviceid[i];
  271. }
  272. #endif
  273. #endif
  274. #ifdef STARPU_USE_SCC
  275. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  276. {
  277. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  278. return (int)config->topology.workers_scc_deviceid[i];
  279. }
  280. #endif
  281. #ifdef STARPU_USE_MIC
  282. static void
  283. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  284. {
  285. /* Discover the topology of the mic node identifier by MIC_IDX. That
  286. * means, make this StarPU instance aware of the number of cores available
  287. * on this MIC device. Update the `nhwmiccores' topology field
  288. * accordingly. */
  289. struct _starpu_machine_topology *topology = &config->topology;
  290. int nbcores;
  291. _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
  292. topology->nhwmiccores[mic_idx] = nbcores;
  293. }
  294. static int
  295. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  296. COIENGINE *coi_handle, COIPROCESS *coi_process)
  297. {
  298. /* Initialize the MIC node of index MIC_IDX. */
  299. struct starpu_conf *user_conf = config->conf;
  300. char ***argv = _starpu_get_argv();
  301. const char *suffixes[] = {"-mic", "_mic", NULL};
  302. /* Environment variables to send to the Sink, it informs it what kind
  303. * of node it is (architecture and type) as there is no way to discover
  304. * it itself */
  305. char mic_idx_env[32];
  306. sprintf(mic_idx_env, "DEVID=%d", mic_idx);
  307. /* XXX: this is currently necessary so that the remote process does not
  308. * segfault. */
  309. char nb_mic_env[32];
  310. sprintf(nb_mic_env, "NB_MIC=%d", 2);
  311. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  312. char mic_sink_program_path[1024];
  313. /* Let's get the helper program to run on the MIC device */
  314. int mic_file_found =
  315. _starpu_src_common_locate_file (mic_sink_program_path,
  316. getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  317. getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  318. user_conf->mic_sink_program_path,
  319. (argv ? (*argv)[0] : NULL),
  320. suffixes);
  321. if (0 != mic_file_found) {
  322. fprintf(stderr, "No MIC program specified, use the environment\n"
  323. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  324. "or the field 'starpu_conf.mic_sink_program_path'\n"
  325. "to define it.\n");
  326. return -1;
  327. }
  328. COIRESULT res;
  329. /* Let's get the handle which let us manage the remote MIC device */
  330. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  331. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  332. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  333. /* We launch the helper on the MIC device, which will wait for us
  334. * to give it work to do.
  335. * As we will communicate further with the device throught scif we
  336. * don't need to keep the process pointer */
  337. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  338. mic_sink_env, 1, NULL, 0, NULL,
  339. coi_process);
  340. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  341. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  342. /* Let's create the node structure, we'll communicate with the peer
  343. * through scif thanks to it */
  344. mic_nodes[mic_idx] =
  345. _starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
  346. return 0;
  347. }
  348. #endif
  349. static void
  350. _starpu_init_topology (struct _starpu_machine_config *config)
  351. {
  352. /* Discover the topology, meaning finding all the available PUs for
  353. the compiled drivers. These drivers MUST have been initialized
  354. before calling this function. The discovered topology is filled in
  355. CONFIG. */
  356. struct _starpu_machine_topology *topology = &config->topology;
  357. if (topology_is_initialized)
  358. return;
  359. topology->nhwcpus = 0;
  360. #ifndef STARPU_SIMGRID
  361. #ifdef STARPU_HAVE_HWLOC
  362. hwloc_topology_init(&topology->hwtopology);
  363. hwloc_topology_load(topology->hwtopology);
  364. #endif
  365. #endif
  366. _starpu_cpu_discover_devices(config);
  367. _starpu_cuda_discover_devices(config);
  368. _starpu_opencl_discover_devices(config);
  369. #ifdef STARPU_USE_SCC
  370. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  371. #endif
  372. topology_is_initialized = 1;
  373. }
  374. /*
  375. * Bind workers on the different processors
  376. */
  377. static void
  378. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  379. {
  380. char *strval;
  381. unsigned i;
  382. struct _starpu_machine_topology *topology = &config->topology;
  383. config->current_bindid = 0;
  384. /* conf->workers_bindid indicates the successive cpu identifier that
  385. * should be used to bind the workers. It should be either filled
  386. * according to the user's explicit parameters (from starpu_conf) or
  387. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  388. * round-robin policy is used to distributed the workers over the
  389. * cpus. */
  390. /* what do we use, explicit value, env. variable, or round-robin ? */
  391. if ((strval = getenv("STARPU_WORKERS_CPUID")))
  392. {
  393. /* STARPU_WORKERS_CPUID certainly contains less entries than
  394. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  395. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  396. * 2". */
  397. unsigned wrap = 0;
  398. unsigned number_of_entries = 0;
  399. char *endptr;
  400. /* we use the content of the STARPU_WORKERS_CUDAID
  401. * env. variable */
  402. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  403. {
  404. if (!wrap)
  405. {
  406. long int val;
  407. val = strtol(strval, &endptr, 10);
  408. if (endptr != strval)
  409. {
  410. topology->workers_bindid[i] =
  411. (unsigned)(val % topology->nhwcpus);
  412. strval = endptr;
  413. }
  414. else
  415. {
  416. /* there must be at least one entry */
  417. STARPU_ASSERT(i != 0);
  418. number_of_entries = i;
  419. /* there is no more values in the
  420. * string */
  421. wrap = 1;
  422. topology->workers_bindid[i] =
  423. topology->workers_bindid[0];
  424. }
  425. }
  426. else
  427. {
  428. topology->workers_bindid[i] =
  429. topology->workers_bindid[i % number_of_entries];
  430. }
  431. }
  432. }
  433. else if (config->conf->use_explicit_workers_bindid)
  434. {
  435. /* we use the explicit value from the user */
  436. memcpy(topology->workers_bindid,
  437. config->conf->workers_bindid,
  438. STARPU_NMAXWORKERS*sizeof(unsigned));
  439. }
  440. else
  441. {
  442. /* by default, we take a round robin policy */
  443. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  444. topology->workers_bindid[i] =
  445. (unsigned)(i % topology->nhwcpus);
  446. }
  447. }
  448. /* This function gets the identifier of the next cpu on which to bind a
  449. * worker. In case a list of preferred cpus was specified, we look for a an
  450. * available cpu among the list if possible, otherwise a round-robin policy is
  451. * used. */
  452. static inline int
  453. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  454. int *preferred_binding, int npreferred)
  455. {
  456. struct _starpu_machine_topology *topology = &config->topology;
  457. unsigned found = 0;
  458. int current_preferred;
  459. for (current_preferred = 0;
  460. current_preferred < npreferred;
  461. current_preferred++)
  462. {
  463. if (found)
  464. break;
  465. unsigned requested_cpu = preferred_binding[current_preferred];
  466. /* can we bind the worker on the requested cpu ? */
  467. unsigned ind;
  468. for (ind = config->current_bindid;
  469. ind < topology->nhwcpus;
  470. ind++)
  471. {
  472. if (topology->workers_bindid[ind] == requested_cpu)
  473. {
  474. /* the cpu is available, we use it ! In order
  475. * to make sure that it will not be used again
  476. * later on, we remove the entry from the
  477. * list */
  478. topology->workers_bindid[ind] =
  479. topology->workers_bindid[config->current_bindid];
  480. topology->workers_bindid[config->current_bindid] = requested_cpu;
  481. found = 1;
  482. break;
  483. }
  484. }
  485. }
  486. unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
  487. return (int)topology->workers_bindid[i];
  488. }
  489. unsigned
  490. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  491. {
  492. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  493. _starpu_opencl_init();
  494. #endif
  495. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  496. _starpu_init_cuda();
  497. #endif
  498. _starpu_init_topology(config);
  499. return config->topology.nhwcpus;
  500. }
  501. #ifdef STARPU_USE_MIC
  502. static void
  503. _starpu_init_mic_config (struct _starpu_machine_config *config,
  504. struct starpu_conf *user_conf,
  505. unsigned mic_idx)
  506. {
  507. // Configure the MIC device of index MIC_IDX.
  508. struct _starpu_machine_topology *topology = &config->topology;
  509. topology->nhwmiccores[mic_idx] = 0;
  510. _starpu_init_mic_topology (config, mic_idx);
  511. int nmiccores;
  512. nmiccores = starpu_get_env_number("STARPU_NMIC");
  513. /* STARPU_NMIC is not set. Did the user specify anything ? */
  514. if (nmiccores == -1 && user_conf)
  515. nmiccores = user_conf->nmic;
  516. if (nmiccores != 0)
  517. {
  518. if (nmiccores == -1)
  519. {
  520. /* Nothing was specified, so let's use the number of
  521. * detected mic cores. ! */
  522. nmiccores = topology->nhwmiccores[mic_idx];
  523. }
  524. else
  525. {
  526. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  527. {
  528. /* The user requires more MIC devices than there is available */
  529. fprintf(stderr,
  530. "# Warning: %d MIC devices requested. Only %d available.\n",
  531. nmiccores, topology->nhwmiccores[mic_idx]);
  532. nmiccores = topology->nhwmiccores[mic_idx];
  533. }
  534. }
  535. }
  536. topology->nmiccores[mic_idx] = nmiccores;
  537. STARPU_ASSERT(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS);
  538. /* _starpu_initialize_workers_mic_deviceid (config); */
  539. unsigned miccore_id;
  540. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  541. {
  542. int worker_idx = topology->nworkers + miccore_id;
  543. struct starpu_perfmodel_arch arch;
  544. arch.type = STARPU_MIC_WORKER;
  545. arch.devid = mic_idx;
  546. arch.ncore = 0;
  547. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  548. config->workers[worker_idx].perf_arch = arch;
  549. config->workers[worker_idx].mp_nodeid = mic_idx;
  550. config->workers[worker_idx].devid = miccore_id;
  551. config->workers[worker_idx].worker_mask = STARPU_MIC;
  552. config->worker_mask |= STARPU_MIC;
  553. }
  554. topology->nworkers += topology->nmiccores[mic_idx];
  555. }
  556. #ifdef STARPU_USE_MIC
  557. static COIENGINE handles[2];
  558. static COIPROCESS process[2];
  559. #endif
  560. static void
  561. _starpu_init_mp_config (struct _starpu_machine_config *config,
  562. struct starpu_conf *user_conf)
  563. {
  564. /* Discover and configure the mp topology. That means:
  565. * - discover the number of mp nodes;
  566. * - initialize each discovered node;
  567. * - discover the local topology (number of PUs/devices) of each node;
  568. * - configure the workers accordingly.
  569. */
  570. struct _starpu_machine_topology *topology = &config->topology;
  571. // We currently only support MIC at this level.
  572. #ifdef STARPU_USE_MIC
  573. /* Discover and initialize the number of MIC nodes through the mp
  574. * infrastructure. */
  575. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  576. int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
  577. if (-1 == reqmicdevices)
  578. reqmicdevices = nhwmicdevices;
  579. topology->nmicdevices = 0;
  580. unsigned i;
  581. for (i = 0; i < STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices); i++)
  582. if (0 == _starpu_init_mic_node (config, i, &handles[i], &process[i]))
  583. topology->nmicdevices++;
  584. for (i = 0; i < topology->nmicdevices; i++)
  585. _starpu_init_mic_config (config, user_conf, i);
  586. #endif
  587. }
  588. static void
  589. _starpu_deinit_mic_node (unsigned mic_idx)
  590. {
  591. _starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_EXIT, NULL, 0);
  592. COIProcessDestroy(process[mic_idx], -1, 0, NULL, NULL);
  593. _starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
  594. }
  595. static void
  596. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  597. {
  598. struct _starpu_machine_topology *topology = &config->topology;
  599. unsigned i;
  600. for (i = 0; i < topology->nmicdevices; i++)
  601. _starpu_deinit_mic_node (i);
  602. _starpu_mic_clear_kernels();
  603. }
  604. #endif
  605. static int
  606. _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_config)
  607. {
  608. int i;
  609. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  610. config->workers[i].workerid = i;
  611. struct _starpu_machine_topology *topology = &config->topology;
  612. topology->nworkers = 0;
  613. topology->ncombinedworkers = 0;
  614. topology->nsched_ctxs = 0;
  615. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  616. _starpu_opencl_init();
  617. #endif
  618. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  619. _starpu_init_cuda();
  620. #endif
  621. _starpu_init_topology(config);
  622. _starpu_initialize_workers_bindid(config);
  623. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  624. int ncuda = config->conf->ncuda;
  625. if (ncuda != 0)
  626. {
  627. /* The user did not disable CUDA. We need to initialize CUDA
  628. * early to count the number of devices */
  629. _starpu_init_cuda();
  630. int nb_devices = _starpu_get_cuda_device_count();
  631. if (ncuda == -1)
  632. {
  633. /* Nothing was specified, so let's choose ! */
  634. ncuda = nb_devices;
  635. }
  636. else
  637. {
  638. if (ncuda > nb_devices)
  639. {
  640. /* The user requires more CUDA devices than
  641. * there is available */
  642. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  643. ncuda = nb_devices;
  644. }
  645. }
  646. }
  647. /* Now we know how many CUDA devices will be used */
  648. topology->ncudagpus = ncuda;
  649. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  650. _starpu_initialize_workers_cuda_gpuid(config);
  651. unsigned cudagpu;
  652. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  653. {
  654. int worker_idx = topology->nworkers + cudagpu;
  655. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  656. int devid = _starpu_get_next_cuda_gpuid(config);
  657. config->workers[worker_idx].perf_arch.type = STARPU_CUDA_WORKER;
  658. config->workers[worker_idx].perf_arch.devid = cudagpu;
  659. config->workers[worker_idx].perf_arch.ncore = 0;
  660. config->workers[worker_idx].mp_nodeid = -1;
  661. config->workers[worker_idx].devid = devid;
  662. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  663. config->worker_mask |= STARPU_CUDA;
  664. struct handle_entry *entry;
  665. entry = (struct handle_entry *) malloc(sizeof(*entry));
  666. STARPU_ASSERT(entry != NULL);
  667. entry->gpuid = devid;
  668. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  669. }
  670. topology->nworkers += topology->ncudagpus;
  671. #endif
  672. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  673. int nopencl = config->conf->nopencl;
  674. if (nopencl != 0)
  675. {
  676. /* The user did not disable OPENCL. We need to initialize
  677. * OpenCL early to count the number of devices */
  678. _starpu_opencl_init();
  679. int nb_devices;
  680. nb_devices = _starpu_opencl_get_device_count();
  681. if (nopencl == -1)
  682. {
  683. /* Nothing was specified, so let's choose ! */
  684. nopencl = nb_devices;
  685. if (nopencl > STARPU_MAXOPENCLDEVS)
  686. {
  687. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  688. nopencl = STARPU_MAXOPENCLDEVS;
  689. }
  690. }
  691. else
  692. {
  693. /* Let's make sure this value is OK. */
  694. if (nopencl > nb_devices)
  695. {
  696. /* The user requires more OpenCL devices than
  697. * there is available */
  698. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  699. nopencl = nb_devices;
  700. }
  701. /* Let's make sure this value is OK. */
  702. if (nopencl > STARPU_MAXOPENCLDEVS)
  703. {
  704. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  705. nopencl = STARPU_MAXOPENCLDEVS;
  706. }
  707. }
  708. }
  709. topology->nopenclgpus = nopencl;
  710. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  711. _starpu_initialize_workers_opencl_gpuid(config);
  712. unsigned openclgpu;
  713. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  714. {
  715. int worker_idx = topology->nworkers + openclgpu;
  716. int devid = _starpu_get_next_opencl_gpuid(config);
  717. if (devid == -1)
  718. { // There is no more devices left
  719. topology->nopenclgpus = openclgpu;
  720. break;
  721. }
  722. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  723. config->workers[worker_idx].perf_arch.type = STARPU_OPENCL_WORKER;
  724. config->workers[worker_idx].perf_arch.devid = openclgpu;
  725. config->workers[worker_idx].perf_arch.ncore = 0;
  726. config->workers[worker_idx].mp_nodeid = -1;
  727. config->workers[worker_idx].devid = devid;
  728. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  729. config->worker_mask |= STARPU_OPENCL;
  730. }
  731. topology->nworkers += topology->nopenclgpus;
  732. #endif
  733. #ifdef STARPU_USE_SCC
  734. int nscc = config->conf->nscc;
  735. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  736. if (nscc != 0)
  737. {
  738. /* The user did not disable SCC. We need to count
  739. * the number of devices */
  740. int nb_devices = nb_scc_nodes;
  741. if (nscc == -1)
  742. {
  743. /* Nothing was specified, so let's choose ! */
  744. nscc = nb_devices;
  745. if (nscc > STARPU_MAXSCCDEVS)
  746. {
  747. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  748. nscc = STARPU_MAXSCCDEVS;
  749. }
  750. }
  751. else
  752. {
  753. /* Let's make sure this value is OK. */
  754. if (nscc > nb_devices)
  755. {
  756. /* The user requires more SCC devices than there is available */
  757. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  758. nscc = nb_devices;
  759. }
  760. /* Let's make sure this value is OK. */
  761. if (nscc > STARPU_MAXSCCDEVS)
  762. {
  763. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  764. nscc = STARPU_MAXSCCDEVS;
  765. }
  766. }
  767. }
  768. /* Now we know how many SCC devices will be used */
  769. topology->nsccdevices = nscc;
  770. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  771. _starpu_initialize_workers_scc_deviceid(config);
  772. unsigned sccdev;
  773. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  774. {
  775. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  776. int devid = _starpu_get_next_scc_deviceid(config);
  777. config->workers[topology->nworkers + sccdev].perf_arch.type = STARPU_SCC_WORKER;
  778. config->workers[topology->nworkers + sccdev].perf_arch.devid = sccdev;
  779. config->workers[topology->nworkers + sccdev].perf_arch.ncore = 0;
  780. config->workers[topology->nworkers + sccdev].mp_nodeid = -1;
  781. config->workers[topology->nworkers + sccdev].devid = devid;
  782. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  783. config->worker_mask |= STARPU_SCC;
  784. }
  785. for (; sccdev < nb_scc_nodes; ++sccdev)
  786. _starpu_scc_exit_useless_node(sccdev);
  787. topology->nworkers += topology->nsccdevices;
  788. #endif /* STARPU_USE_SCC */
  789. /* Unless not requested, we need to complete configuration with the
  790. * ones of the mp nodes. */
  791. #ifdef STARPU_USE_MIC
  792. if (! no_mp_config)
  793. _starpu_init_mp_config (config, config->conf);
  794. #endif
  795. /* we put the CPU section after the accelerator : in case there was an
  796. * accelerator found, we devote one cpu */
  797. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  798. int ncpu = config->conf->ncpus;
  799. if (ncpu != 0)
  800. {
  801. if (ncpu == -1)
  802. {
  803. unsigned mic_busy_cpus = 0;
  804. unsigned j = 0;
  805. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  806. mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
  807. unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
  808. + topology->nopenclgpus + topology->nsccdevices;
  809. long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
  810. if (avail_cpus < 0)
  811. avail_cpus = 0;
  812. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  813. }
  814. else
  815. {
  816. if (ncpu > STARPU_MAXCPUS)
  817. {
  818. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  819. ncpu = STARPU_MAXCPUS;
  820. }
  821. }
  822. }
  823. topology->ncpus = ncpu;
  824. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  825. unsigned cpu;
  826. for (cpu = 0; cpu < topology->ncpus; cpu++)
  827. {
  828. int worker_idx = topology->nworkers + cpu;
  829. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  830. config->workers[worker_idx].perf_arch.type = STARPU_CPU_WORKER;
  831. config->workers[worker_idx].perf_arch.devid = 0;
  832. config->workers[worker_idx].perf_arch.ncore = 0;
  833. config->workers[worker_idx].mp_nodeid = -1;
  834. config->workers[worker_idx].devid = cpu;
  835. config->workers[worker_idx].worker_mask = STARPU_CPU;
  836. config->worker_mask |= STARPU_CPU;
  837. }
  838. topology->nworkers += topology->ncpus;
  839. #endif
  840. if (topology->nworkers == 0)
  841. {
  842. _STARPU_DEBUG("No worker found, aborting ...\n");
  843. return -ENODEV;
  844. }
  845. return 0;
  846. }
  847. void
  848. _starpu_bind_thread_on_cpu (
  849. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  850. unsigned cpuid)
  851. {
  852. #ifdef STARPU_SIMGRID
  853. return;
  854. #endif
  855. if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
  856. return;
  857. #ifdef STARPU_HAVE_HWLOC
  858. const struct hwloc_topology_support *support;
  859. #ifdef STARPU_USE_OPENCL
  860. _starpu_opencl_init();
  861. #endif
  862. #ifdef STARPU_USE_CUDA
  863. _starpu_init_cuda();
  864. #endif
  865. _starpu_init_topology(config);
  866. support = hwloc_topology_get_support (config->topology.hwtopology);
  867. if (support->cpubind->set_thisthread_cpubind)
  868. {
  869. hwloc_obj_t obj =
  870. hwloc_get_obj_by_depth (config->topology.hwtopology,
  871. config->cpu_depth, cpuid);
  872. hwloc_bitmap_t set = obj->cpuset;
  873. int ret;
  874. hwloc_bitmap_singlify(set);
  875. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  876. HWLOC_CPUBIND_THREAD);
  877. if (ret)
  878. {
  879. perror("hwloc_set_cpubind");
  880. STARPU_ABORT();
  881. }
  882. }
  883. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  884. int ret;
  885. /* fix the thread on the correct cpu */
  886. cpu_set_t aff_mask;
  887. CPU_ZERO(&aff_mask);
  888. CPU_SET(cpuid, &aff_mask);
  889. starpu_pthread_t self = pthread_self();
  890. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  891. if (ret)
  892. {
  893. perror("binding thread");
  894. STARPU_ABORT();
  895. }
  896. #elif defined(__MINGW32__) || defined(__CYGWIN__)
  897. DWORD mask = 1 << cpuid;
  898. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  899. {
  900. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  901. }
  902. #else
  903. #warning no CPU binding support
  904. #endif
  905. }
  906. void
  907. _starpu_bind_thread_on_cpus (
  908. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  909. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  910. {
  911. #ifdef STARPU_SIMGRID
  912. return;
  913. #endif
  914. #ifdef STARPU_HAVE_HWLOC
  915. const struct hwloc_topology_support *support;
  916. #ifdef STARPU_USE_OPENC
  917. _starpu_opencl_init();
  918. #endif
  919. #ifdef STARPU_USE_CUDA
  920. _starpu_init_cuda();
  921. #endif
  922. _starpu_init_topology(config);
  923. support = hwloc_topology_get_support(config->topology.hwtopology);
  924. if (support->cpubind->set_thisthread_cpubind)
  925. {
  926. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  927. int ret;
  928. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  929. HWLOC_CPUBIND_THREAD);
  930. if (ret)
  931. {
  932. perror("binding thread");
  933. STARPU_ABORT();
  934. }
  935. }
  936. #else
  937. #ifdef __GLIBC__
  938. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  939. #else
  940. # warning no parallel worker CPU binding support
  941. #endif
  942. #endif
  943. }
  944. static void
  945. _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config)
  946. {
  947. /* launch one thread per CPU */
  948. unsigned ram_memory_node;
  949. /* a single cpu is dedicated for the accelerators */
  950. int accelerator_bindid = -1;
  951. /* note that even if the CPU cpu are not used, we always have a RAM
  952. * node */
  953. /* TODO : support NUMA ;) */
  954. ram_memory_node = _starpu_memory_node_register(STARPU_CPU_RAM, -1);
  955. #ifdef STARPU_SIMGRID
  956. char name[16];
  957. xbt_dynar_t hosts = MSG_hosts_as_dynar();
  958. msg_host_t host = MSG_get_host_by_name("RAM");
  959. STARPU_ASSERT(host);
  960. _starpu_simgrid_memory_node_set_host(0, host);
  961. #endif
  962. /* We will store all the busid of the different (src, dst)
  963. * combinations in a matrix which we initialize here. */
  964. _starpu_initialize_busid_matrix();
  965. #ifdef STARPU_USE_MIC
  966. /* Each MIC device has its own memory node. */
  967. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  968. // Register the memory nodes for the MIC devices.
  969. if (! no_mp_config) {
  970. unsigned i = 0;
  971. for (i = 0; i < config->topology.nmicdevices; i++) {
  972. mic_memory_nodes[i] = _starpu_memory_node_register (STARPU_MIC_RAM, i);
  973. _starpu_register_bus(0, mic_memory_nodes[i]);
  974. _starpu_register_bus(mic_memory_nodes[i], 0);
  975. }
  976. }
  977. #endif
  978. unsigned worker;
  979. for (worker = 0; worker < config->topology.nworkers; worker++)
  980. {
  981. unsigned memory_node = -1;
  982. unsigned is_a_set_of_accelerators = 0;
  983. struct _starpu_worker *workerarg = &config->workers[worker];
  984. /* Perhaps the worker has some "favourite" bindings */
  985. int *preferred_binding = NULL;
  986. int npreferred = 0;
  987. /* select the memory node that contains worker's memory */
  988. switch (workerarg->arch)
  989. {
  990. case STARPU_CPU_WORKER:
  991. /* "dedicate" a cpu cpu to that worker */
  992. is_a_set_of_accelerators = 0;
  993. memory_node = ram_memory_node;
  994. _starpu_memory_node_add_nworkers(ram_memory_node);
  995. break;
  996. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  997. case STARPU_CUDA_WORKER:
  998. #ifndef STARPU_SIMGRID
  999. if (may_bind_automatically)
  1000. {
  1001. /* StarPU is allowed to bind threads automatically */
  1002. preferred_binding = _starpu_get_cuda_affinity_vector(workerarg->devid);
  1003. npreferred = config->topology.nhwcpus;
  1004. }
  1005. #endif
  1006. is_a_set_of_accelerators = 0;
  1007. memory_node = _starpu_memory_node_register(STARPU_CUDA_RAM, workerarg->devid);
  1008. #ifdef STARPU_SIMGRID
  1009. snprintf(name, sizeof(name), "CUDA%d", workerarg->devid);
  1010. host = MSG_get_host_by_name(name);
  1011. STARPU_ASSERT(host);
  1012. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1013. #endif
  1014. _starpu_memory_node_add_nworkers(memory_node);
  1015. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1016. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1017. #ifdef HAVE_CUDA_MEMCPY_PEER
  1018. unsigned worker2;
  1019. for (worker2 = 0; worker2 < worker; worker2++)
  1020. {
  1021. struct _starpu_worker *workerarg = &config->workers[worker];
  1022. if (workerarg->arch == STARPU_CUDA_WORKER)
  1023. {
  1024. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  1025. _starpu_register_bus(memory_node2, memory_node);
  1026. _starpu_register_bus(memory_node, memory_node2);
  1027. }
  1028. }
  1029. #endif
  1030. break;
  1031. #endif
  1032. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1033. case STARPU_OPENCL_WORKER:
  1034. #ifndef STARPU_SIMGRID
  1035. if (may_bind_automatically)
  1036. {
  1037. /* StarPU is allowed to bind threads automatically */
  1038. preferred_binding = _starpu_get_opencl_affinity_vector(workerarg->devid);
  1039. npreferred = config->topology.nhwcpus;
  1040. }
  1041. #endif
  1042. is_a_set_of_accelerators = 0;
  1043. memory_node = _starpu_memory_node_register(STARPU_OPENCL_RAM, workerarg->devid);
  1044. #ifdef STARPU_SIMGRID
  1045. snprintf(name, sizeof(name), "OpenCL%d", workerarg->devid);
  1046. host = MSG_get_host_by_name(name);
  1047. STARPU_ASSERT(host);
  1048. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1049. #endif
  1050. _starpu_memory_node_add_nworkers(memory_node);
  1051. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1052. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1053. break;
  1054. #endif
  1055. #ifdef STARPU_USE_MIC
  1056. case STARPU_MIC_WORKER:
  1057. //if (may_bind_automatically)
  1058. //{
  1059. // /* StarPU is allowed to bind threads automatically */
  1060. // preferred_binding = _starpu_get_mic_affinity_vector(workerarg->devid);
  1061. // npreferred = config->topology.nhwcpus;
  1062. //}
  1063. is_a_set_of_accelerators = 1;
  1064. memory_node = mic_memory_nodes[workerarg->mp_nodeid];
  1065. _starpu_memory_node_add_nworkers(memory_node);
  1066. /* memory_node = _starpu_memory_node_register(STARPU_MIC_RAM, workerarg->devid);*/
  1067. /* _starpu_register_bus(0, memory_node);
  1068. * _starpu_register_bus(memory_node, 0); */
  1069. break;
  1070. #endif /* STARPU_USE_MIC */
  1071. #ifdef STARPU_USE_SCC
  1072. case STARPU_SCC_WORKER:
  1073. {
  1074. /* Node 0 represents the SCC shared memory when we're on SCC. */
  1075. struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
  1076. descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
  1077. is_a_set_of_accelerators = 0;
  1078. memory_node = ram_memory_node;
  1079. _starpu_memory_node_add_nworkers(memory_node);
  1080. }
  1081. break;
  1082. #endif
  1083. default:
  1084. STARPU_ABORT();
  1085. }
  1086. if (is_a_set_of_accelerators)
  1087. {
  1088. /* TODO: il faudrait changer quand on change de device */
  1089. if (accelerator_bindid == -1)
  1090. accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1091. workerarg->bindid = accelerator_bindid;
  1092. }
  1093. else
  1094. {
  1095. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1096. }
  1097. workerarg->memory_node = memory_node;
  1098. #ifdef __GLIBC__
  1099. /* Save the initial cpuset */
  1100. CPU_ZERO(&workerarg->cpu_set);
  1101. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  1102. #endif /* __GLIBC__ */
  1103. #ifdef STARPU_HAVE_HWLOC
  1104. /* Put the worker descriptor in the userdata field of the
  1105. * hwloc object describing the CPU */
  1106. hwloc_obj_t worker_obj;
  1107. worker_obj =
  1108. hwloc_get_obj_by_depth (config->topology.hwtopology,
  1109. config->cpu_depth,
  1110. workerarg->bindid);
  1111. worker_obj->userdata = &config->workers[worker];
  1112. /* Clear the cpu set and set the cpu */
  1113. workerarg->hwloc_cpu_set =
  1114. hwloc_bitmap_dup (worker_obj->cpuset);
  1115. #endif
  1116. }
  1117. #ifdef STARPU_SIMGRID
  1118. xbt_dynar_free(&hosts);
  1119. #endif
  1120. }
  1121. int
  1122. _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
  1123. {
  1124. int ret;
  1125. int i;
  1126. ret = _starpu_init_machine_config(config, no_mp_config);
  1127. if (ret)
  1128. return ret;
  1129. /* for the data management library */
  1130. _starpu_memory_nodes_init();
  1131. _starpu_init_workers_binding(config, no_mp_config);
  1132. config->cpus_nodeid = -1;
  1133. config->cuda_nodeid = -1;
  1134. config->opencl_nodeid = -1;
  1135. config->mic_nodeid = -1;
  1136. config->scc_nodeid = -1;
  1137. for (i = 0; i < starpu_worker_get_count(); i++)
  1138. {
  1139. switch (starpu_worker_get_type(i))
  1140. {
  1141. case STARPU_CPU_WORKER:
  1142. if (config->cpus_nodeid == -1)
  1143. config->cpus_nodeid = starpu_worker_get_memory_node(i);
  1144. else if (config->cpus_nodeid != starpu_worker_get_memory_node(i))
  1145. config->cpus_nodeid = -2;
  1146. break;
  1147. case STARPU_CUDA_WORKER:
  1148. if (config->cuda_nodeid == -1)
  1149. config->cuda_nodeid = starpu_worker_get_memory_node(i);
  1150. else if (config->cuda_nodeid != starpu_worker_get_memory_node(i))
  1151. config->cuda_nodeid = -2;
  1152. break;
  1153. case STARPU_OPENCL_WORKER:
  1154. if (config->opencl_nodeid == -1)
  1155. config->opencl_nodeid = starpu_worker_get_memory_node(i);
  1156. else if (config->opencl_nodeid != starpu_worker_get_memory_node(i))
  1157. config->opencl_nodeid = -2;
  1158. break;
  1159. case STARPU_MIC_WORKER:
  1160. if (config->mic_nodeid == -1)
  1161. config->mic_nodeid = starpu_worker_get_memory_node(i);
  1162. else if (config->mic_nodeid != starpu_worker_get_memory_node(i))
  1163. config->mic_nodeid = -2;
  1164. break;
  1165. case STARPU_SCC_WORKER:
  1166. if (config->scc_nodeid == -1)
  1167. config->scc_nodeid = starpu_worker_get_memory_node(i);
  1168. else if (config->scc_nodeid != starpu_worker_get_memory_node(i))
  1169. config->scc_nodeid = -2;
  1170. break;
  1171. case STARPU_ANY_WORKER:
  1172. STARPU_ASSERT(0);
  1173. }
  1174. }
  1175. return 0;
  1176. }
  1177. void
  1178. _starpu_destroy_topology (
  1179. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  1180. {
  1181. #ifdef STARPU_USE_MIC
  1182. _starpu_deinit_mp_config(config);
  1183. #endif
  1184. /* cleanup StarPU internal data structures */
  1185. _starpu_memory_nodes_deinit();
  1186. unsigned worker;
  1187. for (worker = 0; worker < config->topology.nworkers; worker++)
  1188. {
  1189. #ifdef STARPU_HAVE_HWLOC
  1190. struct _starpu_worker *workerarg = &config->workers[worker];
  1191. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  1192. #endif
  1193. }
  1194. #ifdef STARPU_HAVE_HWLOC
  1195. hwloc_topology_destroy(config->topology.hwtopology);
  1196. #endif
  1197. topology_is_initialized = 0;
  1198. #ifdef STARPU_USE_CUDA
  1199. struct handle_entry *entry, *tmp;
  1200. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  1201. {
  1202. HASH_DEL(devices_using_cuda, entry);
  1203. free(entry);
  1204. }
  1205. devices_using_cuda = NULL;
  1206. #endif
  1207. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  1208. may_bind_automatically = 0;
  1209. #endif
  1210. }
  1211. void
  1212. starpu_topology_print (FILE *output)
  1213. {
  1214. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1215. struct _starpu_machine_topology *topology = &config->topology;
  1216. unsigned core;
  1217. unsigned worker;
  1218. unsigned nworkers = starpu_worker_get_count();
  1219. unsigned ncombinedworkers = topology->ncombinedworkers;
  1220. for (core = 0; core < topology->nhwcpus; core++)
  1221. {
  1222. fprintf(output, "core %u\t", core);
  1223. for (worker = 0;
  1224. worker < nworkers + ncombinedworkers;
  1225. worker++)
  1226. {
  1227. if (worker < nworkers)
  1228. {
  1229. if (topology->workers_bindid[worker] == core)
  1230. {
  1231. char name[256];
  1232. starpu_worker_get_name (worker, name,
  1233. sizeof(name));
  1234. fprintf(output, "%s\t", name);
  1235. }
  1236. }
  1237. else
  1238. {
  1239. int worker_size, i;
  1240. int *combined_workerid;
  1241. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  1242. for (i = 0; i < worker_size; i++)
  1243. {
  1244. if (topology->workers_bindid[combined_workerid[i]] == core)
  1245. fprintf(output, "comb %u\t", worker-nworkers);
  1246. }
  1247. }
  1248. }
  1249. fprintf(output, "\n");
  1250. }
  1251. }