/* topology.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <stdlib.h>
  18. #include <stdio.h>
  19. #include <common/config.h>
  20. #include <core/workers.h>
  21. #include <core/debug.h>
  22. #include <core/topology.h>
  23. #include <drivers/cuda/driver_cuda.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. #include <profiling/profiling.h>
  26. #include <common/uthash.h>
  27. #ifdef STARPU_HAVE_HWLOC
  28. #include <hwloc.h>
  29. #ifndef HWLOC_API_VERSION
  30. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  31. #endif
  32. #endif
  33. #ifdef STARPU_HAVE_WINDOWS
  34. #include <windows.h>
  35. #endif
/* Whether _starpu_init_topology() has already run for this process. */
static unsigned topology_is_initialized = 0;

#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)

/* Entry of a uthash table keyed by GPU device identifier. */
struct handle_entry
{
	UT_hash_handle hh; /* makes this struct hashable by uthash */
	unsigned gpuid;    /* device identifier, used as the hash key */
};

# ifdef STARPU_USE_CUDA
/* Entry in the `devices_using_cuda' hash table. */
static struct handle_entry *devices_using_cuda;
# endif

/* Set when no explicit GPU binding was given, which allows StarPU to pick
 * CPU bindings for GPU workers automatically (see the affinity vectors in
 * _starpu_init_workers_binding). */
static unsigned may_bind_automatically = 0;

#endif // defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  49. /*
  50. * Discover the topology of the machine
  51. */
  52. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
/* Fill WORKERS_GPUID (STARPU_NMAXWORKERS entries) with the device ids that
 * successive GPU workers should use, and reset *CURRENT, the cursor used by
 * the _starpu_get_next_*_gpuid() helpers.  Three sources, in decreasing
 * priority: the environment variable VARNAME (e.g. STARPU_WORKERS_CUDAID),
 * the EXPLICIT_WORKERS_GPUID array from starpu_conf (may be NULL), or a
 * round-robin distribution over the NHWGPUS detected devices. */
static void
_starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
				  int *current, int *workers_gpuid,
				  const char *varname, unsigned nhwgpus)
{
	char *strval;
	unsigned i;

	*current = 0;

	/* conf->workers_bindid indicates the successive cpu identifier that
	 * should be used to bind the workers. It should be either filled
	 * according to the user's explicit parameters (from starpu_conf) or
	 * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
	 * round-robin policy is used to distributed the workers over the
	 * cpus. */

	/* what do we use, explicit value, env. variable, or round-robin ? */
	if ((strval = getenv(varname)))
	{
		/* STARPU_WORKERS_CUDAID certainly contains less entries than
		 * STARPU_NMAXWORKERS, so we reuse its entries in a round
		 * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
		 * 2". */
		unsigned wrap = 0;
		unsigned number_of_entries = 0;

		char *endptr;
		/* we use the content of the STARPU_WORKERS_CUDAID
		 * env. variable */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
		{
			if (!wrap)
			{
				long int val;
				val = strtol(strval, &endptr, 10);
				if (endptr != strval)
				{
					/* one more value parsed */
					workers_gpuid[i] = (unsigned)val;
					strval = endptr;
				}
				else
				{
					/* there must be at least one entry */
					STARPU_ASSERT(i != 0);
					number_of_entries = i;

					/* there is no more values in the
					 * string */
					wrap = 1;

					/* from here on, cycle over the
					 * entries parsed so far */
					workers_gpuid[i] = workers_gpuid[0];
				}
			}
			else
			{
				workers_gpuid[i] =
					workers_gpuid[i % number_of_entries];
			}
		}
	}
	else if (explicit_workers_gpuid)
	{
		/* we use the explicit value from the user */
		memcpy(workers_gpuid,
			explicit_workers_gpuid,
			STARPU_NMAXWORKERS*sizeof(unsigned));
	}
	else
	{
		/* by default, we take a round robin policy */
		if (nhwgpus > 0)
			for (i = 0; i < STARPU_NMAXWORKERS; i++)
				workers_gpuid[i] = (unsigned)(i % nhwgpus);

		/* StarPU can use sampling techniques to bind threads
		 * correctly */
		may_bind_automatically = 1;
	}
}
  126. #endif
  127. #ifdef STARPU_USE_CUDA
  128. static void
  129. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  130. {
  131. struct starpu_machine_topology *topology = &config->topology;
  132. struct starpu_conf *uconf = config->conf;
  133. _starpu_initialize_workers_gpuid (
  134. uconf->use_explicit_workers_cuda_gpuid == 0
  135. ? NULL
  136. : (int *)uconf->workers_cuda_gpuid,
  137. &(config->current_cuda_gpuid),
  138. (int *)topology->workers_cuda_gpuid,
  139. "STARPU_WORKERS_CUDAID",
  140. topology->nhwcudagpus);
  141. }
  142. static inline int
  143. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  144. {
  145. unsigned i =
  146. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  147. return (int)config->topology.workers_cuda_gpuid[i];
  148. }
  149. #endif
  150. #ifdef STARPU_USE_OPENCL
  151. static void
  152. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  153. {
  154. struct starpu_machine_topology *topology = &config->topology;
  155. struct starpu_conf *uconf = config->conf;
  156. _starpu_initialize_workers_gpuid(
  157. uconf->use_explicit_workers_opencl_gpuid == 0
  158. ? NULL
  159. : (int *)uconf->workers_opencl_gpuid,
  160. &(config->current_opencl_gpuid),
  161. (int *)topology->workers_opencl_gpuid,
  162. "STARPU_WORKERS_OPENCLID",
  163. topology->nhwopenclgpus);
  164. #ifdef STARPU_USE_CUDA
  165. // Detect devices which are already used with CUDA
  166. {
  167. unsigned tmp[STARPU_NMAXWORKERS];
  168. unsigned nb=0;
  169. int i;
  170. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  171. {
  172. struct handle_entry *entry;
  173. int devid = config->topology.workers_opencl_gpuid[i];
  174. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  175. if (entry == NULL)
  176. {
  177. tmp[nb] = topology->workers_opencl_gpuid[i];
  178. nb++;
  179. }
  180. }
  181. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  182. tmp[i] = -1;
  183. memcpy (topology->workers_opencl_gpuid, tmp,
  184. sizeof(unsigned)*STARPU_NMAXWORKERS);
  185. }
  186. #endif /* STARPU_USE_CUDA */
  187. {
  188. // Detect identical devices
  189. struct handle_entry *devices_already_used = NULL;
  190. unsigned tmp[STARPU_NMAXWORKERS];
  191. unsigned nb=0;
  192. int i;
  193. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  194. {
  195. int devid = topology->workers_opencl_gpuid[i];
  196. struct handle_entry *entry;
  197. HASH_FIND_INT(devices_already_used, &devid, entry);
  198. if (entry == NULL)
  199. {
  200. struct handle_entry *entry2;
  201. entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
  202. STARPU_ASSERT(entry2 != NULL);
  203. entry2->gpuid = devid;
  204. HASH_ADD_INT(devices_already_used, gpuid,
  205. entry2);
  206. tmp[nb] = devid;
  207. nb ++;
  208. }
  209. }
  210. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  211. tmp[i] = -1;
  212. memcpy (topology->workers_opencl_gpuid, tmp,
  213. sizeof(unsigned)*STARPU_NMAXWORKERS);
  214. }
  215. }
  216. static inline int
  217. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  218. {
  219. unsigned i =
  220. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  221. return (int)config->topology.workers_opencl_gpuid[i];
  222. }
  223. #endif
  224. static void
  225. _starpu_init_topology (struct _starpu_machine_config *config)
  226. {
  227. /* Discover the topology, meaning finding all the available PUs for
  228. the compiled drivers. These drivers MUST have been initialized
  229. before calling this function. The discovered topology is filled in
  230. CONFIG. */
  231. struct starpu_machine_topology *topology = &config->topology;
  232. if (topology_is_initialized)
  233. return;
  234. topology->nhwcpus = 0;
  235. #ifdef STARPU_HAVE_HWLOC
  236. hwloc_topology_init(&topology->hwtopology);
  237. hwloc_topology_load(topology->hwtopology);
  238. #endif
  239. _starpu_cpu_discover_devices(config);
  240. _starpu_cuda_discover_devices(config);
  241. _starpu_opencl_discover_devices(config);
  242. topology_is_initialized = 1;
  243. }
  244. /*
  245. * Bind workers on the different processors
  246. */
  247. static void
  248. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  249. {
  250. char *strval;
  251. unsigned i;
  252. struct starpu_machine_topology *topology = &config->topology;
  253. config->current_bindid = 0;
  254. /* conf->workers_bindid indicates the successive cpu identifier that
  255. * should be used to bind the workers. It should be either filled
  256. * according to the user's explicit parameters (from starpu_conf) or
  257. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  258. * round-robin policy is used to distributed the workers over the
  259. * cpus. */
  260. /* what do we use, explicit value, env. variable, or round-robin ? */
  261. if ((strval = getenv("STARPU_WORKERS_CPUID")))
  262. {
  263. /* STARPU_WORKERS_CPUID certainly contains less entries than
  264. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  265. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  266. * 2". */
  267. unsigned wrap = 0;
  268. unsigned number_of_entries = 0;
  269. char *endptr;
  270. /* we use the content of the STARPU_WORKERS_CUDAID
  271. * env. variable */
  272. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  273. {
  274. if (!wrap)
  275. {
  276. long int val;
  277. val = strtol(strval, &endptr, 10);
  278. if (endptr != strval)
  279. {
  280. topology->workers_bindid[i] =
  281. (unsigned)(val % topology->nhwcpus);
  282. strval = endptr;
  283. }
  284. else
  285. {
  286. /* there must be at least one entry */
  287. STARPU_ASSERT(i != 0);
  288. number_of_entries = i;
  289. /* there is no more values in the
  290. * string */
  291. wrap = 1;
  292. topology->workers_bindid[i] =
  293. topology->workers_bindid[0];
  294. }
  295. }
  296. else
  297. {
  298. topology->workers_bindid[i] =
  299. topology->workers_bindid[i % number_of_entries];
  300. }
  301. }
  302. }
  303. else if (config->conf->use_explicit_workers_bindid)
  304. {
  305. /* we use the explicit value from the user */
  306. memcpy(topology->workers_bindid,
  307. config->conf->workers_bindid,
  308. STARPU_NMAXWORKERS*sizeof(unsigned));
  309. }
  310. else
  311. {
  312. /* by default, we take a round robin policy */
  313. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  314. topology->workers_bindid[i] =
  315. (unsigned)(i % topology->nhwcpus);
  316. }
  317. }
/* This function gets the identifier of the next cpu on which to bind a
 * worker. In case a list of preferred cpus was specified (PREFERRED_BINDING,
 * NPREFERRED entries), we look for an available cpu among the list if
 * possible, otherwise a round-robin policy is used. */
static inline int
_starpu_get_next_bindid (struct _starpu_machine_config *config,
			 int *preferred_binding, int npreferred)
{
	struct starpu_machine_topology *topology = &config->topology;

	unsigned found = 0;
	int current_preferred;

	for (current_preferred = 0;
	     current_preferred < npreferred;
	     current_preferred++)
	{
		if (found)
			break;

		unsigned requested_cpu = preferred_binding[current_preferred];

		/* can we bind the worker on the requested cpu ? */
		unsigned ind;
		/* only entries from current_bindid onwards are still
		 * unassigned */
		for (ind = config->current_bindid;
		     ind < topology->nhwcpus;
		     ind++)
		{
			if (topology->workers_bindid[ind] == requested_cpu)
			{
				/* the cpu is available, we use it ! In order
				 * to make sure that it will not be used again
				 * later on, we remove the entry from the
				 * list: swap it with the entry at the cursor
				 * position */
				topology->workers_bindid[ind] =
					topology->workers_bindid[config->current_bindid];
				topology->workers_bindid[config->current_bindid] = requested_cpu;

				found = 1;

				break;
			}
		}
	}

	/* return the entry at the cursor (either the preferred cpu swapped
	 * in above, or the next round-robin one) and advance the cursor */
	unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);

	return (int)topology->workers_bindid[i];
}
/* Return the number of hardware CPUs, initializing the drivers and running
 * the topology discovery first if that has not been done yet. */
unsigned
_starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
{
#ifdef STARPU_USE_OPENCL
	/* discovery requires the OpenCL driver to be up */
	_starpu_opencl_init();
#endif
#ifdef STARPU_USE_CUDA
	/* likewise for the CUDA driver */
	_starpu_init_cuda();
#endif
	_starpu_init_topology(config);

	return config->topology.nhwcpus;
}
  371. static int
  372. _starpu_init_machine_config (struct _starpu_machine_config *config)
  373. {
  374. int i;
  375. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  376. config->workers[i].workerid = i;
  377. struct starpu_machine_topology *topology = &config->topology;
  378. topology->nworkers = 0;
  379. topology->ncombinedworkers = 0;
  380. #ifdef STARPU_USE_OPENCL
  381. _starpu_opencl_init();
  382. #endif
  383. #ifdef STARPU_USE_CUDA
  384. _starpu_init_cuda();
  385. #endif
  386. _starpu_init_topology(config);
  387. _starpu_initialize_workers_bindid(config);
  388. #ifdef STARPU_USE_CUDA
  389. int ncuda = config->conf->ncuda;
  390. if (ncuda != 0)
  391. {
  392. /* The user did not disable CUDA. We need to initialize CUDA
  393. * early to count the number of devices */
  394. _starpu_init_cuda();
  395. int nb_devices = _starpu_get_cuda_device_count();
  396. if (ncuda == -1)
  397. {
  398. /* Nothing was specified, so let's choose ! */
  399. ncuda = nb_devices;
  400. }
  401. else
  402. {
  403. if (ncuda > nb_devices)
  404. {
  405. /* The user requires more CUDA devices than
  406. * there is available */
  407. fprintf(stderr,
  408. "# Warning: %d CUDA devices "
  409. "requested. Only %d available.\n",
  410. ncuda, nb_devices);
  411. ncuda = nb_devices;
  412. }
  413. }
  414. }
  415. /* Now we know how many CUDA devices will be used */
  416. topology->ncudagpus = ncuda;
  417. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  418. _starpu_initialize_workers_cuda_gpuid(config);
  419. unsigned cudagpu;
  420. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  421. {
  422. int worker_idx = topology->nworkers + cudagpu;
  423. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  424. int devid = _starpu_get_next_cuda_gpuid(config);
  425. enum starpu_perf_archtype arch =
  426. (enum starpu_perf_archtype)((int)STARPU_CUDA_DEFAULT + devid);
  427. config->workers[worker_idx].devid = devid;
  428. config->workers[worker_idx].perf_arch = arch;
  429. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  430. config->worker_mask |= STARPU_CUDA;
  431. struct handle_entry *entry;
  432. entry = (struct handle_entry *) malloc(sizeof(*entry));
  433. STARPU_ASSERT(entry != NULL);
  434. entry->gpuid = devid;
  435. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  436. }
  437. topology->nworkers += topology->ncudagpus;
  438. #endif
  439. #ifdef STARPU_USE_OPENCL
  440. int nopencl = config->conf->nopencl;
  441. if (nopencl != 0)
  442. {
  443. /* The user did not disable OPENCL. We need to initialize
  444. * OpenCL early to count the number of devices */
  445. _starpu_opencl_init();
  446. int nb_devices;
  447. nb_devices = _starpu_opencl_get_device_count();
  448. if (nopencl == -1)
  449. {
  450. /* Nothing was specified, so let's choose ! */
  451. nopencl = nb_devices;
  452. if (nopencl > STARPU_MAXOPENCLDEVS)
  453. {
  454. fprintf(stderr,
  455. "# Warning: %d OpenCL devices "
  456. "available. Only %d enabled. "
  457. "Use configure option "
  458. "--enable-maxopencldadev=xxx to "
  459. "update the maximum value of "
  460. "supported OpenCL devices.\n",
  461. nb_devices, STARPU_MAXOPENCLDEVS);
  462. nopencl = STARPU_MAXOPENCLDEVS;
  463. }
  464. }
  465. else
  466. {
  467. /* Let's make sure this value is OK. */
  468. if (nopencl > nb_devices)
  469. {
  470. /* The user requires more OpenCL devices than
  471. * there is available */
  472. fprintf(stderr,
  473. "# Warning: %d OpenCL devices "
  474. "requested. Only %d available.\n",
  475. nopencl, nb_devices);
  476. nopencl = nb_devices;
  477. }
  478. /* Let's make sure this value is OK. */
  479. if (nopencl > STARPU_MAXOPENCLDEVS)
  480. {
  481. fprintf(stderr,
  482. "# Warning: %d OpenCL devices "
  483. "requested. Only %d enabled. Use "
  484. "configure option "
  485. "--enable-maxopencldev=xxx to update "
  486. "the maximum value of supported "
  487. "OpenCL devices.\n",
  488. nopencl, STARPU_MAXOPENCLDEVS);
  489. nopencl = STARPU_MAXOPENCLDEVS;
  490. }
  491. }
  492. }
  493. topology->nopenclgpus = nopencl;
  494. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  495. _starpu_initialize_workers_opencl_gpuid(config);
  496. unsigned openclgpu;
  497. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  498. {
  499. int worker_idx = topology->nworkers + openclgpu;
  500. int devid = _starpu_get_next_opencl_gpuid(config);
  501. if (devid == -1)
  502. { // There is no more devices left
  503. topology->nopenclgpus = openclgpu;
  504. break;
  505. }
  506. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  507. enum starpu_perf_archtype arch =
  508. (enum starpu_perf_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
  509. config->workers[worker_idx].devid = devid;
  510. config->workers[worker_idx].perf_arch = arch;
  511. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  512. config->worker_mask |= STARPU_OPENCL;
  513. }
  514. topology->nworkers += topology->nopenclgpus;
  515. #endif
  516. #ifdef STARPU_USE_GORDON
  517. int ngordon = config->conf->ngordon;
  518. if (ngordon != 0)
  519. {
  520. if (ngordon == -1)
  521. {
  522. /* Nothing was specified, so let's choose ! */
  523. ngordon = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
  524. }
  525. else
  526. {
  527. STARPU_ASSERT(ngordon <= NMAXGORDONSPUS);
  528. if (ngordon > STARPU_MAXGORDONSPUS);
  529. {
  530. fprintf(stderr,
  531. "# Warning: %d Gordon CPUs devices "
  532. "requested. Only %d supported\n",
  533. ngordon, NMAXGORDONSPUS);
  534. ngordon = NMAXGORDONSPUS;
  535. }
  536. }
  537. }
  538. topology->ngordon_spus = ngordon;
  539. STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);
  540. unsigned spu;
  541. for (spu = 0; spu < config->ngordon_spus; spu++)
  542. {
  543. int worker_idx = topology->nworkers + spu;
  544. config->workers[worker_idx].arch = STARPU_GORDON_WORKER;
  545. config->workers[worker_idx].perf_arch = STARPU_GORDON_DEFAULT;
  546. config->workers[worker_idx].id = spu;
  547. config->workers[worker_idx].worker_is_running = 0;
  548. config->workers[worker_idx].worker_mask = STARPU_GORDON;
  549. config->worker_mask |= STARPU_GORDON;
  550. }
  551. topology->nworkers += topology->ngordon_spus;
  552. #endif
  553. /* we put the CPU section after the accelerator : in case there was an
  554. * accelerator found, we devote one cpu */
  555. #ifdef STARPU_USE_CPU
  556. int ncpu = config->conf->ncpus;
  557. if (ncpu != 0)
  558. {
  559. if (ncpu == -1)
  560. {
  561. unsigned already_busy_cpus =
  562. (topology->ngordon_spus ? 1 : 0) + topology->ncudagpus + topology->nopenclgpus;
  563. long avail_cpus = topology->nhwcpus - already_busy_cpus;
  564. if (avail_cpus < 0)
  565. avail_cpus = 0;
  566. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  567. }
  568. else
  569. {
  570. if (ncpu > STARPU_MAXCPUS)
  571. {
  572. fprintf(stderr,
  573. "# Warning: %d CPU devices requested."
  574. " Only %d enabled. Use configure "
  575. "option --enable-maxcpus=xxx to "
  576. "update the maximum value of "
  577. "supported CPU devices.\n",
  578. ncpu, STARPU_MAXCPUS);
  579. ncpu = STARPU_MAXCPUS;
  580. }
  581. }
  582. }
  583. topology->ncpus = ncpu;
  584. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  585. unsigned cpu;
  586. for (cpu = 0; cpu < topology->ncpus; cpu++)
  587. {
  588. int worker_idx = topology->nworkers + cpu;
  589. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  590. config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
  591. config->workers[worker_idx].devid = cpu;
  592. config->workers[worker_idx].worker_mask = STARPU_CPU;
  593. config->worker_mask |= STARPU_CPU;
  594. }
  595. topology->nworkers += topology->ncpus;
  596. #endif
  597. if (topology->nworkers == 0)
  598. {
  599. _STARPU_DEBUG("No worker found, aborting ...\n");
  600. return -ENODEV;
  601. }
  602. return 0;
  603. }
/* Bind the calling thread on the CPU identified by CPUID, using whichever
 * facility is available: hwloc, pthread affinity (Linux), or the Windows
 * API.  No-op when the STARPU_WORKERS_NOBIND environment variable is set to
 * a positive value. */
void
_starpu_bind_thread_on_cpu (
	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
	unsigned cpuid)
{
	if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
		return;
#ifdef STARPU_HAVE_HWLOC
	const struct hwloc_topology_support *support;

#ifdef STARPU_USE_OPENCL
	_starpu_opencl_init();
#endif
#ifdef STARPU_USE_CUDA
	_starpu_init_cuda();
#endif
	_starpu_init_topology(config);

	support = hwloc_topology_get_support (config->topology.hwtopology);
	if (support->cpubind->set_thisthread_cpubind)
	{
		/* fetch the hwloc object for this logical cpu */
		hwloc_obj_t obj =
			hwloc_get_obj_by_depth (config->topology.hwtopology,
						config->cpu_depth, cpuid);
		hwloc_bitmap_t set = obj->cpuset;
		int ret;

		/* restrict the cpuset to a single PU before binding */
		hwloc_bitmap_singlify(set);
		ret = hwloc_set_cpubind (config->topology.hwtopology, set,
					 HWLOC_CPUBIND_THREAD);
		if (ret)
		{
			perror("binding thread");
			STARPU_ABORT();
		}
	}
#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
	int ret;
	/* fix the thread on the correct cpu */
	cpu_set_t aff_mask;
	CPU_ZERO(&aff_mask);
	CPU_SET(cpuid, &aff_mask);

	pthread_t self = pthread_self();

	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
	if (ret)
	{
		perror("binding thread");
		STARPU_ABORT();
	}
#elif defined(__MINGW32__) || defined(__CYGWIN__)
	DWORD mask = 1 << cpuid;
	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
	{
		fprintf(stderr,"SetThreadMaskAffinity(%lx) failed\n", mask);
		STARPU_ABORT();
	}
#else
#warning no CPU binding support
#endif
}
  661. void
  662. _starpu_bind_thread_on_cpus (
  663. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  664. struct _starpu_combined_worker *combined_worker)
  665. {
  666. #ifdef STARPU_HAVE_HWLOC
  667. const struct hwloc_topology_support *support;
  668. #ifdef STARPU_USE_OPENC
  669. _starpu_opencl_init();
  670. #endif
  671. #ifdef STARPU_USE_CUDA
  672. _starpu_init_cuda();
  673. #endif
  674. _starpu_init_topology(config);
  675. support = hwloc_topology_get_support(config->topology.hwtopology);
  676. if (support->cpubind->set_thisthread_cpubind)
  677. {
  678. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  679. int ret;
  680. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  681. HWLOC_CPUBIND_THREAD);
  682. if (ret)
  683. {
  684. perror("binding thread");
  685. STARPU_ABORT();
  686. }
  687. }
  688. #else
  689. #warning no parallel worker CPU binding support
  690. #endif
  691. }
  692. static void
  693. _starpu_init_workers_binding (struct _starpu_machine_config *config)
  694. {
  695. /* launch one thread per CPU */
  696. unsigned ram_memory_node;
  697. /* a single cpu is dedicated for the accelerators */
  698. int accelerator_bindid = -1;
  699. /* note that even if the CPU cpu are not used, we always have a RAM
  700. * node */
  701. /* TODO : support NUMA ;) */
  702. ram_memory_node = _starpu_register_memory_node(STARPU_CPU_RAM, -1);
  703. /* We will store all the busid of the different (src, dst)
  704. * combinations in a matrix which we initialize here. */
  705. _starpu_initialize_busid_matrix();
  706. unsigned worker;
  707. for (worker = 0; worker < config->topology.nworkers; worker++)
  708. {
  709. unsigned memory_node = -1;
  710. unsigned is_a_set_of_accelerators = 0;
  711. struct _starpu_worker *workerarg = &config->workers[worker];
  712. /* Perhaps the worker has some "favourite" bindings */
  713. int *preferred_binding = NULL;
  714. int npreferred = 0;
  715. /* select the memory node that contains worker's memory */
  716. switch (workerarg->arch)
  717. {
  718. case STARPU_CPU_WORKER:
  719. /* "dedicate" a cpu cpu to that worker */
  720. is_a_set_of_accelerators = 0;
  721. memory_node = ram_memory_node;
  722. _starpu_memory_node_worker_add(ram_memory_node);
  723. break;
  724. #ifdef STARPU_USE_GORDON
  725. case STARPU_GORDON_WORKER:
  726. is_a_set_of_accelerators = 1;
  727. memory_node = ram_memory_node;
  728. _starpu_memory_node_worker_add(ram_memory_node);
  729. break;
  730. #endif
  731. #ifdef STARPU_USE_CUDA
  732. case STARPU_CUDA_WORKER:
  733. if (may_bind_automatically)
  734. {
  735. /* StarPU is allowed to bind threads automatically */
  736. preferred_binding = _starpu_get_cuda_affinity_vector(workerarg->devid);
  737. npreferred = config->topology.nhwcpus;
  738. }
  739. is_a_set_of_accelerators = 0;
  740. memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM, workerarg->devid);
  741. _starpu_memory_node_worker_add(memory_node);
  742. _starpu_register_bus(0, memory_node);
  743. _starpu_register_bus(memory_node, 0);
  744. #ifdef HAVE_CUDA_MEMCPY_PEER
  745. unsigned worker2;
  746. for (worker2 = 0; worker2 < worker; worker2++)
  747. {
  748. struct _starpu_worker *workerarg = &config->workers[worker];
  749. if (workerarg->arch == STARPU_CUDA_WORKER)
  750. {
  751. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  752. _starpu_register_bus(memory_node2, memory_node);
  753. _starpu_register_bus(memory_node, memory_node2);
  754. }
  755. }
  756. #endif
  757. break;
  758. #endif
  759. #ifdef STARPU_USE_OPENCL
  760. case STARPU_OPENCL_WORKER:
  761. if (may_bind_automatically)
  762. {
  763. /* StarPU is allowed to bind threads automatically */
  764. preferred_binding = _starpu_get_opencl_affinity_vector(workerarg->devid);
  765. npreferred = config->topology.nhwcpus;
  766. }
  767. is_a_set_of_accelerators = 0;
  768. memory_node = _starpu_register_memory_node(STARPU_OPENCL_RAM, workerarg->devid);
  769. _starpu_memory_node_worker_add(memory_node);
  770. _starpu_register_bus(0, memory_node);
  771. _starpu_register_bus(memory_node, 0);
  772. break;
  773. #endif
  774. default:
  775. STARPU_ABORT();
  776. }
  777. if (is_a_set_of_accelerators)
  778. {
  779. if (accelerator_bindid == -1)
  780. accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  781. workerarg->bindid = accelerator_bindid;
  782. }
  783. else
  784. {
  785. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  786. }
  787. workerarg->memory_node = memory_node;
  788. #ifdef __GLIBC__
  789. /* Save the initial cpuset */
  790. CPU_ZERO(&workerarg->initial_cpu_set);
  791. CPU_SET(workerarg->bindid, &workerarg->initial_cpu_set);
  792. CPU_ZERO(&workerarg->current_cpu_set);
  793. CPU_SET(workerarg->bindid, &workerarg->current_cpu_set);
  794. #endif /* __GLIBC__ */
  795. #ifdef STARPU_HAVE_HWLOC
  796. /* Put the worker descriptor in the userdata field of the
  797. * hwloc object describing the CPU */
  798. hwloc_obj_t worker_obj;
  799. worker_obj =
  800. hwloc_get_obj_by_depth (config->topology.hwtopology,
  801. config->cpu_depth,
  802. workerarg->bindid);
  803. worker_obj->userdata = &config->workers[worker];
  804. /* Clear the cpu set and set the cpu */
  805. workerarg->initial_hwloc_cpu_set =
  806. hwloc_bitmap_dup (worker_obj->cpuset);
  807. workerarg->current_hwloc_cpu_set =
  808. hwloc_bitmap_dup (worker_obj->cpuset);
  809. #endif
  810. }
  811. }
  812. int
  813. _starpu_build_topology (struct _starpu_machine_config *config)
  814. {
  815. int ret;
  816. ret = _starpu_init_machine_config(config);
  817. if (ret)
  818. return ret;
  819. /* for the data management library */
  820. _starpu_init_memory_nodes();
  821. _starpu_init_workers_binding(config);
  822. return 0;
  823. }
  824. void
  825. _starpu_destroy_topology (
  826. struct _starpu_machine_config *config __attribute__ ((unused)))
  827. {
  828. /* cleanup StarPU internal data structures */
  829. _starpu_deinit_memory_nodes();
  830. unsigned worker;
  831. for (worker = 0; worker < config->topology.nworkers; worker++)
  832. {
  833. #ifdef STARPU_HAVE_HWLOC
  834. struct _starpu_worker *workerarg = &config->workers[worker];
  835. hwloc_bitmap_free(workerarg->initial_hwloc_cpu_set);
  836. hwloc_bitmap_free(workerarg->current_hwloc_cpu_set);
  837. #endif
  838. }
  839. #ifdef STARPU_HAVE_HWLOC
  840. hwloc_topology_destroy(config->topology.hwtopology);
  841. #endif
  842. topology_is_initialized = 0;
  843. #ifdef STARPU_USE_CUDA
  844. struct handle_entry *entry, *tmp;
  845. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  846. {
  847. HASH_DEL(devices_using_cuda, entry);
  848. free(entry);
  849. }
  850. devices_using_cuda = NULL;
  851. #endif
  852. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  853. may_bind_automatically = 0;
  854. #endif
  855. }
  856. void
  857. starpu_topology_print (FILE *output)
  858. {
  859. struct _starpu_machine_config *config = _starpu_get_machine_config();
  860. struct starpu_machine_topology *topology = &config->topology;
  861. unsigned core;
  862. unsigned worker;
  863. unsigned nworkers = starpu_worker_get_count();
  864. unsigned ncombinedworkers = topology->ncombinedworkers;
  865. for (core = 0; core < topology->nhwcpus; core++) {
  866. fprintf(output, "core %u\t", core);
  867. for (worker = 0;
  868. worker < nworkers + ncombinedworkers;
  869. worker++)
  870. {
  871. if (worker < nworkers)
  872. {
  873. if (topology->workers_bindid[worker] == core)
  874. {
  875. char name[256];
  876. starpu_worker_get_name (worker, name,
  877. sizeof(name));
  878. fprintf(output, "%s\t", name);
  879. }
  880. }
  881. else
  882. {
  883. int worker_size, i;
  884. int *combined_workerid;
  885. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  886. for (i = 0; i < worker_size; i++)
  887. {
  888. if (topology->workers_bindid[combined_workerid[i]] == core)
  889. fprintf(output, "comb %u\t", worker-nworkers);
  890. }
  891. }
  892. }
  893. fprintf(output, "\n");
  894. }
  895. }