/* topology.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 INRIA
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <common/config.h>
  21. #include <core/workers.h>
  22. #include <core/debug.h>
  23. #include <core/topology.h>
  24. #include <drivers/cuda/driver_cuda.h>
  25. #include <drivers/opencl/driver_opencl.h>
  26. #include <profiling/profiling.h>
  27. #include <common/uthash.h>
  28. #ifdef STARPU_HAVE_HWLOC
  29. #include <hwloc.h>
  30. #ifndef HWLOC_API_VERSION
  31. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  32. #endif
  33. #endif
  34. #ifdef STARPU_HAVE_WINDOWS
  35. #include <windows.h>
  36. #endif
  37. #ifdef STARPU_SIMGRID
  38. #include <msg/msg.h>
  39. #include <core/simgrid.h>
  40. #endif
/* Set once the machine topology has been discovered (see
 * _starpu_init_topology); reset by _starpu_destroy_topology so that a
 * re-initialization probes the hardware again. */
static unsigned topology_is_initialized = 0;

#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
/* uthash entry keyed by GPU device identifier. */
struct handle_entry
{
	UT_hash_handle hh;
	unsigned gpuid;
};

# ifdef STARPU_USE_CUDA
/* Entry in the `devices_using_cuda' hash table. */
static struct handle_entry *devices_using_cuda;
# endif

/* Set when no explicit device list was supplied, i.e. StarPU is allowed to
 * choose thread bindings itself (see _starpu_initialize_workers_gpuid and
 * _starpu_init_workers_binding). */
static unsigned may_bind_automatically = 0;
#endif // defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  54. /*
  55. * Discover the topology of the machine
  56. */
  57. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
/* Fill WORKERS_GPUID (STARPU_NMAXWORKERS entries) with the GPU identifier
 * each worker slot should use.  Precedence: the VARNAME environment
 * variable, then the user's explicit list EXPLICIT_WORKERS_GPUID (may be
 * NULL), then round-robin over the NHWGPUS detected devices.  *CURRENT is
 * the cursor later consumed by the _starpu_get_next_*_gpuid helpers and is
 * reset to 0 here. */
static void
_starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
				  int *current, int *workers_gpuid,
				  const char *varname, unsigned nhwgpus)
{
	char *strval;
	unsigned i;

	*current = 0;

	/* workers_gpuid indicates the successive GPU identifiers that
	 * should be used by the workers. It is either filled according to
	 * the user's explicit parameters (from starpu_conf) or according to
	 * the VARNAME env. variable. Otherwise, a round-robin policy is
	 * used to distribute the workers over the GPUs. */

	/* what do we use, explicit value, env. variable, or round-robin ? */
	if ((strval = getenv(varname)))
	{
		/* The env. variable certainly contains less entries than
		 * STARPU_NMAXWORKERS, so we reuse its entries in a round
		 * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
		 * 2". */
		unsigned wrap = 0;
		unsigned number_of_entries = 0;

		char *endptr;
		/* we use the content of the env. variable */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
		{
			if (!wrap)
			{
				long int val;
				val = strtol(strval, &endptr, 10);
				if (endptr != strval)
				{
					/* one more value parsed: consume it */
					workers_gpuid[i] = (unsigned)val;
					strval = endptr;
				}
				else
				{
					/* there must be at least one entry */
					STARPU_ASSERT(i != 0);
					number_of_entries = i;

					/* there is no more values in the
					 * string */
					wrap = 1;

					workers_gpuid[i] = workers_gpuid[0];
				}
			}
			else
			{
				/* cycle over the entries parsed so far */
				workers_gpuid[i] =
					workers_gpuid[i % number_of_entries];
			}
		}
	}
	else if (explicit_workers_gpuid)
	{
		/* we use the explicit value from the user */
		memcpy(workers_gpuid,
		       explicit_workers_gpuid,
		       STARPU_NMAXWORKERS*sizeof(unsigned));
	}
	else
	{
		/* by default, we take a round robin policy */
		if (nhwgpus > 0)
			for (i = 0; i < STARPU_NMAXWORKERS; i++)
				workers_gpuid[i] = (unsigned)(i % nhwgpus);

		/* StarPU can use sampling techniques to bind threads
		 * correctly */
		may_bind_automatically = 1;
	}
}
  131. #endif
  132. #ifdef STARPU_USE_CUDA
  133. static void
  134. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  135. {
  136. struct starpu_machine_topology *topology = &config->topology;
  137. struct starpu_conf *uconf = config->conf;
  138. _starpu_initialize_workers_gpuid (
  139. uconf->use_explicit_workers_cuda_gpuid == 0
  140. ? NULL
  141. : (int *)uconf->workers_cuda_gpuid,
  142. &(config->current_cuda_gpuid),
  143. (int *)topology->workers_cuda_gpuid,
  144. "STARPU_WORKERS_CUDAID",
  145. topology->nhwcudagpus);
  146. }
  147. static inline int
  148. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  149. {
  150. unsigned i =
  151. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  152. return (int)config->topology.workers_cuda_gpuid[i];
  153. }
  154. #endif
  155. #ifdef STARPU_USE_OPENCL
  156. static void
  157. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  158. {
  159. struct starpu_machine_topology *topology = &config->topology;
  160. struct starpu_conf *uconf = config->conf;
  161. _starpu_initialize_workers_gpuid(
  162. uconf->use_explicit_workers_opencl_gpuid == 0
  163. ? NULL
  164. : (int *)uconf->workers_opencl_gpuid,
  165. &(config->current_opencl_gpuid),
  166. (int *)topology->workers_opencl_gpuid,
  167. "STARPU_WORKERS_OPENCLID",
  168. topology->nhwopenclgpus);
  169. #ifdef STARPU_USE_CUDA
  170. // Detect devices which are already used with CUDA
  171. {
  172. unsigned tmp[STARPU_NMAXWORKERS];
  173. unsigned nb=0;
  174. int i;
  175. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  176. {
  177. struct handle_entry *entry;
  178. int devid = config->topology.workers_opencl_gpuid[i];
  179. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  180. if (entry == NULL)
  181. {
  182. tmp[nb] = topology->workers_opencl_gpuid[i];
  183. nb++;
  184. }
  185. }
  186. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  187. tmp[i] = -1;
  188. memcpy (topology->workers_opencl_gpuid, tmp,
  189. sizeof(unsigned)*STARPU_NMAXWORKERS);
  190. }
  191. #endif /* STARPU_USE_CUDA */
  192. {
  193. // Detect identical devices
  194. struct handle_entry *devices_already_used = NULL;
  195. unsigned tmp[STARPU_NMAXWORKERS];
  196. unsigned nb=0;
  197. int i;
  198. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  199. {
  200. int devid = topology->workers_opencl_gpuid[i];
  201. struct handle_entry *entry;
  202. HASH_FIND_INT(devices_already_used, &devid, entry);
  203. if (entry == NULL)
  204. {
  205. struct handle_entry *entry2;
  206. entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
  207. STARPU_ASSERT(entry2 != NULL);
  208. entry2->gpuid = devid;
  209. HASH_ADD_INT(devices_already_used, gpuid,
  210. entry2);
  211. tmp[nb] = devid;
  212. nb ++;
  213. }
  214. }
  215. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  216. tmp[i] = -1;
  217. memcpy (topology->workers_opencl_gpuid, tmp,
  218. sizeof(unsigned)*STARPU_NMAXWORKERS);
  219. }
  220. }
  221. static inline int
  222. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  223. {
  224. unsigned i =
  225. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  226. return (int)config->topology.workers_opencl_gpuid[i];
  227. }
  228. #endif
/* Discover the topology of the machine, i.e. fill config->topology with the
 * number of processing units available to each compiled driver.  The
 * drivers MUST have been initialized before calling this function.  The
 * discovery runs only once per process (guarded by topology_is_initialized);
 * later calls return immediately. */
static void
_starpu_init_topology (struct _starpu_machine_config *config)
{
	struct starpu_machine_topology *topology = &config->topology;

	if (topology_is_initialized)
		return;

#ifdef STARPU_SIMGRID
	/* Simulation mode: the unit counts come straight from the user's
	 * configuration rather than from hardware probing. */
	struct starpu_conf *conf = config->conf;
	topology->nhwcpus = conf->ncpus?conf->ncpus:1;
	topology->nhwcudagpus = conf->ncuda;
	topology->nhwopenclgpus = conf->nopencl;
#else
	topology->nhwcpus = 0;
#ifdef STARPU_HAVE_HWLOC
	hwloc_topology_init(&topology->hwtopology);
	hwloc_topology_load(topology->hwtopology);
#endif

	/* each driver fills in its own counts */
	_starpu_cpu_discover_devices(config);
	_starpu_cuda_discover_devices(config);
	_starpu_opencl_discover_devices(config);
#endif

	topology_is_initialized = 1;
}
  256. /*
  257. * Bind workers on the different processors
  258. */
/* Fill topology->workers_bindid with the CPU identifier each worker slot
 * should be bound to: from the STARPU_WORKERS_CPUID env. variable, from the
 * user's explicit starpu_conf list, or round-robin over the detected CPUs,
 * in that order of precedence. */
static void
_starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
{
	char *strval;
	unsigned i;

	struct starpu_machine_topology *topology = &config->topology;

	config->current_bindid = 0;

	/* conf->workers_bindid indicates the successive cpu identifier that
	 * should be used to bind the workers. It should be either filled
	 * according to the user's explicit parameters (from starpu_conf) or
	 * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
	 * round-robin policy is used to distribute the workers over the
	 * cpus. */

	/* what do we use, explicit value, env. variable, or round-robin ? */
	if ((strval = getenv("STARPU_WORKERS_CPUID")))
	{
		/* STARPU_WORKERS_CPUID certainly contains less entries than
		 * STARPU_NMAXWORKERS, so we reuse its entries in a round
		 * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
		 * 2". */
		unsigned wrap = 0;
		unsigned number_of_entries = 0;

		char *endptr;
		/* we use the content of the STARPU_WORKERS_CPUID
		 * env. variable (the old comment wrongly said CUDAID) */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
		{
			if (!wrap)
			{
				long int val;
				val = strtol(strval, &endptr, 10);
				if (endptr != strval)
				{
					/* values are taken modulo the number
					 * of physical CPUs */
					topology->workers_bindid[i] =
						(unsigned)(val % topology->nhwcpus);
					strval = endptr;
				}
				else
				{
					/* there must be at least one entry */
					STARPU_ASSERT(i != 0);
					number_of_entries = i;

					/* there is no more values in the
					 * string */
					wrap = 1;

					topology->workers_bindid[i] =
						topology->workers_bindid[0];
				}
			}
			else
			{
				/* cycle over the entries parsed so far */
				topology->workers_bindid[i] =
					topology->workers_bindid[i % number_of_entries];
			}
		}
	}
	else if (config->conf->use_explicit_workers_bindid)
	{
		/* we use the explicit value from the user */
		memcpy(topology->workers_bindid,
		       config->conf->workers_bindid,
		       STARPU_NMAXWORKERS*sizeof(unsigned));
	}
	else
	{
		/* by default, we take a round robin policy */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			topology->workers_bindid[i] =
				(unsigned)(i % topology->nhwcpus);
	}
}
/* This function gets the identifier of the next cpu on which to bind a
 * worker. In case a list of preferred cpus was specified, we look for an
 * available cpu among the list if possible, otherwise a round-robin policy is
 * used. */
static inline int
_starpu_get_next_bindid (struct _starpu_machine_config *config,
			 int *preferred_binding, int npreferred)
{
	struct starpu_machine_topology *topology = &config->topology;

	unsigned found = 0;
	int current_preferred;

	for (current_preferred = 0;
	     current_preferred < npreferred;
	     current_preferred++)
	{
		if (found)
			break;

		unsigned requested_cpu = preferred_binding[current_preferred];

		/* can we bind the worker on the requested cpu ? */
		unsigned ind;
		/* only scan the entries that have not been handed out yet
		 * (those before current_bindid are already taken) */
		for (ind = config->current_bindid;
		     ind < topology->nhwcpus;
		     ind++)
		{
			if (topology->workers_bindid[ind] == requested_cpu)
			{
				/* the cpu is available, we use it ! In order
				 * to make sure that it will not be used again
				 * later on, we remove the entry from the
				 * list: swap it into the slot about to be
				 * returned below */
				topology->workers_bindid[ind] =
					topology->workers_bindid[config->current_bindid];
				topology->workers_bindid[config->current_bindid] = requested_cpu;

				found = 1;

				break;
			}
		}
	}

	/* NOTE(review): the cursor wraps modulo STARPU_NMAXWORKERS rather
	 * than topology->nhwcpus -- presumably because workers_bindid holds
	 * STARPU_NMAXWORKERS valid entries (see
	 * _starpu_initialize_workers_bindid); confirm this is intended. */
	unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);

	return (int)topology->workers_bindid[i];
}
  371. unsigned
  372. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  373. {
  374. #ifdef STARPU_USE_OPENCL
  375. _starpu_opencl_init();
  376. #endif
  377. #ifdef STARPU_USE_CUDA
  378. _starpu_init_cuda();
  379. #endif
  380. _starpu_init_topology(config);
  381. return config->topology.nhwcpus;
  382. }
  383. static int
  384. _starpu_init_machine_config (struct _starpu_machine_config *config)
  385. {
  386. int i;
  387. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  388. config->workers[i].workerid = i;
  389. struct starpu_machine_topology *topology = &config->topology;
  390. topology->nworkers = 0;
  391. topology->ncombinedworkers = 0;
  392. topology->nsched_ctxs = 0;
  393. #ifdef STARPU_USE_OPENCL
  394. _starpu_opencl_init();
  395. #endif
  396. #ifdef STARPU_USE_CUDA
  397. _starpu_init_cuda();
  398. #endif
  399. _starpu_init_topology(config);
  400. _starpu_initialize_workers_bindid(config);
  401. #ifdef STARPU_USE_CUDA
  402. int ncuda = config->conf->ncuda;
  403. #ifndef STARPU_SIMGRID
  404. if (ncuda != 0)
  405. {
  406. /* The user did not disable CUDA. We need to initialize CUDA
  407. * early to count the number of devices */
  408. _starpu_init_cuda();
  409. int nb_devices = _starpu_get_cuda_device_count();
  410. if (ncuda == -1)
  411. {
  412. /* Nothing was specified, so let's choose ! */
  413. ncuda = nb_devices;
  414. }
  415. else
  416. {
  417. if (ncuda > nb_devices)
  418. {
  419. /* The user requires more CUDA devices than
  420. * there is available */
  421. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  422. ncuda = nb_devices;
  423. }
  424. }
  425. }
  426. #endif
  427. /* Now we know how many CUDA devices will be used */
  428. topology->ncudagpus = ncuda;
  429. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  430. _starpu_initialize_workers_cuda_gpuid(config);
  431. unsigned cudagpu;
  432. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  433. {
  434. int worker_idx = topology->nworkers + cudagpu;
  435. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  436. int devid = _starpu_get_next_cuda_gpuid(config);
  437. enum starpu_perf_archtype arch =
  438. (enum starpu_perf_archtype)((int)STARPU_CUDA_DEFAULT + devid);
  439. config->workers[worker_idx].devid = devid;
  440. config->workers[worker_idx].perf_arch = arch;
  441. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  442. _starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
  443. config->worker_mask |= STARPU_CUDA;
  444. struct handle_entry *entry;
  445. entry = (struct handle_entry *) malloc(sizeof(*entry));
  446. STARPU_ASSERT(entry != NULL);
  447. entry->gpuid = devid;
  448. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  449. }
  450. topology->nworkers += topology->ncudagpus;
  451. #endif
  452. #ifdef STARPU_USE_OPENCL
  453. int nopencl = config->conf->nopencl;
  454. #ifndef STARPU_SIMGRID
  455. if (nopencl != 0)
  456. {
  457. /* The user did not disable OPENCL. We need to initialize
  458. * OpenCL early to count the number of devices */
  459. _starpu_opencl_init();
  460. int nb_devices;
  461. nb_devices = _starpu_opencl_get_device_count();
  462. if (nopencl == -1)
  463. {
  464. /* Nothing was specified, so let's choose ! */
  465. nopencl = nb_devices;
  466. if (nopencl > STARPU_MAXOPENCLDEVS)
  467. {
  468. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  469. nopencl = STARPU_MAXOPENCLDEVS;
  470. }
  471. }
  472. else
  473. {
  474. /* Let's make sure this value is OK. */
  475. if (nopencl > nb_devices)
  476. {
  477. /* The user requires more OpenCL devices than
  478. * there is available */
  479. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  480. nopencl = nb_devices;
  481. }
  482. /* Let's make sure this value is OK. */
  483. if (nopencl > STARPU_MAXOPENCLDEVS)
  484. {
  485. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  486. nopencl = STARPU_MAXOPENCLDEVS;
  487. }
  488. }
  489. }
  490. #endif
  491. topology->nopenclgpus = nopencl;
  492. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  493. _starpu_initialize_workers_opencl_gpuid(config);
  494. unsigned openclgpu;
  495. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  496. {
  497. int worker_idx = topology->nworkers + openclgpu;
  498. int devid = _starpu_get_next_opencl_gpuid(config);
  499. if (devid == -1)
  500. { // There is no more devices left
  501. topology->nopenclgpus = openclgpu;
  502. break;
  503. }
  504. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  505. enum starpu_perf_archtype arch =
  506. (enum starpu_perf_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
  507. config->workers[worker_idx].devid = devid;
  508. config->workers[worker_idx].perf_arch = arch;
  509. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  510. _starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
  511. config->worker_mask |= STARPU_OPENCL;
  512. }
  513. topology->nworkers += topology->nopenclgpus;
  514. #endif
  515. #ifdef STARPU_USE_GORDON
  516. int ngordon = config->conf->ngordon;
  517. if (ngordon != 0)
  518. {
  519. if (ngordon == -1)
  520. {
  521. /* Nothing was specified, so let's choose ! */
  522. ngordon = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
  523. }
  524. else
  525. {
  526. STARPU_ASSERT(ngordon <= NMAXGORDONSPUS);
  527. if (ngordon > STARPU_MAXGORDONSPUS);
  528. {
  529. _STARPU_DISP("Warning: %d Gordon CPUs devices requested. Only %d supported\n", ngordon, NMAXGORDONSPUS);
  530. ngordon = NMAXGORDONSPUS;
  531. }
  532. }
  533. }
  534. topology->ngordon_spus = ngordon;
  535. STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);
  536. unsigned spu;
  537. for (spu = 0; spu < config->ngordon_spus; spu++)
  538. {
  539. int worker_idx = topology->nworkers + spu;
  540. config->workers[worker_idx].arch = STARPU_GORDON_WORKER;
  541. config->workers[worker_idx].perf_arch = STARPU_GORDON_DEFAULT;
  542. config->workers[worker_idx].id = spu;
  543. config->workers[worker_idx].worker_is_running = 0;
  544. config->workers[worker_idx].worker_mask = STARPU_GORDON;
  545. _starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + spu].workerid);
  546. config->worker_mask |= STARPU_GORDON;
  547. }
  548. topology->nworkers += topology->ngordon_spus;
  549. #endif
  550. /* we put the CPU section after the accelerator : in case there was an
  551. * accelerator found, we devote one cpu */
  552. #ifdef STARPU_USE_CPU
  553. int ncpu = config->conf->ncpus;
  554. if (ncpu != 0)
  555. {
  556. if (ncpu == -1)
  557. {
  558. unsigned already_busy_cpus =
  559. (topology->ngordon_spus ? 1 : 0) + topology->ncudagpus + topology->nopenclgpus;
  560. long avail_cpus = topology->nhwcpus - already_busy_cpus;
  561. if (avail_cpus < 0)
  562. avail_cpus = 0;
  563. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  564. }
  565. else
  566. {
  567. if (ncpu > STARPU_MAXCPUS)
  568. {
  569. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  570. ncpu = STARPU_MAXCPUS;
  571. }
  572. }
  573. }
  574. topology->ncpus = ncpu;
  575. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  576. unsigned cpu;
  577. for (cpu = 0; cpu < topology->ncpus; cpu++)
  578. {
  579. int worker_idx = topology->nworkers + cpu;
  580. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  581. config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
  582. config->workers[worker_idx].devid = cpu;
  583. config->workers[worker_idx].worker_mask = STARPU_CPU;
  584. config->worker_mask |= STARPU_CPU;
  585. _starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cpu].workerid);
  586. }
  587. topology->nworkers += topology->ncpus;
  588. #endif
  589. if (topology->nworkers == 0)
  590. {
  591. _STARPU_DEBUG("No worker found, aborting ...\n");
  592. return -ENODEV;
  593. }
  594. return 0;
  595. }
/* Bind the calling thread to the CPU of logical index CPUID, using hwloc,
 * pthread affinity, or the Windows API depending on what is available.
 * A no-op in simgrid mode or when STARPU_WORKERS_NOBIND is set. */
void
_starpu_bind_thread_on_cpu (
	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
	unsigned cpuid)
{
#ifdef STARPU_SIMGRID
	return;
#endif
	if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
		return;
#ifdef STARPU_HAVE_HWLOC
	const struct hwloc_topology_support *support;

	/* make sure the drivers and the topology are initialized before
	 * querying hwloc */
#ifdef STARPU_USE_OPENCL
	_starpu_opencl_init();
#endif
#ifdef STARPU_USE_CUDA
	_starpu_init_cuda();
#endif
	_starpu_init_topology(config);

	support = hwloc_topology_get_support (config->topology.hwtopology);
	if (support->cpubind->set_thisthread_cpubind)
	{
		hwloc_obj_t obj =
			hwloc_get_obj_by_depth (config->topology.hwtopology,
						config->cpu_depth, cpuid);
		hwloc_bitmap_t set = obj->cpuset;
		int ret;

		/* NOTE(review): singlify is applied to the topology object's
		 * own cpuset, not to a duplicate, so the shared hwloc object
		 * is mutated here -- confirm this is intended. */
		hwloc_bitmap_singlify(set);
		ret = hwloc_set_cpubind (config->topology.hwtopology, set,
					 HWLOC_CPUBIND_THREAD);
		if (ret)
		{
			perror("binding thread");
			STARPU_ABORT();
		}
	}
#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
	int ret;
	/* fix the thread on the correct cpu */
	cpu_set_t aff_mask;
	CPU_ZERO(&aff_mask);
	CPU_SET(cpuid, &aff_mask);

	pthread_t self = pthread_self();

	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
	if (ret)
	{
		perror("binding thread");
		STARPU_ABORT();
	}
#elif defined(__MINGW32__) || defined(__CYGWIN__)
	DWORD mask = 1 << cpuid;
	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
	{
		_STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
	}
#else
#warning no CPU binding support
#endif
}
  655. void
  656. _starpu_bind_thread_on_cpus (
  657. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  658. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  659. {
  660. #ifdef STARPU_SIMGRID
  661. return;
  662. #endif
  663. #ifdef STARPU_HAVE_HWLOC
  664. const struct hwloc_topology_support *support;
  665. #ifdef STARPU_USE_OPENC
  666. _starpu_opencl_init();
  667. #endif
  668. #ifdef STARPU_USE_CUDA
  669. _starpu_init_cuda();
  670. #endif
  671. _starpu_init_topology(config);
  672. support = hwloc_topology_get_support(config->topology.hwtopology);
  673. if (support->cpubind->set_thisthread_cpubind)
  674. {
  675. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  676. int ret;
  677. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  678. HWLOC_CPUBIND_THREAD);
  679. if (ret)
  680. {
  681. perror("binding thread");
  682. STARPU_ABORT();
  683. }
  684. }
  685. #else
  686. #warning no parallel worker CPU binding support
  687. #endif
  688. }
  689. static void
  690. _starpu_init_workers_binding (struct _starpu_machine_config *config)
  691. {
  692. /* launch one thread per CPU */
  693. unsigned ram_memory_node;
  694. /* a single cpu is dedicated for the accelerators */
  695. int accelerator_bindid = -1;
  696. /* note that even if the CPU cpu are not used, we always have a RAM
  697. * node */
  698. /* TODO : support NUMA ;) */
  699. ram_memory_node = _starpu_register_memory_node(STARPU_CPU_RAM, -1);
  700. #ifdef STARPU_SIMGRID
  701. char name[16];
  702. xbt_dynar_t hosts = MSG_hosts_as_dynar();
  703. msg_host_t host = MSG_get_host_by_name("RAM");
  704. STARPU_ASSERT(host);
  705. _starpu_simgrid_memory_node_set_host(0, host);
  706. #endif
  707. /* We will store all the busid of the different (src, dst)
  708. * combinations in a matrix which we initialize here. */
  709. _starpu_initialize_busid_matrix();
  710. unsigned worker;
  711. for (worker = 0; worker < config->topology.nworkers; worker++)
  712. {
  713. unsigned memory_node = -1;
  714. unsigned is_a_set_of_accelerators = 0;
  715. struct _starpu_worker *workerarg = &config->workers[worker];
  716. /* Perhaps the worker has some "favourite" bindings */
  717. int *preferred_binding = NULL;
  718. int npreferred = 0;
  719. /* select the memory node that contains worker's memory */
  720. switch (workerarg->arch)
  721. {
  722. case STARPU_CPU_WORKER:
  723. /* "dedicate" a cpu cpu to that worker */
  724. is_a_set_of_accelerators = 0;
  725. memory_node = ram_memory_node;
  726. _starpu_memory_node_worker_add(ram_memory_node);
  727. break;
  728. #ifdef STARPU_USE_GORDON
  729. case STARPU_GORDON_WORKER:
  730. is_a_set_of_accelerators = 1;
  731. memory_node = ram_memory_node;
  732. _starpu_memory_node_worker_add(ram_memory_node);
  733. break;
  734. #endif
  735. #ifdef STARPU_USE_CUDA
  736. case STARPU_CUDA_WORKER:
  737. if (may_bind_automatically)
  738. {
  739. /* StarPU is allowed to bind threads automatically */
  740. preferred_binding = _starpu_get_cuda_affinity_vector(workerarg->devid);
  741. npreferred = config->topology.nhwcpus;
  742. }
  743. is_a_set_of_accelerators = 0;
  744. memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM, workerarg->devid);
  745. #ifdef STARPU_SIMGRID
  746. snprintf(name, sizeof(name), "CUDA%d", workerarg->devid);
  747. host = MSG_get_host_by_name(name);
  748. STARPU_ASSERT(host);
  749. _starpu_simgrid_memory_node_set_host(memory_node, host);
  750. #endif
  751. _starpu_memory_node_worker_add(memory_node);
  752. _starpu_register_bus(0, memory_node);
  753. _starpu_register_bus(memory_node, 0);
  754. #ifdef HAVE_CUDA_MEMCPY_PEER
  755. unsigned worker2;
  756. for (worker2 = 0; worker2 < worker; worker2++)
  757. {
  758. struct _starpu_worker *workerarg = &config->workers[worker];
  759. if (workerarg->arch == STARPU_CUDA_WORKER)
  760. {
  761. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  762. _starpu_register_bus(memory_node2, memory_node);
  763. _starpu_register_bus(memory_node, memory_node2);
  764. }
  765. }
  766. #endif
  767. break;
  768. #endif
  769. #ifdef STARPU_USE_OPENCL
  770. case STARPU_OPENCL_WORKER:
  771. if (may_bind_automatically)
  772. {
  773. /* StarPU is allowed to bind threads automatically */
  774. preferred_binding = _starpu_get_opencl_affinity_vector(workerarg->devid);
  775. npreferred = config->topology.nhwcpus;
  776. }
  777. is_a_set_of_accelerators = 0;
  778. memory_node = _starpu_register_memory_node(STARPU_OPENCL_RAM, workerarg->devid);
  779. #ifdef STARPU_SIMGRID
  780. snprintf(name, sizeof(name), "OpenCL%d", workerarg->devid);
  781. host = MSG_get_host_by_name(name);
  782. STARPU_ASSERT(host);
  783. _starpu_simgrid_memory_node_set_host(memory_node, host);
  784. #endif
  785. _starpu_memory_node_worker_add(memory_node);
  786. _starpu_register_bus(0, memory_node);
  787. _starpu_register_bus(memory_node, 0);
  788. break;
  789. #endif
  790. default:
  791. STARPU_ABORT();
  792. }
  793. if (is_a_set_of_accelerators)
  794. {
  795. if (accelerator_bindid == -1)
  796. accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  797. workerarg->bindid = accelerator_bindid;
  798. }
  799. else
  800. {
  801. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  802. }
  803. workerarg->memory_node = memory_node;
  804. #ifdef __GLIBC__
  805. /* Save the initial cpuset */
  806. CPU_ZERO(&workerarg->initial_cpu_set);
  807. CPU_SET(workerarg->bindid, &workerarg->initial_cpu_set);
  808. CPU_ZERO(&workerarg->current_cpu_set);
  809. CPU_SET(workerarg->bindid, &workerarg->current_cpu_set);
  810. #endif /* __GLIBC__ */
  811. #ifdef STARPU_HAVE_HWLOC
  812. /* Put the worker descriptor in the userdata field of the
  813. * hwloc object describing the CPU */
  814. hwloc_obj_t worker_obj;
  815. worker_obj =
  816. hwloc_get_obj_by_depth (config->topology.hwtopology,
  817. config->cpu_depth,
  818. workerarg->bindid);
  819. worker_obj->userdata = &config->workers[worker];
  820. /* Clear the cpu set and set the cpu */
  821. workerarg->initial_hwloc_cpu_set =
  822. hwloc_bitmap_dup (worker_obj->cpuset);
  823. workerarg->current_hwloc_cpu_set =
  824. hwloc_bitmap_dup (worker_obj->cpuset);
  825. #endif
  826. }
  827. #ifdef STARPU_SIMGRID
  828. xbt_dynar_free(&hosts);
  829. #endif
  830. }
  831. int
  832. _starpu_build_topology (struct _starpu_machine_config *config)
  833. {
  834. int ret;
  835. ret = _starpu_init_machine_config(config);
  836. if (ret)
  837. return ret;
  838. /* for the data management library */
  839. _starpu_init_memory_nodes();
  840. _starpu_init_workers_binding(config);
  841. return 0;
  842. }
  843. void
  844. _starpu_destroy_topology (
  845. struct _starpu_machine_config *config __attribute__ ((unused)))
  846. {
  847. /* cleanup StarPU internal data structures */
  848. _starpu_deinit_memory_nodes();
  849. unsigned worker;
  850. for (worker = 0; worker < config->topology.nworkers; worker++)
  851. {
  852. #ifdef STARPU_HAVE_HWLOC
  853. struct _starpu_worker *workerarg = &config->workers[worker];
  854. hwloc_bitmap_free(workerarg->initial_hwloc_cpu_set);
  855. hwloc_bitmap_free(workerarg->current_hwloc_cpu_set);
  856. #endif
  857. }
  858. #ifdef STARPU_HAVE_HWLOC
  859. hwloc_topology_destroy(config->topology.hwtopology);
  860. #endif
  861. topology_is_initialized = 0;
  862. #ifdef STARPU_USE_CUDA
  863. struct handle_entry *entry, *tmp;
  864. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  865. {
  866. HASH_DEL(devices_using_cuda, entry);
  867. free(entry);
  868. }
  869. devices_using_cuda = NULL;
  870. #endif
  871. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  872. may_bind_automatically = 0;
  873. #endif
  874. }
  875. void
  876. starpu_topology_print (FILE *output)
  877. {
  878. struct _starpu_machine_config *config = _starpu_get_machine_config();
  879. struct starpu_machine_topology *topology = &config->topology;
  880. unsigned core;
  881. unsigned worker;
  882. unsigned nworkers = starpu_worker_get_count();
  883. unsigned ncombinedworkers = topology->ncombinedworkers;
  884. for (core = 0; core < topology->nhwcpus; core++) {
  885. fprintf(output, "core %u\t", core);
  886. for (worker = 0;
  887. worker < nworkers + ncombinedworkers;
  888. worker++)
  889. {
  890. if (worker < nworkers)
  891. {
  892. if (topology->workers_bindid[worker] == core)
  893. {
  894. char name[256];
  895. starpu_worker_get_name (worker, name,
  896. sizeof(name));
  897. fprintf(output, "%s\t", name);
  898. }
  899. }
  900. else
  901. {
  902. int worker_size, i;
  903. int *combined_workerid;
  904. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  905. for (i = 0; i < worker_size; i++)
  906. {
  907. if (topology->workers_bindid[combined_workerid[i]] == core)
  908. fprintf(output, "comb %u\t", worker-nworkers);
  909. }
  910. }
  911. }
  912. fprintf(output, "\n");
  913. }
  914. }