/* topology.c */
  1. /*
  2. * StarPU
  3. * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <common/config.h>
#include <common/hash.h>
#include <core/workers.h>
#include <core/debug.h>
#include <core/topology.h>
#include <drivers/cuda/driver_cuda.h>
#include <profiling/profiling.h>
#ifdef STARPU_HAVE_HWLOC
#include <hwloc.h>
#ifndef HWLOC_API_VERSION
#define HWLOC_OBJ_PU HWLOC_OBJ_PROC
#endif
#endif
#ifdef STARPU_HAVE_WINDOWS
#include <windows.h>
#endif
  34. static unsigned topology_is_initialized = 0;
  35. static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config);
  36. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  37. # ifdef STARPU_USE_CUDA
  38. static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config);
  39. static struct starpu_htbl32_node_s *devices_using_cuda = NULL;
  40. # endif
  41. # ifdef STARPU_USE_OPENCL
  42. static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config);
  43. # endif
  44. static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
  45. int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus);
  46. static unsigned may_bind_automatically = 0;
  47. #endif
  48. /*
  49. * Discover the topology of the machine
  50. */
  51. #ifdef STARPU_USE_CUDA
  52. static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config)
  53. {
  54. struct starpu_machine_topology_s *topology = &config->topology;
  55. _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_cuda_gpuid,
  56. config->user_conf==NULL?NULL:(int *)config->user_conf->workers_cuda_gpuid,
  57. &(config->current_cuda_gpuid), (int *)topology->workers_cuda_gpuid, "STARPU_WORKERS_CUDAID",
  58. topology->nhwcudagpus);
  59. }
  60. #endif
  61. #ifdef STARPU_USE_OPENCL
  62. static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config)
  63. {
  64. struct starpu_machine_topology_s *topology = &config->topology;
  65. _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_opencl_gpuid,
  66. config->user_conf==NULL?NULL:(int *)config->user_conf->workers_opencl_gpuid,
  67. &(config->current_opencl_gpuid), (int *)topology->workers_opencl_gpuid, "STARPU_WORKERS_OPENCLID",
  68. topology->nhwopenclgpus);
  69. #ifdef STARPU_USE_CUDA
  70. // Detect devices which are already used with CUDA
  71. {
  72. unsigned tmp[STARPU_NMAXWORKERS];
  73. unsigned nb=0;
  74. int i;
  75. for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
  76. uint32_t key = _starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
  77. if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL) {
  78. tmp[nb] = topology->workers_opencl_gpuid[i];
  79. nb++;
  80. }
  81. }
  82. for(i=nb ; i<STARPU_NMAXWORKERS ; i++) tmp[i] = -1;
  83. memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
  84. }
  85. #endif /* STARPU_USE_CUDA */
  86. {
  87. // Detect identical devices
  88. struct starpu_htbl32_node_s *devices_already_used = NULL;
  89. unsigned tmp[STARPU_NMAXWORKERS];
  90. unsigned nb=0;
  91. int i;
  92. for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
  93. uint32_t key = _starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
  94. if (_starpu_htbl_search_32(devices_already_used, key) == NULL) {
  95. _starpu_htbl_insert_32(&devices_already_used, key, config);
  96. tmp[nb] = topology->workers_opencl_gpuid[i];
  97. nb ++;
  98. }
  99. }
  100. for(i=nb ; i<STARPU_NMAXWORKERS ; i++) tmp[i] = -1;
  101. memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
  102. }
  103. }
  104. #endif
  105. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  106. static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
  107. int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus)
  108. {
  109. char *strval;
  110. unsigned i;
  111. *current = 0;
  112. /* conf->workers_bindid indicates the successive cpu identifier that
  113. * should be used to bind the workers. It should be either filled
  114. * according to the user's explicit parameters (from starpu_conf) or
  115. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  116. * round-robin policy is used to distributed the workers over the
  117. * cpus. */
  118. /* what do we use, explicit value, env. variable, or round-robin ? */
  119. if (use_explicit_workers_gpuid)
  120. {
  121. /* we use the explicit value from the user */
  122. memcpy(workers_gpuid,
  123. explicit_workers_gpuid,
  124. STARPU_NMAXWORKERS*sizeof(unsigned));
  125. }
  126. else if ((strval = getenv(varname)))
  127. {
  128. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  129. * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
  130. * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 2". */
  131. unsigned wrap = 0;
  132. unsigned number_of_entries = 0;
  133. char *endptr;
  134. /* we use the content of the STARPU_WORKERS_CUDAID env. variable */
  135. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  136. {
  137. if (!wrap) {
  138. long int val;
  139. val = strtol(strval, &endptr, 10);
  140. if (endptr != strval)
  141. {
  142. workers_gpuid[i] = (unsigned)val;
  143. strval = endptr;
  144. }
  145. else {
  146. /* there must be at least one entry */
  147. STARPU_ASSERT(i != 0);
  148. number_of_entries = i;
  149. /* there is no more values in the string */
  150. wrap = 1;
  151. workers_gpuid[i] = workers_gpuid[0];
  152. }
  153. }
  154. else {
  155. workers_gpuid[i] = workers_gpuid[i % number_of_entries];
  156. }
  157. }
  158. }
  159. else
  160. {
  161. /* by default, we take a round robin policy */
  162. if (nhwgpus > 0)
  163. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  164. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  165. /* StarPU can use sampling techniques to bind threads correctly */
  166. may_bind_automatically = 1;
  167. }
  168. }
  169. #endif
  170. #ifdef STARPU_USE_CUDA
  171. static inline int _starpu_get_next_cuda_gpuid(struct starpu_machine_config_s *config)
  172. {
  173. unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  174. return (int)config->topology.workers_cuda_gpuid[i];
  175. }
  176. #endif
  177. #ifdef STARPU_USE_OPENCL
  178. static inline int _starpu_get_next_opencl_gpuid(struct starpu_machine_config_s *config)
  179. {
  180. unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  181. return (int)config->topology.workers_opencl_gpuid[i];
  182. }
  183. #endif
  184. static void _starpu_init_topology(struct starpu_machine_config_s *config)
  185. {
  186. struct starpu_machine_topology_s *topology = &config->topology;
  187. if (!topology_is_initialized)
  188. {
  189. #ifdef STARPU_HAVE_HWLOC
  190. hwloc_topology_init(&topology->hwtopology);
  191. hwloc_topology_load(topology->hwtopology);
  192. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_CORE);
  193. /* Would be very odd */
  194. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  195. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  196. /* unknown, using logical procesors as fallback */
  197. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU);
  198. topology->nhwcpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
  199. #elif defined(__MINGW32__) || defined(__CYGWIN__)
  200. SYSTEM_INFO sysinfo;
  201. GetSystemInfo(&sysinfo);
  202. topology->nhwcpus += sysinfo.dwNumberOfProcessors;
  203. #elif defined(HAVE_SYSCONF)
  204. topology->nhwcpus = sysconf(_SC_NPROCESSORS_ONLN);
  205. #else
  206. #warning no way to know number of cores, assuming 1
  207. topology->nhwcpus = 1;
  208. #endif
  209. #ifdef STARPU_USE_CUDA
  210. config->topology.nhwcudagpus = _starpu_get_cuda_device_count();
  211. #endif
  212. #ifdef STARPU_USE_OPENCL
  213. config->topology.nhwopenclgpus = _starpu_opencl_get_device_count();
  214. #endif
  215. topology_is_initialized = 1;
  216. }
  217. }
  218. unsigned _starpu_topology_get_nhwcpu(struct starpu_machine_config_s *config)
  219. {
  220. _starpu_init_topology(config);
  221. return config->topology.nhwcpus;
  222. }
  223. static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
  224. struct starpu_conf *user_conf)
  225. {
  226. int explicitval __attribute__((unused));
  227. unsigned use_accelerator = 0;
  228. struct starpu_machine_topology_s *topology = &config->topology;
  229. topology->nworkers = 0;
  230. topology->ncombinedworkers = 0;
  231. _starpu_init_topology(config);
  232. _starpu_initialize_workers_bindid(config);
  233. #ifdef STARPU_USE_CUDA
  234. if (user_conf && (user_conf->ncuda == 0))
  235. {
  236. /* the user explicitely disabled CUDA */
  237. topology->ncudagpus = 0;
  238. }
  239. else {
  240. /* we need to initialize CUDA early to count the number of devices */
  241. _starpu_init_cuda();
  242. if (user_conf && (user_conf->ncuda != -1))
  243. {
  244. explicitval = user_conf->ncuda;
  245. }
  246. else {
  247. explicitval = starpu_get_env_number("STARPU_NCUDA");
  248. }
  249. if (explicitval < 0) {
  250. config->topology.ncudagpus =
  251. STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
  252. } else {
  253. /* use the specified value */
  254. topology->ncudagpus = (unsigned)explicitval;
  255. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  256. }
  257. STARPU_ASSERT(config->topology.ncudagpus + config->topology.nworkers <= STARPU_NMAXWORKERS);
  258. }
  259. if (topology->ncudagpus > 0)
  260. use_accelerator = 1;
  261. _starpu_initialize_workers_cuda_gpuid(config);
  262. unsigned cudagpu;
  263. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  264. {
  265. config->workers[topology->nworkers + cudagpu].arch = STARPU_CUDA_WORKER;
  266. int devid = _starpu_get_next_cuda_gpuid(config);
  267. enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
  268. config->workers[topology->nworkers + cudagpu].devid = devid;
  269. config->workers[topology->nworkers + cudagpu].perf_arch = arch;
  270. config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
  271. config->worker_mask |= STARPU_CUDA;
  272. uint32_t key = _starpu_crc32_be(devid, 0);
  273. _starpu_htbl_insert_32(&devices_using_cuda, key, config);
  274. }
  275. topology->nworkers += topology->ncudagpus;
  276. #endif
  277. #ifdef STARPU_USE_OPENCL
  278. if (user_conf && (user_conf->nopencl == 0))
  279. {
  280. /* the user explicitely disabled OpenCL */
  281. topology->nopenclgpus = 0;
  282. }
  283. else {
  284. /* we need to initialize OpenCL early to count the number of devices */
  285. _starpu_opencl_init();
  286. if (user_conf && (user_conf->nopencl != -1))
  287. {
  288. explicitval = user_conf->nopencl;
  289. }
  290. else {
  291. explicitval = starpu_get_env_number("STARPU_NOPENCL");
  292. }
  293. if (explicitval < 0) {
  294. topology->nopenclgpus =
  295. STARPU_MIN(_starpu_opencl_get_device_count(), STARPU_MAXOPENCLDEVS);
  296. } else {
  297. /* use the specified value */
  298. topology->nopenclgpus = (unsigned)explicitval;
  299. STARPU_ASSERT(topology->nopenclgpus <= STARPU_MAXOPENCLDEVS);
  300. }
  301. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  302. }
  303. if (topology->nopenclgpus > 0)
  304. use_accelerator = 1;
  305. // TODO: use_accelerator pour les OpenCL?
  306. _starpu_initialize_workers_opencl_gpuid(config);
  307. unsigned openclgpu;
  308. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  309. {
  310. int devid = _starpu_get_next_opencl_gpuid(config);
  311. if (devid == -1) { // There is no more devices left
  312. topology->nopenclgpus = openclgpu;
  313. break;
  314. }
  315. config->workers[topology->nworkers + openclgpu].arch = STARPU_OPENCL_WORKER;
  316. enum starpu_perf_archtype arch = STARPU_OPENCL_DEFAULT + devid;
  317. config->workers[topology->nworkers + openclgpu].devid = devid;
  318. config->workers[topology->nworkers + openclgpu].perf_arch = arch;
  319. config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
  320. config->worker_mask |= STARPU_OPENCL;
  321. }
  322. topology->nworkers += topology->nopenclgpus;
  323. #endif
  324. #ifdef STARPU_USE_GORDON
  325. if (user_conf && (user_conf->ncuda != -1)) {
  326. explicitval = user_conf->ncuda;
  327. }
  328. else {
  329. explicitval = starpu_get_env_number("STARPU_NGORDON");
  330. }
  331. if (explicitval < 0) {
  332. topology->ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
  333. } else {
  334. /* use the specified value */
  335. topology->ngordon_spus = (unsigned)explicitval;
  336. STARPU_ASSERT(topology->ngordon_spus <= NMAXGORDONSPUS);
  337. }
  338. STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);
  339. if (topology->ngordon_spus > 0)
  340. use_accelerator = 1;
  341. unsigned spu;
  342. for (spu = 0; spu < config->ngordon_spus; spu++)
  343. {
  344. config->workers[topology->nworkers + spu].arch = STARPU_GORDON_WORKER;
  345. config->workers[topology->nworkers + spu].perf_arch = STARPU_GORDON_DEFAULT;
  346. config->workers[topology->nworkers + spu].id = spu;
  347. config->workers[topology->nworkers + spu].worker_is_running = 0;
  348. config->workers[topology->nworkers + spu].worker_mask = STARPU_GORDON;
  349. config->worker_mask |= STARPU_GORDON;
  350. }
  351. topology->nworkers += topology->ngordon_spus;
  352. #endif
  353. /* we put the CPU section after the accelerator : in case there was an
  354. * accelerator found, we devote one cpu */
  355. #ifdef STARPU_USE_CPU
  356. if (user_conf && (user_conf->ncpus != -1)) {
  357. explicitval = user_conf->ncpus;
  358. }
  359. else {
  360. explicitval = starpu_get_env_number("STARPU_NCPUS");
  361. }
  362. if (explicitval < 0) {
  363. unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus;
  364. long avail_cpus = topology->nhwcpus - (use_accelerator?already_busy_cpus:0);
  365. topology->ncpus = STARPU_MIN(avail_cpus, STARPU_NMAXCPUS);
  366. } else {
  367. /* use the specified value */
  368. topology->ncpus = (unsigned)explicitval;
  369. STARPU_ASSERT(topology->ncpus <= STARPU_NMAXCPUS);
  370. }
  371. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  372. unsigned cpu;
  373. for (cpu = 0; cpu < topology->ncpus; cpu++)
  374. {
  375. config->workers[topology->nworkers + cpu].arch = STARPU_CPU_WORKER;
  376. config->workers[topology->nworkers + cpu].perf_arch = STARPU_CPU_DEFAULT;
  377. config->workers[topology->nworkers + cpu].devid = cpu;
  378. config->workers[topology->nworkers + cpu].worker_mask = STARPU_CPU;
  379. config->worker_mask |= STARPU_CPU;
  380. }
  381. topology->nworkers += topology->ncpus;
  382. #endif
  383. if (topology->nworkers == 0)
  384. {
  385. _STARPU_DEBUG("No worker found, aborting ...\n");
  386. return -ENODEV;
  387. }
  388. return 0;
  389. }
  390. /*
  391. * Bind workers on the different processors
  392. */
  393. static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config)
  394. {
  395. char *strval;
  396. unsigned i;
  397. struct starpu_machine_topology_s *topology = &config->topology;
  398. config->current_bindid = 0;
  399. /* conf->workers_bindid indicates the successive cpu identifier that
  400. * should be used to bind the workers. It should be either filled
  401. * according to the user's explicit parameters (from starpu_conf) or
  402. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  403. * round-robin policy is used to distributed the workers over the
  404. * cpus. */
  405. /* what do we use, explicit value, env. variable, or round-robin ? */
  406. if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
  407. {
  408. /* we use the explicit value from the user */
  409. memcpy(topology->workers_bindid,
  410. config->user_conf->workers_bindid,
  411. STARPU_NMAXWORKERS*sizeof(unsigned));
  412. }
  413. else if ((strval = getenv("STARPU_WORKERS_CPUID")))
  414. {
  415. /* STARPU_WORKERS_CPUID certainly contains less entries than
  416. * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
  417. * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 2". */
  418. unsigned wrap = 0;
  419. unsigned number_of_entries = 0;
  420. char *endptr;
  421. /* we use the content of the STARPU_WORKERS_CUDAID env. variable */
  422. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  423. {
  424. if (!wrap) {
  425. long int val;
  426. val = strtol(strval, &endptr, 10);
  427. if (endptr != strval)
  428. {
  429. topology->workers_bindid[i] = (unsigned)(val % topology->nhwcpus);
  430. strval = endptr;
  431. }
  432. else {
  433. /* there must be at least one entry */
  434. STARPU_ASSERT(i != 0);
  435. number_of_entries = i;
  436. /* there is no more values in the string */
  437. wrap = 1;
  438. topology->workers_bindid[i] = topology->workers_bindid[0];
  439. }
  440. }
  441. else {
  442. topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries];
  443. }
  444. }
  445. }
  446. else
  447. {
  448. /* by default, we take a round robin policy */
  449. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  450. topology->workers_bindid[i] = (unsigned)(i % topology->nhwcpus);
  451. }
  452. }
  453. /* This function gets the identifier of the next cpu on which to bind a
  454. * worker. In case a list of preferred cpus was specified, we look for a an
  455. * available cpu among the list if possible, otherwise a round-robin policy is
  456. * used. */
  457. static inline int _starpu_get_next_bindid(struct starpu_machine_config_s *config,
  458. int *preferred_binding, int npreferred)
  459. {
  460. struct starpu_machine_topology_s *topology = &config->topology;
  461. unsigned found = 0;
  462. int current_preferred;
  463. for (current_preferred = 0; current_preferred < npreferred; current_preferred++)
  464. {
  465. if (found)
  466. break;
  467. unsigned requested_cpu = preferred_binding[current_preferred];
  468. /* can we bind the worker on the requested cpu ? */
  469. unsigned ind;
  470. for (ind = config->current_bindid; ind < topology->nhwcpus; ind++)
  471. {
  472. if (topology->workers_bindid[ind] == requested_cpu)
  473. {
  474. /* the cpu is available, we use it ! In order
  475. * to make sure that it will not be used again
  476. * later on, we remove the entry from the list
  477. * */
  478. topology->workers_bindid[ind] =
  479. topology->workers_bindid[config->current_bindid];
  480. topology->workers_bindid[config->current_bindid] = requested_cpu;
  481. found = 1;
  482. break;
  483. }
  484. }
  485. }
  486. unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
  487. return (int)topology->workers_bindid[i];
  488. }
  489. void _starpu_bind_thread_on_cpu(struct starpu_machine_config_s *config __attribute__((unused)), unsigned cpuid)
  490. {
  491. #ifdef STARPU_HAVE_HWLOC
  492. int ret;
  493. _starpu_init_topology(config);
  494. hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->cpu_depth, cpuid);
  495. hwloc_cpuset_t set = obj->cpuset;
  496. hwloc_cpuset_singlify(set);
  497. ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
  498. if (ret)
  499. {
  500. perror("binding thread");
  501. STARPU_ABORT();
  502. }
  503. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP)
  504. int ret;
  505. /* fix the thread on the correct cpu */
  506. cpu_set_t aff_mask;
  507. CPU_ZERO(&aff_mask);
  508. CPU_SET(cpuid, &aff_mask);
  509. pthread_t self = pthread_self();
  510. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  511. if (ret)
  512. {
  513. perror("binding thread");
  514. STARPU_ABORT();
  515. }
  516. #elif defined(__MINGW32__) || defined(__CYGWIN__)
  517. DWORD mask = 1 << cpuid;
  518. if (!SetThreadAffinityMask(GetCurrentThread(), mask)) {
  519. fprintf(stderr,"SetThreadMaskAffinity(%lx) failed\n", mask);
  520. STARPU_ABORT();
  521. }
  522. #else
  523. #warning no CPU binding support
  524. #endif
  525. }
  526. static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
  527. {
  528. /* launch one thread per CPU */
  529. unsigned ram_memory_node;
  530. /* a single cpu is dedicated for the accelerators */
  531. int accelerator_bindid = -1;
  532. /* note that even if the CPU cpu are not used, we always have a RAM node */
  533. /* TODO : support NUMA ;) */
  534. ram_memory_node = _starpu_register_memory_node(STARPU_CPU_RAM);
  535. /* We will store all the busid of the different (src, dst) combinations
  536. * in a matrix which we initialize here. */
  537. _starpu_initialize_busid_matrix();
  538. unsigned worker;
  539. for (worker = 0; worker < config->topology.nworkers; worker++)
  540. {
  541. unsigned memory_node = -1;
  542. unsigned is_a_set_of_accelerators = 0;
  543. struct starpu_worker_s *workerarg = &config->workers[worker];
  544. /* Perhaps the worker has some "favourite" bindings */
  545. int *preferred_binding = NULL;
  546. int npreferred = 0;
  547. /* select the memory node that contains worker's memory */
  548. switch (workerarg->arch) {
  549. case STARPU_CPU_WORKER:
  550. /* "dedicate" a cpu cpu to that worker */
  551. is_a_set_of_accelerators = 0;
  552. memory_node = ram_memory_node;
  553. break;
  554. #ifdef STARPU_USE_GORDON
  555. case STARPU_GORDON_WORKER:
  556. is_a_set_of_accelerators = 1;
  557. memory_node = ram_memory_node;
  558. break;
  559. #endif
  560. #ifdef STARPU_USE_CUDA
  561. case STARPU_CUDA_WORKER:
  562. if (may_bind_automatically)
  563. {
  564. /* StarPU is allowed to bind threads automatically */
  565. preferred_binding = _starpu_get_cuda_affinity_vector(workerarg->devid);
  566. npreferred = config->topology.nhwcpus;
  567. }
  568. is_a_set_of_accelerators = 0;
  569. memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM);
  570. _starpu_register_bus(0, memory_node);
  571. _starpu_register_bus(memory_node, 0);
  572. break;
  573. #endif
  574. #ifdef STARPU_USE_OPENCL
  575. case STARPU_OPENCL_WORKER:
  576. if (may_bind_automatically)
  577. {
  578. /* StarPU is allowed to bind threads automatically */
  579. preferred_binding = _starpu_get_opencl_affinity_vector(workerarg->devid);
  580. npreferred = config->topology.nhwcpus;
  581. }
  582. is_a_set_of_accelerators = 0;
  583. memory_node = _starpu_register_memory_node(STARPU_OPENCL_RAM);
  584. _starpu_register_bus(0, memory_node);
  585. _starpu_register_bus(memory_node, 0);
  586. break;
  587. #endif
  588. default:
  589. STARPU_ABORT();
  590. }
  591. if (is_a_set_of_accelerators) {
  592. if (accelerator_bindid == -1)
  593. accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  594. workerarg->bindid = accelerator_bindid;
  595. }
  596. else {
  597. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  598. }
  599. workerarg->memory_node = memory_node;
  600. /* Save the initial cpuset */
  601. CPU_ZERO(&workerarg->initial_cpu_set);
  602. CPU_SET(workerarg->bindid, &workerarg->initial_cpu_set);
  603. CPU_ZERO(&workerarg->current_cpu_set);
  604. CPU_SET(workerarg->bindid, &workerarg->current_cpu_set);
  605. #ifdef STARPU_HAVE_HWLOC
  606. /* Clear the cpu set and set the cpu */
  607. workerarg->initial_hwloc_cpu_set = hwloc_cpuset_alloc();
  608. hwloc_cpuset_cpu(workerarg->initial_hwloc_cpu_set, workerarg->bindid);
  609. workerarg->current_hwloc_cpu_set = hwloc_cpuset_alloc();
  610. hwloc_cpuset_cpu(workerarg->current_hwloc_cpu_set, workerarg->bindid);
  611. #endif
  612. }
  613. }
  614. int _starpu_build_topology(struct starpu_machine_config_s *config)
  615. {
  616. int ret;
  617. struct starpu_conf *user_conf = config->user_conf;
  618. ret = _starpu_init_machine_config(config, user_conf);
  619. if (ret)
  620. return ret;
  621. /* for the data management library */
  622. _starpu_init_memory_nodes();
  623. _starpu_init_workers_binding(config);
  624. return 0;
  625. }
  626. void _starpu_destroy_topology(struct starpu_machine_config_s *config __attribute__ ((unused)))
  627. {
  628. /* cleanup StarPU internal data structures */
  629. _starpu_deinit_memory_nodes();
  630. #ifdef STARPU_HAVE_HWLOC
  631. hwloc_topology_destroy(config->topology.hwtopology);
  632. #endif
  633. topology_is_initialized = 0;
  634. }