/*
 * StarPU
 * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <stdlib.h>
#include <stdio.h>
#include <common/config.h>
#include <core/workers.h>
#include <core/debug.h>
#include <core/topology.h>
#include <drivers/cuda/driver_cuda.h>
#include <common/hash.h>
#include <profiling/profiling.h>

#ifdef STARPU_HAVE_HWLOC
#include <hwloc.h>
#ifndef HWLOC_API_VERSION
#define HWLOC_OBJ_PU HWLOC_OBJ_PROC
#endif
#endif

#if defined(__MINGW32__) || defined(__CYGWIN__)
#include <windows.h>
#endif
static unsigned topology_is_initialized = 0;

static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config);

#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
# ifdef STARPU_USE_CUDA
static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config);
static struct starpu_htbl32_node_s *devices_using_cuda = NULL;
# endif
# ifdef STARPU_USE_OPENCL
static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config);
# endif
static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
		int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus);

static unsigned may_bind_automatically = 0;
#endif
/*
 * Discover the topology of the machine
 */

#ifdef STARPU_USE_CUDA
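/* Fill topology->workers_cuda_gpuid with the CUDA device numbers that the
 * successive CUDA workers should use, taken from the user's starpu_conf, from
 * the STARPU_WORKERS_CUDAID environment variable, or round-robin by default. */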
static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config)
{
	struct starpu_machine_topology_s *topology = &config->topology;

	_starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_cuda_gpuid,
			config->user_conf==NULL?NULL:(int *)config->user_conf->workers_cuda_gpuid,
			&(config->current_cuda_gpuid), (int *)topology->workers_cuda_gpuid, "STARPU_WORKERS_CUDAID",
			topology->nhwcudagpus);
}
#endif

#ifdef STARPU_USE_OPENCL
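/* Same as above for OpenCL devices, then filter out devices already assigned
 * to CUDA workers as well as duplicate entries. */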
static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config)
{
	struct starpu_machine_topology_s *topology = &config->topology;

	_starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_opencl_gpuid,
			config->user_conf==NULL?NULL:(int *)config->user_conf->workers_opencl_gpuid,
			&(config->current_opencl_gpuid), (int *)topology->workers_opencl_gpuid, "STARPU_WORKERS_OPENCLID",
			topology->nhwopenclgpus);

#ifdef STARPU_USE_CUDA
	// Detect devices which are already used with CUDA
	{
		unsigned tmp[STARPU_NMAXWORKERS];
		unsigned nb=0;
		int i;

		for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
			uint32_t key = _starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
			if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL) {
				tmp[nb] = topology->workers_opencl_gpuid[i];
				nb++;
			}
		}
		for(i=nb ; i<STARPU_NMAXWORKERS ; i++) tmp[i] = -1;
		memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
	}
#endif /* STARPU_USE_CUDA */
	{
		// Detect identical devices
		struct starpu_htbl32_node_s *devices_already_used = NULL;
		unsigned tmp[STARPU_NMAXWORKERS];
		unsigned nb=0;
		int i;

		for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
			uint32_t key = _starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
			if (_starpu_htbl_search_32(devices_already_used, key) == NULL) {
				_starpu_htbl_insert_32(&devices_already_used, key, config);
				tmp[nb] = topology->workers_opencl_gpuid[i];
				nb++;
			}
		}
		for(i=nb ; i<STARPU_NMAXWORKERS ; i++) tmp[i] = -1;
		memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
	}
}
#endif

#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
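/* Select the GPU identifiers that the successive workers of a given driver
 * should use: the explicit list from starpu_conf if provided, otherwise the
 * environment variable named by 'varname', otherwise a round-robin over the
 * nhwgpus detected devices. */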
static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
		int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus)
{
	char *strval;
	unsigned i;

	*current = 0;

	/* workers_gpuid indicates the successive GPU identifiers that should
	 * be used by the workers. It is either filled from the user's
	 * explicit parameters (from starpu_conf), or from the environment
	 * variable given by 'varname'. Otherwise, a round-robin policy is
	 * used to distribute the workers over the available devices. */

	/* what do we use, explicit value, env. variable, or round-robin ? */
	if (use_explicit_workers_gpuid)
	{
		/* we use the explicit value from the user */
		memcpy(workers_gpuid,
			explicit_workers_gpuid,
			STARPU_NMAXWORKERS*sizeof(unsigned));
	}
	else if ((strval = getenv(varname)))
	{
		/* The environment variable (e.g. STARPU_WORKERS_CUDAID) most
		 * likely contains fewer entries than STARPU_NMAXWORKERS, so we
		 * reuse its entries in a round-robin fashion: "1 2" is
		 * equivalent to "1 2 1 2 1 2 .... 1 2". */
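		/* For instance, STARPU_WORKERS_CUDAID="1 0" makes the first
		 * worker use device 1, the second device 0, the third device 1
		 * again, and so on. */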
		unsigned wrap = 0;
		unsigned number_of_entries = 0;

		char *endptr;
		/* we use the content of the environment variable */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
		{
			if (!wrap) {
				long int val;
				val = strtol(strval, &endptr, 10);
				if (endptr != strval)
				{
					workers_gpuid[i] = (unsigned)val;
					strval = endptr;
				}
				else {
					/* there must be at least one entry */
					STARPU_ASSERT(i != 0);
					number_of_entries = i;

					/* there are no more values in the string */
					wrap = 1;

					workers_gpuid[i] = workers_gpuid[0];
				}
			}
			else {
				workers_gpuid[i] = workers_gpuid[i % number_of_entries];
			}
		}
	}
	else
	{
		/* by default, we take a round robin policy */
		if (nhwgpus > 0)
			for (i = 0; i < STARPU_NMAXWORKERS; i++)
				workers_gpuid[i] = (unsigned)(i % nhwgpus);

		/* StarPU can use sampling techniques to bind threads correctly */
		may_bind_automatically = 1;
	}
}
#endif
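/* Return the device identifier to be used by the next CUDA worker (and, in
 * the counterpart below, by the next OpenCL worker), cycling over the
 * configured gpuid arrays. */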
static inline int _starpu_get_next_cuda_gpuid(struct starpu_machine_config_s *config)
{
	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);

	return (int)config->topology.workers_cuda_gpuid[i];
}

static inline int _starpu_get_next_opencl_gpuid(struct starpu_machine_config_s *config)
{
	unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);

	return (int)config->topology.workers_opencl_gpuid[i];
}
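/* Detect the number of CPU cores (through hwloc, the Windows API, or sysconf)
 * and the number of CUDA/OpenCL devices. This is only performed once. */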
static void _starpu_init_topology(struct starpu_machine_config_s *config)
{
	struct starpu_machine_topology_s *topology = &config->topology;

	if (!topology_is_initialized)
	{
#ifdef STARPU_HAVE_HWLOC
		hwloc_topology_init(&topology->hwtopology);
		hwloc_topology_load(topology->hwtopology);

		config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_CORE);

		/* Would be very odd */
		STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);

		if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
			/* unknown, using logical processors as fallback */
			config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU);

		topology->nhwcpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
#elif defined(__MINGW32__) || defined(__CYGWIN__)
		SYSTEM_INFO sysinfo;
		GetSystemInfo(&sysinfo);
		topology->nhwcpus += sysinfo.dwNumberOfProcessors;
#elif defined(HAVE_SYSCONF)
		topology->nhwcpus = sysconf(_SC_NPROCESSORS_ONLN);
#else
#warning no way to know number of cores, assuming 1
		topology->nhwcpus = 1;
#endif

#ifdef STARPU_USE_CUDA
		config->topology.nhwcudagpus = _starpu_get_cuda_device_count();
#endif
#ifdef STARPU_USE_OPENCL
		config->topology.nhwopenclgpus = _starpu_opencl_get_device_count();
#endif

		topology_is_initialized = 1;
	}
}
unsigned _starpu_topology_get_nhwcpu(struct starpu_machine_config_s *config)
{
	_starpu_init_topology(config);

	return config->topology.nhwcpus;
}
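/* Build the list of workers: for each driver (CUDA, OpenCL, Gordon, CPU) the
 * number of workers is taken from the user's starpu_conf if set, from the
 * corresponding STARPU_N* environment variable otherwise, and is capped by
 * the number of devices actually detected.
 *
 * Illustrative usage from an application (a sketch only; -1 means "detect
 * automatically" and 0 disables the driver):
 *
 *	struct starpu_conf conf;
 *	memset(&conf, 0, sizeof(conf));
 *	conf.ncpus = -1;    // detect the number of CPUs automatically
 *	conf.ncuda = 2;     // force two CUDA workers
 *	conf.nopencl = -1;  // detect OpenCL devices automatically
 *	starpu_init(&conf);
 *
 * The same effect is obtained with STARPU_NCUDA=2 in the environment. */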
static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
				struct starpu_conf *user_conf)
{
	int explicitval __attribute__((unused));
	unsigned use_accelerator = 0;

	struct starpu_machine_topology_s *topology = &config->topology;

	topology->nworkers = 0;

	_starpu_init_topology(config);

	_starpu_initialize_workers_bindid(config);
#ifdef STARPU_USE_CUDA
	if (user_conf && (user_conf->ncuda == 0))
	{
		/* the user explicitly disabled CUDA */
		topology->ncudagpus = 0;
	}
	else {
		/* we need to initialize CUDA early to count the number of devices */
		_starpu_init_cuda();

		if (user_conf && (user_conf->ncuda != -1))
		{
			explicitval = user_conf->ncuda;
		}
		else {
			explicitval = starpu_get_env_number("STARPU_NCUDA");
		}

		if (explicitval < 0) {
			config->topology.ncudagpus =
				STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
		} else {
			/* use the specified value */
			topology->ncudagpus = (unsigned)explicitval;
			STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
		}
		STARPU_ASSERT(config->topology.ncudagpus + config->topology.nworkers <= STARPU_NMAXWORKERS);
	}

	if (topology->ncudagpus > 0)
		use_accelerator = 1;

	_starpu_initialize_workers_cuda_gpuid(config);

	unsigned cudagpu;
	for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
	{
		config->workers[topology->nworkers + cudagpu].arch = STARPU_CUDA_WORKER;
		int devid = _starpu_get_next_cuda_gpuid(config);
		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
		config->workers[topology->nworkers + cudagpu].devid = devid;
		config->workers[topology->nworkers + cudagpu].perf_arch = arch;
		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
		config->worker_mask |= STARPU_CUDA;

		uint32_t key = _starpu_crc32_be(devid, 0);
		_starpu_htbl_insert_32(&devices_using_cuda, key, config);
	}

	topology->nworkers += topology->ncudagpus;
#endif
#ifdef STARPU_USE_OPENCL
	if (user_conf && (user_conf->nopencl == 0))
	{
		/* the user explicitly disabled OpenCL */
		topology->nopenclgpus = 0;
	}
	else {
		/* we need to initialize OpenCL early to count the number of devices */
		_starpu_opencl_init();

		if (user_conf && (user_conf->nopencl != -1))
		{
			explicitval = user_conf->nopencl;
		}
		else {
			explicitval = starpu_get_env_number("STARPU_NOPENCL");
		}

		if (explicitval < 0) {
			topology->nopenclgpus =
				STARPU_MIN(_starpu_opencl_get_device_count(), STARPU_MAXOPENCLDEVS);
		} else {
			/* use the specified value */
			topology->nopenclgpus = (unsigned)explicitval;
			STARPU_ASSERT(topology->nopenclgpus <= STARPU_MAXOPENCLDEVS);
		}
		STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
	}

	if (topology->nopenclgpus > 0)
		use_accelerator = 1;
	// TODO: should use_accelerator also be set for OpenCL?

	_starpu_initialize_workers_opencl_gpuid(config);

	unsigned openclgpu;
	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
	{
		int devid = _starpu_get_next_opencl_gpuid(config);
		if (devid == -1) {
			// there are no devices left
			topology->nopenclgpus = openclgpu;
			break;
		}
		config->workers[topology->nworkers + openclgpu].arch = STARPU_OPENCL_WORKER;
		enum starpu_perf_archtype arch = STARPU_OPENCL_DEFAULT + devid;
		config->workers[topology->nworkers + openclgpu].devid = devid;
		config->workers[topology->nworkers + openclgpu].perf_arch = arch;
		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
		config->worker_mask |= STARPU_OPENCL;
	}

	topology->nworkers += topology->nopenclgpus;
#endif
#ifdef STARPU_USE_GORDON
	if (user_conf && (user_conf->ncuda != -1)) {
		explicitval = user_conf->ncuda;
	}
	else {
		explicitval = starpu_get_env_number("STARPU_NGORDON");
	}

	if (explicitval < 0) {
		topology->ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
	} else {
		/* use the specified value */
		topology->ngordon_spus = (unsigned)explicitval;
		STARPU_ASSERT(topology->ngordon_spus <= NMAXGORDONSPUS);
	}
	STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);

	if (topology->ngordon_spus > 0)
		use_accelerator = 1;

	unsigned spu;
	for (spu = 0; spu < config->ngordon_spus; spu++)
	{
		config->workers[topology->nworkers + spu].arch = STARPU_GORDON_WORKER;
		config->workers[topology->nworkers + spu].perf_arch = STARPU_GORDON_DEFAULT;
		config->workers[topology->nworkers + spu].id = spu;
		config->workers[topology->nworkers + spu].worker_is_running = 0;
		config->workers[topology->nworkers + spu].worker_mask = STARPU_GORDON;
		config->worker_mask |= STARPU_GORDON;
	}

	topology->nworkers += topology->ngordon_spus;
#endif
	/* we put the CPU section after the accelerators: if an accelerator was
	 * found, one CPU is devoted to driving it */
#ifdef STARPU_USE_CPU
	if (user_conf && (user_conf->ncpus != -1)) {
		explicitval = user_conf->ncpus;
	}
	else {
		explicitval = starpu_get_env_number("STARPU_NCPUS");
	}

	if (explicitval < 0) {
		unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus;
		long avail_cpus = topology->nhwcpus - (use_accelerator?already_busy_cpus:0);
		topology->ncpus = STARPU_MIN(avail_cpus, STARPU_NMAXCPUS);
	} else {
		/* use the specified value */
		topology->ncpus = (unsigned)explicitval;
		STARPU_ASSERT(topology->ncpus <= STARPU_NMAXCPUS);
	}
	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);

	unsigned cpu;
	for (cpu = 0; cpu < topology->ncpus; cpu++)
	{
		config->workers[topology->nworkers + cpu].arch = STARPU_CPU_WORKER;
		config->workers[topology->nworkers + cpu].perf_arch = STARPU_CPU_DEFAULT;
		config->workers[topology->nworkers + cpu].devid = cpu;
		config->workers[topology->nworkers + cpu].worker_mask = STARPU_CPU;
		config->worker_mask |= STARPU_CPU;
	}

	topology->nworkers += topology->ncpus;
#endif

	if (topology->nworkers == 0)
	{
		_STARPU_DEBUG("No worker found, aborting ...\n");
		return -ENODEV;
	}

	return 0;
}
/*
 * Bind workers on the different processors
 */
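/* Fill topology->workers_bindid with the CPU identifiers that successive
 * workers will be bound to: either the explicit list from starpu_conf, or the
 * STARPU_WORKERS_CPUID environment variable, or a plain round-robin over the
 * detected CPUs. For instance, running with STARPU_WORKERS_CPUID="0 2" binds
 * the successive workers to CPUs 0, 2, 0, 2, ... (values are taken modulo the
 * number of CPUs actually present). */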
static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config)
{
	char *strval;
	unsigned i;

	struct starpu_machine_topology_s *topology = &config->topology;

	config->current_bindid = 0;

	/* conf->workers_bindid indicates the successive cpu identifiers that
	 * should be used to bind the workers. It is either filled according
	 * to the user's explicit parameters (from starpu_conf) or according
	 * to the STARPU_WORKERS_CPUID env. variable. Otherwise, a round-robin
	 * policy is used to distribute the workers over the cpus. */

	/* what do we use, explicit value, env. variable, or round-robin ? */
	if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
	{
		/* we use the explicit value from the user */
		memcpy(topology->workers_bindid,
			config->user_conf->workers_bindid,
			STARPU_NMAXWORKERS*sizeof(unsigned));
	}
	else if ((strval = getenv("STARPU_WORKERS_CPUID")))
	{
		/* STARPU_WORKERS_CPUID most likely contains fewer entries than
		 * STARPU_NMAXWORKERS, so we reuse its entries in a round-robin
		 * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 2". */
		unsigned wrap = 0;
		unsigned number_of_entries = 0;

		char *endptr;
		/* we use the content of the STARPU_WORKERS_CPUID env. variable */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
		{
			if (!wrap) {
				long int val;
				val = strtol(strval, &endptr, 10);
				if (endptr != strval)
				{
					topology->workers_bindid[i] = (unsigned)(val % topology->nhwcpus);
					strval = endptr;
				}
				else {
					/* there must be at least one entry */
					STARPU_ASSERT(i != 0);
					number_of_entries = i;

					/* there are no more values in the string */
					wrap = 1;

					topology->workers_bindid[i] = topology->workers_bindid[0];
				}
			}
			else {
				topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries];
			}
		}
	}
	else
	{
		/* by default, we take a round robin policy */
		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			topology->workers_bindid[i] = (unsigned)(i % topology->nhwcpus);
	}
}
/* This function gets the identifier of the next cpu on which to bind a
 * worker. In case a list of preferred cpus was specified, we look for an
 * available cpu among that list if possible, otherwise a round-robin policy
 * is used. */
static inline int _starpu_get_next_bindid(struct starpu_machine_config_s *config,
				int *preferred_binding, int npreferred)
{
	struct starpu_machine_topology_s *topology = &config->topology;

	unsigned found = 0;
	int current_preferred;

	for (current_preferred = 0; current_preferred < npreferred; current_preferred++)
	{
		if (found)
			break;

		unsigned requested_cpu = preferred_binding[current_preferred];

		/* can we bind the worker on the requested cpu ? */
		unsigned ind;
		for (ind = config->current_bindid; ind < topology->nhwcpus; ind++)
		{
			if (topology->workers_bindid[ind] == requested_cpu)
			{
				/* the cpu is available, we use it ! In order
				 * to make sure that it will not be used again
				 * later on, we remove the entry from the list */
				topology->workers_bindid[ind] =
					topology->workers_bindid[config->current_bindid];
				topology->workers_bindid[config->current_bindid] = requested_cpu;

				found = 1;

				break;
			}
		}
	}

	unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);

	return (int)topology->workers_bindid[i];
}
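/* Bind the calling thread on the CPU designated by 'cpuid', using hwloc when
 * available, pthread_setaffinity_np otherwise, or the Windows thread-affinity
 * API as a last resort. */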
void _starpu_bind_thread_on_cpu(struct starpu_machine_config_s *config __attribute__((unused)), unsigned cpuid)
{
#ifdef STARPU_HAVE_HWLOC
	int ret;
	_starpu_init_topology(config);

	hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->cpu_depth, cpuid);
	hwloc_cpuset_t set = obj->cpuset;
	hwloc_cpuset_singlify(set);
	ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
	if (ret)
	{
		perror("binding thread");
		STARPU_ABORT();
	}
#elif defined(HAVE_PTHREAD_SETAFFINITY_NP)
	int ret;
	/* fix the thread on the correct cpu */
	cpu_set_t aff_mask;
	CPU_ZERO(&aff_mask);
	CPU_SET(cpuid, &aff_mask);

	pthread_t self = pthread_self();

	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
	if (ret)
	{
		perror("binding thread");
		STARPU_ABORT();
	}
#elif defined(__MINGW32__) || defined(__CYGWIN__)
	DWORD mask = 1 << cpuid;
	if (!SetThreadAffinityMask(GetCurrentThread(), mask)) {
		fprintf(stderr, "SetThreadAffinityMask(%lx) failed\n", mask);
		STARPU_ABORT();
	}
#else
#warning no CPU binding support
#endif
}
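/* For each worker, pick the CPU it will be bound to and the memory node its
 * data will live in (the shared RAM node for CPU and Gordon workers, a
 * dedicated node for each CUDA or OpenCL device), and register the
 * corresponding buses for the data management library. */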
static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
{
	/* launch one thread per CPU */
	unsigned ram_memory_node;

	/* a single cpu is dedicated to the accelerators */
	int accelerator_bindid = -1;

	/* note that even if no CPU worker is used, we always have a RAM node */
	/* TODO : support NUMA ;) */
	ram_memory_node = _starpu_register_memory_node(STARPU_CPU_RAM);

	/* We will store all the busid of the different (src, dst) combinations
	 * in a matrix which we initialize here. */
	_starpu_initialize_busid_matrix();

	unsigned worker;
	for (worker = 0; worker < config->topology.nworkers; worker++)
	{
		unsigned memory_node = -1;
		unsigned is_a_set_of_accelerators = 0;
		struct starpu_worker_s *workerarg = &config->workers[worker];

		/* Perhaps the worker has some "favourite" bindings */
		int *preferred_binding = NULL;
		int npreferred = 0;

		/* select the memory node that contains the worker's memory */
		switch (workerarg->arch) {
			case STARPU_CPU_WORKER:
				/* "dedicate" a cpu core to that worker */
				is_a_set_of_accelerators = 0;
				memory_node = ram_memory_node;
				break;
#ifdef STARPU_USE_GORDON
			case STARPU_GORDON_WORKER:
				is_a_set_of_accelerators = 1;
				memory_node = ram_memory_node;
				break;
#endif
#ifdef STARPU_USE_CUDA
			case STARPU_CUDA_WORKER:
				if (may_bind_automatically)
				{
					/* StarPU is allowed to bind threads automatically */
					preferred_binding = _starpu_get_cuda_affinity_vector(workerarg->devid);
					npreferred = config->topology.nhwcpus;
				}
				is_a_set_of_accelerators = 0;
				memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM);
				_starpu_register_bus(0, memory_node);
				_starpu_register_bus(memory_node, 0);
				break;
#endif
#ifdef STARPU_USE_OPENCL
			case STARPU_OPENCL_WORKER:
				if (may_bind_automatically)
				{
					/* StarPU is allowed to bind threads automatically */
					preferred_binding = _starpu_get_opencl_affinity_vector(workerarg->devid);
					npreferred = config->topology.nhwcpus;
				}
				is_a_set_of_accelerators = 0;
				memory_node = _starpu_register_memory_node(STARPU_OPENCL_RAM);
				_starpu_register_bus(0, memory_node);
				_starpu_register_bus(memory_node, 0);
				break;
#endif
			default:
				STARPU_ABORT();
		}

		if (is_a_set_of_accelerators) {
			if (accelerator_bindid == -1)
				accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);

			workerarg->bindid = accelerator_bindid;
		}
		else {
			workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
		}

		workerarg->memory_node = memory_node;
	}
}
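/* Entry point used when StarPU initializes: discover the machine, create the
 * worker descriptions, initialize the memory nodes and bind the workers. */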
int _starpu_build_topology(struct starpu_machine_config_s *config)
{
	int ret;

	struct starpu_conf *user_conf = config->user_conf;

	ret = _starpu_init_machine_config(config, user_conf);
	if (ret)
		return ret;

	/* for the data management library */
	_starpu_init_memory_nodes();

	_starpu_init_workers_binding(config);

	return 0;
}
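/* Release the resources set up by _starpu_build_topology (memory nodes and,
 * when hwloc is used, the hwloc topology). */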
void _starpu_destroy_topology(struct starpu_machine_config_s *config __attribute__ ((unused)))
{
	/* cleanup StarPU internal data structures */
	_starpu_deinit_memory_nodes();

#ifdef STARPU_HAVE_HWLOC
	hwloc_topology_destroy(config->topology.hwtopology);
#endif

	topology_is_initialized = 0;
}