topology.c 49 KB


  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2015 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015 CNRS
  5. * Copyright (C) 2011 INRIA
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <common/config.h>
  21. #include <core/workers.h>
  22. #include <core/debug.h>
  23. #include <core/topology.h>
  24. #include <drivers/cuda/driver_cuda.h>
  25. #include <drivers/mic/driver_mic_source.h>
  26. #include <drivers/scc/driver_scc_source.h>
  27. #include <drivers/mp_common/source_common.h>
  28. #include <drivers/opencl/driver_opencl.h>
  29. #include <profiling/profiling.h>
  30. #include <datawizard/memory_nodes.h>
  31. #include <common/uthash.h>
  32. #ifdef STARPU_HAVE_HWLOC
  33. #include <hwloc.h>
  34. #ifndef HWLOC_API_VERSION
  35. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  36. #endif
  37. #endif
  38. #ifdef STARPU_HAVE_WINDOWS
  39. #include <windows.h>
  40. #endif
  41. #ifdef STARPU_SIMGRID
  42. #include <core/simgrid.h>
  43. #endif
  /* Non-zero once _starpu_init_topology() has discovered the machine; later
   * calls to that function then return immediately. */
  static unsigned topology_is_initialized;
  /* Value of the STARPU_WORKERS_NOBIND environment variable, read by
   * _starpu_init_topology(). */
  static int nobind;
  #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  /* Node of a uthash table keyed by device identifier (`gpuid'). */
  struct handle_entry
  {
      UT_hash_handle hh;
      unsigned gpuid;
  };
  # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /* Head of the `devices_using_cuda' hash table: one entry is added per CUDA
   * worker, so that OpenCL can skip devices already driven through CUDA. */
  static struct handle_entry *devices_using_cuda;
  # endif
  /* Per-architecture flag, set when the device identifiers of that architecture
   * were chosen by the default round-robin policy (neither the environment nor
   * the user imposed them).  Static storage: starts zeroed. */
  static unsigned may_bind_automatically[STARPU_NARCH];
  #endif /* STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_SCC || STARPU_SIMGRID */
  58. /*
  59. * Discover the topology of the machine
  60. */
  61. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  62. static void
  63. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  64. int *current, int *workers_gpuid,
  65. const char *varname, unsigned nhwgpus,
  66. enum starpu_worker_archtype type)
  67. {
  68. char *strval;
  69. unsigned i;
  70. *current = 0;
  71. /* conf->workers_gpuid indicates the successive GPU identifier that
  72. * should be used to bind the workers. It should be either filled
  73. * according to the user's explicit parameters (from starpu_conf) or
  74. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  75. * round-robin policy is used to distributed the workers over the
  76. * cores. */
  77. /* what do we use, explicit value, env. variable, or round-robin ? */
  78. if ((strval = starpu_getenv(varname)))
  79. {
  80. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  81. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  82. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  83. * 2". */
  84. unsigned wrap = 0;
  85. unsigned number_of_entries = 0;
  86. char *endptr;
  87. /* we use the content of the STARPU_WORKERS_CUDAID
  88. * env. variable */
  89. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  90. {
  91. if (!wrap)
  92. {
  93. long int val;
  94. val = strtol(strval, &endptr, 10);
  95. if (endptr != strval)
  96. {
  97. workers_gpuid[i] = (unsigned)val;
  98. strval = endptr;
  99. }
  100. else
  101. {
  102. /* there must be at least one entry */
  103. STARPU_ASSERT(i != 0);
  104. number_of_entries = i;
  105. /* there is no more values in the
  106. * string */
  107. wrap = 1;
  108. workers_gpuid[i] = workers_gpuid[0];
  109. }
  110. }
  111. else
  112. {
  113. workers_gpuid[i] =
  114. workers_gpuid[i % number_of_entries];
  115. }
  116. }
  117. }
  118. else if (explicit_workers_gpuid)
  119. {
  120. /* we use the explicit value from the user */
  121. memcpy(workers_gpuid,
  122. explicit_workers_gpuid,
  123. STARPU_NMAXWORKERS*sizeof(unsigned));
  124. }
  125. else
  126. {
  127. /* by default, we take a round robin policy */
  128. if (nhwgpus > 0)
  129. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  130. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  131. /* StarPU can use sampling techniques to bind threads
  132. * correctly */
  133. may_bind_automatically[type] = 1;
  134. }
  135. }
  136. #endif
  137. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  138. static void
  139. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  140. {
  141. struct _starpu_machine_topology *topology = &config->topology;
  142. struct starpu_conf *uconf = config->conf;
  143. _starpu_initialize_workers_deviceid (
  144. uconf->use_explicit_workers_cuda_gpuid == 0
  145. ? NULL
  146. : (int *)uconf->workers_cuda_gpuid,
  147. &(config->current_cuda_gpuid),
  148. (int *)topology->workers_cuda_gpuid,
  149. "STARPU_WORKERS_CUDAID",
  150. topology->nhwcudagpus,
  151. STARPU_CUDA_WORKER);
  152. }
  153. static inline int
  154. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  155. {
  156. unsigned i =
  157. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  158. return (int)config->topology.workers_cuda_gpuid[i];
  159. }
  160. #endif
  161. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  162. static void
  163. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  164. {
  165. struct _starpu_machine_topology *topology = &config->topology;
  166. struct starpu_conf *uconf = config->conf;
  167. _starpu_initialize_workers_deviceid(
  168. uconf->use_explicit_workers_opencl_gpuid == 0
  169. ? NULL
  170. : (int *)uconf->workers_opencl_gpuid,
  171. &(config->current_opencl_gpuid),
  172. (int *)topology->workers_opencl_gpuid,
  173. "STARPU_WORKERS_OPENCLID",
  174. topology->nhwopenclgpus,
  175. STARPU_OPENCL_WORKER);
  176. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  177. // Detect devices which are already used with CUDA
  178. {
  179. unsigned tmp[STARPU_NMAXWORKERS];
  180. unsigned nb=0;
  181. int i;
  182. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  183. {
  184. struct handle_entry *entry;
  185. int devid = config->topology.workers_opencl_gpuid[i];
  186. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  187. if (entry == NULL)
  188. {
  189. tmp[nb] = topology->workers_opencl_gpuid[i];
  190. nb++;
  191. }
  192. }
  193. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  194. tmp[i] = -1;
  195. memcpy (topology->workers_opencl_gpuid, tmp,
  196. sizeof(unsigned)*STARPU_NMAXWORKERS);
  197. }
  198. #endif /* STARPU_USE_CUDA */
  199. {
  200. // Detect identical devices
  201. struct handle_entry *devices_already_used = NULL;
  202. unsigned tmp[STARPU_NMAXWORKERS];
  203. unsigned nb=0;
  204. int i;
  205. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  206. {
  207. int devid = topology->workers_opencl_gpuid[i];
  208. struct handle_entry *entry;
  209. HASH_FIND_INT(devices_already_used, &devid, entry);
  210. if (entry == NULL)
  211. {
  212. struct handle_entry *entry2;
  213. entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
  214. STARPU_ASSERT(entry2 != NULL);
  215. entry2->gpuid = devid;
  216. HASH_ADD_INT(devices_already_used, gpuid,
  217. entry2);
  218. tmp[nb] = devid;
  219. nb ++;
  220. }
  221. }
  222. struct handle_entry *entry, *tempo;
  223. HASH_ITER(hh, devices_already_used, entry, tempo)
  224. {
  225. HASH_DEL(devices_already_used, entry);
  226. free(entry);
  227. }
  228. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  229. tmp[i] = -1;
  230. memcpy (topology->workers_opencl_gpuid, tmp,
  231. sizeof(unsigned)*STARPU_NMAXWORKERS);
  232. }
  233. }
  234. static inline int
  235. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  236. {
  237. unsigned i =
  238. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  239. return (int)config->topology.workers_opencl_gpuid[i];
  240. }
  241. #endif
  242. #if 0
  243. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  244. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  245. {
  246. struct _starpu_machine_topology *topology = &config->topology;
  247. struct starpu_conf *uconf = config->conf;
  248. _starpu_initialize_workers_deviceid(
  249. uconf->use_explicit_workers_mic_deviceid == 0
  250. ? NULL
  251. : (int *)config->user_conf->workers_mic_deviceid,
  252. &(config->current_mic_deviceid),
  253. (int *)topology->workers_mic_deviceid,
  254. "STARPU_WORKERS_MICID",
  255. topology->nhwmiccores,
  256. STARPU_MIC_WORKER);
  257. }
  258. #endif
  259. #endif
  260. #ifdef STARPU_USE_SCC
  261. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  262. {
  263. struct _starpu_machine_topology *topology = &config->topology;
  264. struct starpu_conf *uconf = config->conf;
  265. _starpu_initialize_workers_deviceid(
  266. uconf->use_explicit_workers_scc_deviceid == 0
  267. ? NULL
  268. : (int *) uconf->workers_scc_deviceid,
  269. &(config->current_scc_deviceid),
  270. (int *)topology->workers_scc_deviceid,
  271. "STARPU_WORKERS_SCCID",
  272. topology->nhwscc,
  273. STARPU_SCC_WORKER);
  274. }
  275. #endif /* STARPU_USE_SCC */
  276. #if 0
  277. #ifdef STARPU_USE_MIC
  278. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  279. {
  280. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  281. return (int)config->topology.workers_mic_deviceid[i];
  282. }
  283. #endif
  284. #endif
  285. #ifdef STARPU_USE_SCC
  286. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  287. {
  288. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  289. return (int)config->topology.workers_scc_deviceid[i];
  290. }
  291. #endif
  292. #ifdef STARPU_USE_MIC
  293. static void
  294. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  295. {
  296. /* Discover the topology of the mic node identifier by MIC_IDX. That
  297. * means, make this StarPU instance aware of the number of cores available
  298. * on this MIC device. Update the `nhwmiccores' topology field
  299. * accordingly. */
  300. struct _starpu_machine_topology *topology = &config->topology;
  301. int nbcores;
  302. _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
  303. topology->nhwmiccores[mic_idx] = nbcores;
  304. }
  305. static int
  306. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  307. COIENGINE *coi_handle, COIPROCESS *coi_process)
  308. {
  309. /* Initialize the MIC node of index MIC_IDX. */
  310. struct starpu_conf *user_conf = config->conf;
  311. char ***argv = _starpu_get_argv();
  312. const char *suffixes[] = {"-mic", "_mic", NULL};
  313. /* Environment variables to send to the Sink, it informs it what kind
  314. * of node it is (architecture and type) as there is no way to discover
  315. * it itself */
  316. char mic_idx_env[32];
  317. sprintf(mic_idx_env, "_STARPU_MIC_DEVID=%d", mic_idx);
  318. /* XXX: this is currently necessary so that the remote process does not
  319. * segfault. */
  320. char nb_mic_env[32];
  321. sprintf(nb_mic_env, "_STARPU_MIC_NB=%d", 2);
  322. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  323. char mic_sink_program_path[1024];
  324. /* Let's get the helper program to run on the MIC device */
  325. int mic_file_found =
  326. _starpu_src_common_locate_file (mic_sink_program_path,
  327. starpu_getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  328. starpu_getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  329. user_conf->mic_sink_program_path,
  330. (argv ? (*argv)[0] : NULL),
  331. suffixes);
  332. if (0 != mic_file_found)
  333. {
  334. fprintf(stderr, "No MIC program specified, use the environment\n"
  335. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  336. "or the field 'starpu_conf.mic_sink_program_path'\n"
  337. "to define it.\n");
  338. return -1;
  339. }
  340. COIRESULT res;
  341. /* Let's get the handle which let us manage the remote MIC device */
  342. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  343. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  344. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  345. /* We launch the helper on the MIC device, which will wait for us
  346. * to give it work to do.
  347. * As we will communicate further with the device throught scif we
  348. * don't need to keep the process pointer */
  349. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  350. mic_sink_env, 1, NULL, 0, NULL,
  351. coi_process);
  352. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  353. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  354. /* Let's create the node structure, we'll communicate with the peer
  355. * through scif thanks to it */
  356. mic_nodes[mic_idx] =
  357. _starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
  358. return 0;
  359. }
  360. #endif
  361. static void
  362. _starpu_init_topology (struct _starpu_machine_config *config)
  363. {
  364. /* Discover the topology, meaning finding all the available PUs for
  365. the compiled drivers. These drivers MUST have been initialized
  366. before calling this function. The discovered topology is filled in
  367. CONFIG. */
  368. struct _starpu_machine_topology *topology = &config->topology;
  369. if (topology_is_initialized)
  370. return;
  371. nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
  372. topology->nhwcpus = 0;
  373. topology->nhwpus = 0;
  374. #ifndef STARPU_SIMGRID
  375. #ifdef STARPU_HAVE_HWLOC
  376. hwloc_topology_init(&topology->hwtopology);
  377. hwloc_topology_load(topology->hwtopology);
  378. #endif
  379. #endif
  380. #ifdef STARPU_SIMGRID
  381. config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  382. #elif defined(STARPU_HAVE_HWLOC)
  383. /* Discover the CPUs relying on the hwloc interface and fills CONFIG
  384. * accordingly. */
  385. config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
  386. HWLOC_OBJ_CORE);
  387. config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
  388. HWLOC_OBJ_PU);
  389. /* Would be very odd */
  390. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  391. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  392. {
  393. /* unknown, using logical procesors as fallback */
  394. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  395. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  396. HWLOC_OBJ_PU);
  397. }
  398. topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  399. config->cpu_depth);
  400. topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  401. config->pu_depth);
  402. #elif defined(HAVE_SYSCONF)
  403. /* Discover the CPUs relying on the sysconf(3) function and fills
  404. * CONFIG accordingly. */
  405. config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  406. #elif defined(_WIN32)
  407. /* Discover the CPUs on Cygwin and MinGW systems. */
  408. SYSTEM_INFO sysinfo;
  409. GetSystemInfo(&sysinfo);
  410. config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  411. #else
  412. #warning no way to know number of cores, assuming 1
  413. config->topology.nhwcpus = config->topology.nhwpus = 1;
  414. #endif
  415. _starpu_cuda_discover_devices(config);
  416. _starpu_opencl_discover_devices(config);
  417. #ifdef STARPU_USE_SCC
  418. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  419. #endif
  420. topology_is_initialized = 1;
  421. }
  422. /*
  423. * Bind workers on the different processors
  424. */
  425. static void
  426. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  427. {
  428. char *strval;
  429. unsigned i;
  430. struct _starpu_machine_topology *topology = &config->topology;
  431. config->current_bindid = 0;
  432. /* conf->workers_bindid indicates the successive logical PU identifier that
  433. * should be used to bind the workers. It should be either filled
  434. * according to the user's explicit parameters (from starpu_conf) or
  435. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  436. * round-robin policy is used to distributed the workers over the
  437. * cores. */
  438. /* what do we use, explicit value, env. variable, or round-robin ? */
  439. if ((strval = starpu_getenv("STARPU_WORKERS_CPUID")))
  440. {
  441. /* STARPU_WORKERS_CPUID certainly contains less entries than
  442. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  443. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  444. * 2". */
  445. unsigned wrap = 0;
  446. unsigned number_of_entries = 0;
  447. char *endptr;
  448. /* we use the content of the STARPU_WORKERS_CPUID
  449. * env. variable */
  450. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  451. {
  452. if (!wrap)
  453. {
  454. long int val;
  455. val = strtol(strval, &endptr, 10);
  456. if (endptr != strval)
  457. {
  458. topology->workers_bindid[i] =
  459. (unsigned)(val % topology->nhwpus);
  460. strval = endptr;
  461. }
  462. else
  463. {
  464. /* there must be at least one entry */
  465. STARPU_ASSERT(i != 0);
  466. number_of_entries = i;
  467. /* there is no more values in the
  468. * string */
  469. wrap = 1;
  470. topology->workers_bindid[i] =
  471. topology->workers_bindid[0];
  472. }
  473. }
  474. else
  475. {
  476. topology->workers_bindid[i] =
  477. topology->workers_bindid[i % number_of_entries];
  478. }
  479. }
  480. }
  481. else if (config->conf->use_explicit_workers_bindid)
  482. {
  483. /* we use the explicit value from the user */
  484. memcpy(topology->workers_bindid,
  485. config->conf->workers_bindid,
  486. STARPU_NMAXWORKERS*sizeof(unsigned));
  487. }
  488. else
  489. {
  490. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  491. int k;
  492. int nbindids=0;
  493. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  494. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  495. i = 0; /* PU number currently assigned */
  496. k = 0; /* Number of threads already put on the current core */
  497. while(nbindids < STARPU_NMAXWORKERS)
  498. {
  499. if (k >= nth_per_core)
  500. {
  501. /* We have already put enough workers on this
  502. * core, skip remaining PUs from this core, and
  503. * proceed with next core */
  504. i += nhyperthreads-nth_per_core;
  505. k = 0;
  506. continue;
  507. }
  508. /* Add a worker to this core, by using this logical PU */
  509. topology->workers_bindid[nbindids++] =
  510. (unsigned)(i % topology->nhwpus);
  511. k++;
  512. i++;
  513. }
  514. }
  515. }
  516. /* This function gets the identifier of the next core on which to bind a
  517. * worker. In case a list of preferred cores was specified (logical indexes),
  518. * we look for a an available core among the list if possible, otherwise a
  519. * round-robin policy is used. */
  520. static inline int
  521. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  522. int *preferred_binding, int npreferred)
  523. {
  524. struct _starpu_machine_topology *topology = &config->topology;
  525. unsigned found = 0;
  526. int current_preferred;
  527. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  528. /* loop over the preference list */
  529. for (current_preferred = 0;
  530. current_preferred < npreferred;
  531. current_preferred++)
  532. {
  533. if (found)
  534. break;
  535. /* Try to get this core */
  536. unsigned requested_core = preferred_binding[current_preferred];
  537. /* can we bind the worker on the preferred core ? */
  538. unsigned ind;
  539. /* Look at the remaining cores to be bound to */
  540. for (ind = config->current_bindid;
  541. ind < topology->nhwpus / nhyperthreads;
  542. ind++)
  543. {
  544. if (topology->workers_bindid[ind] == requested_core * nhyperthreads)
  545. {
  546. /* the cpu is available, we use it ! In order
  547. * to make sure that it will not be used again
  548. * later on, we exchange it with the next bindid we were supposed to use */
  549. topology->workers_bindid[ind] =
  550. topology->workers_bindid[config->current_bindid];
  551. topology->workers_bindid[config->current_bindid] = requested_core * nhyperthreads;
  552. found = 1;
  553. break;
  554. }
  555. }
  556. }
  557. unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
  558. return (int)topology->workers_bindid[i];
  559. }
  560. unsigned
  561. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  562. {
  563. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  564. _starpu_opencl_init();
  565. #endif
  566. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  567. _starpu_init_cuda();
  568. #endif
  569. _starpu_init_topology(config);
  570. return config->topology.nhwcpus;
  571. }
  572. unsigned
  573. _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
  574. {
  575. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  576. _starpu_opencl_init();
  577. #endif
  578. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  579. _starpu_init_cuda();
  580. #endif
  581. _starpu_init_topology(config);
  582. return config->topology.nhwpus;
  583. }
  584. #ifdef STARPU_USE_MIC
  585. static void
  586. _starpu_init_mic_config (struct _starpu_machine_config *config,
  587. struct starpu_conf *user_conf,
  588. unsigned mic_idx)
  589. {
  590. // Configure the MIC device of index MIC_IDX.
  591. struct _starpu_machine_topology *topology = &config->topology;
  592. topology->nhwmiccores[mic_idx] = 0;
  593. _starpu_init_mic_topology (config, mic_idx);
  594. int nmiccores;
  595. nmiccores = starpu_get_env_number("STARPU_NMIC");
  596. /* STARPU_NMIC is not set. Did the user specify anything ? */
  597. if (nmiccores == -1 && user_conf)
  598. nmiccores = user_conf->nmic;
  599. if (nmiccores != 0)
  600. {
  601. if (nmiccores == -1)
  602. {
  603. /* Nothing was specified, so let's use the number of
  604. * detected mic cores. ! */
  605. nmiccores = topology->nhwmiccores[mic_idx];
  606. }
  607. else
  608. {
  609. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  610. {
  611. /* The user requires more MIC devices than there is available */
  612. fprintf(stderr,
  613. "# Warning: %d MIC devices requested. Only %d available.\n",
  614. nmiccores, topology->nhwmiccores[mic_idx]);
  615. nmiccores = topology->nhwmiccores[mic_idx];
  616. }
  617. }
  618. }
  619. topology->nmiccores[mic_idx] = nmiccores;
  620. STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  621. "topology->nmiccores[mic_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
  622. mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  623. /* _starpu_initialize_workers_mic_deviceid (config); */
  624. unsigned miccore_id;
  625. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  626. {
  627. int worker_idx = topology->nworkers + miccore_id;
  628. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  629. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device *) malloc(sizeof(struct starpu_perfmodel_device));
  630. config->workers[worker_idx].perf_arch.ndevices = 1;
  631. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  632. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  633. config->workers[worker_idx].perf_arch.devices[0].ncores = 0;
  634. config->workers[worker_idx].devid = mic_idx;
  635. config->workers[worker_idx].subworkerid = miccore_id;
  636. config->workers[worker_idx].worker_mask = STARPU_MIC;
  637. config->worker_mask |= STARPU_MIC;
  638. }
  639. topology->nworkers += topology->nmiccores[mic_idx];
  640. }
  641. #ifdef STARPU_USE_MIC
  642. static COIENGINE handles[2];
  643. static COIPROCESS process[2];
  644. #endif
  645. static void
  646. _starpu_init_mp_config (struct _starpu_machine_config *config,
  647. struct starpu_conf *user_conf)
  648. {
  649. /* Discover and configure the mp topology. That means:
  650. * - discover the number of mp nodes;
  651. * - initialize each discovered node;
  652. * - discover the local topology (number of PUs/devices) of each node;
  653. * - configure the workers accordingly.
  654. */
  655. struct _starpu_machine_topology *topology = &config->topology;
  656. // We currently only support MIC at this level.
  657. #ifdef STARPU_USE_MIC
  658. /* Discover and initialize the number of MIC nodes through the mp
  659. * infrastructure. */
  660. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  661. int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
  662. if (-1 == reqmicdevices)
  663. reqmicdevices = nhwmicdevices;
  664. topology->nmicdevices = 0;
  665. unsigned i;
  666. for (i = 0; i < STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices); i++)
  667. if (0 == _starpu_init_mic_node (config, i, &handles[i], &process[i]))
  668. topology->nmicdevices++;
  669. for (i = 0; i < topology->nmicdevices; i++)
  670. _starpu_init_mic_config (config, user_conf, i);
  671. #endif
  672. }
  673. static void
  674. _starpu_deinit_mic_node (unsigned mic_idx)
  675. {
  676. _starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_EXIT, NULL, 0);
  677. COIProcessDestroy(process[mic_idx], -1, 0, NULL, NULL);
  678. _starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
  679. }
  680. static void
  681. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  682. {
  683. struct _starpu_machine_topology *topology = &config->topology;
  684. unsigned i;
  685. for (i = 0; i < topology->nmicdevices; i++)
  686. _starpu_deinit_mic_node (i);
  687. _starpu_mic_clear_kernels();
  688. }
  689. #endif
  690. static int
  691. _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  692. {
  693. int i;
  694. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  695. config->workers[i].workerid = i;
  696. struct _starpu_machine_topology *topology = &config->topology;
  697. topology->nworkers = 0;
  698. topology->ncombinedworkers = 0;
  699. topology->nsched_ctxs = 0;
  700. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  701. _starpu_opencl_init();
  702. #endif
  703. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  704. _starpu_init_cuda();
  705. #endif
  706. _starpu_init_topology(config);
  707. _starpu_initialize_workers_bindid(config);
  708. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  709. int ncuda = config->conf->ncuda;
  710. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  711. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  712. #ifndef STARPU_NON_BLOCKING_DRIVERS
  713. if (nworker_per_cuda > 1)
  714. {
  715. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  716. nworker_per_cuda = 1;
  717. }
  718. #endif
  719. if (ncuda != 0)
  720. {
  721. /* The user did not disable CUDA. We need to initialize CUDA
  722. * early to count the number of devices */
  723. _starpu_init_cuda();
  724. int nb_devices = _starpu_get_cuda_device_count();
  725. if (ncuda == -1)
  726. {
  727. /* Nothing was specified, so let's choose ! */
  728. ncuda = nb_devices;
  729. }
  730. else
  731. {
  732. if (ncuda > nb_devices)
  733. {
  734. /* The user requires more CUDA devices than
  735. * there is available */
  736. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  737. ncuda = nb_devices;
  738. }
  739. }
  740. }
  741. /* Now we know how many CUDA devices will be used */
  742. topology->ncudagpus = ncuda;
  743. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  744. _starpu_initialize_workers_cuda_gpuid(config);
  745. unsigned cudagpu;
  746. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  747. {
  748. int devid = _starpu_get_next_cuda_gpuid(config);
  749. for (i = 0; i < nworker_per_cuda; i++)
  750. {
  751. int worker_idx = topology->nworkers + cudagpu * nworker_per_cuda + i;
  752. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  753. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  754. config->workers[worker_idx].perf_arch.ndevices = 1;
  755. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  756. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  757. // TODO: fix perfmodels etc.
  758. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  759. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  760. config->workers[worker_idx].devid = devid;
  761. config->workers[worker_idx].subworkerid = i;
  762. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  763. config->worker_mask |= STARPU_CUDA;
  764. struct handle_entry *entry;
  765. entry = (struct handle_entry *) malloc(sizeof(*entry));
  766. STARPU_ASSERT(entry != NULL);
  767. entry->gpuid = devid;
  768. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  769. }
  770. }
  771. topology->nworkers += topology->ncudagpus * nworker_per_cuda;
  772. #endif
  773. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  774. int nopencl = config->conf->nopencl;
  775. if (nopencl != 0)
  776. {
  777. /* The user did not disable OPENCL. We need to initialize
  778. * OpenCL early to count the number of devices */
  779. _starpu_opencl_init();
  780. int nb_devices;
  781. nb_devices = _starpu_opencl_get_device_count();
  782. if (nopencl == -1)
  783. {
  784. /* Nothing was specified, so let's choose ! */
  785. nopencl = nb_devices;
  786. if (nopencl > STARPU_MAXOPENCLDEVS)
  787. {
  788. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  789. nopencl = STARPU_MAXOPENCLDEVS;
  790. }
  791. }
  792. else
  793. {
  794. /* Let's make sure this value is OK. */
  795. if (nopencl > nb_devices)
  796. {
  797. /* The user requires more OpenCL devices than
  798. * there is available */
  799. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  800. nopencl = nb_devices;
  801. }
  802. /* Let's make sure this value is OK. */
  803. if (nopencl > STARPU_MAXOPENCLDEVS)
  804. {
  805. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  806. nopencl = STARPU_MAXOPENCLDEVS;
  807. }
  808. }
  809. }
  810. topology->nopenclgpus = nopencl;
  811. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  812. _starpu_initialize_workers_opencl_gpuid(config);
  813. unsigned openclgpu;
  814. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  815. {
  816. int worker_idx = topology->nworkers + openclgpu;
  817. int devid = _starpu_get_next_opencl_gpuid(config);
  818. if (devid == -1)
  819. { // There is no more devices left
  820. topology->nopenclgpus = openclgpu;
  821. break;
  822. }
  823. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  824. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  825. config->workers[worker_idx].perf_arch.ndevices = 1;
  826. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  827. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  828. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  829. config->workers[worker_idx].subworkerid = 0;
  830. config->workers[worker_idx].devid = devid;
  831. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  832. config->worker_mask |= STARPU_OPENCL;
  833. }
  834. topology->nworkers += topology->nopenclgpus;
  835. #endif
  836. #ifdef STARPU_USE_SCC
  837. int nscc = config->conf->nscc;
  838. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  839. if (nscc != 0)
  840. {
  841. /* The user did not disable SCC. We need to count
  842. * the number of devices */
  843. int nb_devices = nb_scc_nodes;
  844. if (nscc == -1)
  845. {
  846. /* Nothing was specified, so let's choose ! */
  847. nscc = nb_devices;
  848. if (nscc > STARPU_MAXSCCDEVS)
  849. {
  850. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  851. nscc = STARPU_MAXSCCDEVS;
  852. }
  853. }
  854. else
  855. {
  856. /* Let's make sure this value is OK. */
  857. if (nscc > nb_devices)
  858. {
  859. /* The user requires more SCC devices than there is available */
  860. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  861. nscc = nb_devices;
  862. }
  863. /* Let's make sure this value is OK. */
  864. if (nscc > STARPU_MAXSCCDEVS)
  865. {
  866. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  867. nscc = STARPU_MAXSCCDEVS;
  868. }
  869. }
  870. }
  871. /* Now we know how many SCC devices will be used */
  872. topology->nsccdevices = nscc;
  873. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  874. _starpu_initialize_workers_scc_deviceid(config);
  875. unsigned sccdev;
  876. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  877. {
  878. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  879. int devid = _starpu_get_next_scc_deviceid(config);
  880. config->workers[topology->nworkers + sccdev].perf_arch.devices = (struct starpu_perfmodel_device)malloc(sizeof(struct starpu_perfmodel_device));
  881. config->workers[topology->nworkers + sccdev].perf_arch.ndevices = 1;
  882. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].type = STARPU_SCC_WORKER;
  883. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].devid = sccdev;
  884. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].ncore = 1;
  885. config->workers[topology->nworkers + sccdev].subworkerid = 0;
  886. config->workers[topology->nworkers + sccdev].devid = devid;
  887. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  888. config->worker_mask |= STARPU_SCC;
  889. }
  890. for (; sccdev < nb_scc_nodes; ++sccdev)
  891. _starpu_scc_exit_useless_node(sccdev);
  892. topology->nworkers += topology->nsccdevices;
  893. #endif /* STARPU_USE_SCC */
  894. /* Unless not requested, we need to complete configuration with the
  895. * ones of the mp nodes. */
  896. #ifdef STARPU_USE_MIC
  897. if (! no_mp_config)
  898. _starpu_init_mp_config (config, config->conf);
  899. #endif
  900. /* we put the CPU section after the accelerator : in case there was an
  901. * accelerator found, we devote one cpu */
  902. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  903. int ncpu = config->conf->ncpus;
  904. if (ncpu != 0)
  905. {
  906. if (ncpu == -1)
  907. {
  908. unsigned mic_busy_cpus = 0;
  909. unsigned j = 0;
  910. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  911. mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
  912. unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
  913. + topology->nopenclgpus + topology->nsccdevices;
  914. long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
  915. if (avail_cpus < 0)
  916. avail_cpus = 0;
  917. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  918. avail_cpus *= nth_per_core;
  919. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  920. }
  921. else
  922. {
  923. if (ncpu > STARPU_MAXCPUS)
  924. {
  925. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  926. ncpu = STARPU_MAXCPUS;
  927. }
  928. }
  929. }
  930. topology->ncpus = ncpu;
  931. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  932. unsigned cpu;
  933. for (cpu = 0; cpu < topology->ncpus; cpu++)
  934. {
  935. int worker_idx = topology->nworkers + cpu;
  936. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  937. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  938. config->workers[worker_idx].perf_arch.ndevices = 1;
  939. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
  940. config->workers[worker_idx].perf_arch.devices[0].devid = 0;
  941. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  942. config->workers[worker_idx].subworkerid = 0;
  943. config->workers[worker_idx].devid = cpu;
  944. config->workers[worker_idx].worker_mask = STARPU_CPU;
  945. config->worker_mask |= STARPU_CPU;
  946. }
  947. topology->nworkers += topology->ncpus;
  948. #endif
  949. if (topology->nworkers == 0)
  950. {
  951. _STARPU_DEBUG("No worker found, aborting ...\n");
  952. return -ENODEV;
  953. }
  954. return 0;
  955. }
  956. void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
  957. {
  958. _starpu_close_debug_logfile();
  959. unsigned worker;
  960. for (worker = 0; worker < config->topology.nworkers; worker++)
  961. {
  962. struct _starpu_worker *workerarg = &config->workers[worker];
  963. free(workerarg->perf_arch.devices);
  964. #ifdef STARPU_HAVE_HWLOC
  965. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  966. if (workerarg->bindid != -1)
  967. {
  968. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  969. config->pu_depth,
  970. workerarg->bindid);
  971. if (worker_obj->userdata)
  972. {
  973. _starpu_worker_list_delete(worker_obj->userdata);
  974. worker_obj->userdata = NULL;
  975. }
  976. }
  977. #endif
  978. }
  979. unsigned combined_worker_id;
  980. for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++)
  981. {
  982. struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
  983. free(combined_worker->perf_arch.devices);
  984. }
  985. #ifdef STARPU_HAVE_HWLOC
  986. hwloc_topology_destroy(config->topology.hwtopology);
  987. #endif
  988. topology_is_initialized = 0;
  989. #ifdef STARPU_USE_CUDA
  990. struct handle_entry *entry, *tmp;
  991. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  992. {
  993. HASH_DEL(devices_using_cuda, entry);
  994. free(entry);
  995. }
  996. devices_using_cuda = NULL;
  997. #endif
  998. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  999. int i;
  1000. for (i=0; i<STARPU_NARCH; i++)
  1001. may_bind_automatically[i] = 0;
  1002. #endif
  1003. }
  1004. void
  1005. _starpu_bind_thread_on_cpu (
  1006. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  1007. int cpuid STARPU_ATTRIBUTE_UNUSED)
  1008. {
  1009. #ifdef STARPU_SIMGRID
  1010. return;
  1011. #else
  1012. if (nobind > 0)
  1013. return;
  1014. if (cpuid < 0)
  1015. return;
  1016. #ifdef STARPU_HAVE_HWLOC
  1017. const struct hwloc_topology_support *support;
  1018. #ifdef STARPU_USE_OPENCL
  1019. _starpu_opencl_init();
  1020. #endif
  1021. #ifdef STARPU_USE_CUDA
  1022. _starpu_init_cuda();
  1023. #endif
  1024. _starpu_init_topology(config);
  1025. support = hwloc_topology_get_support (config->topology.hwtopology);
  1026. if (support->cpubind->set_thisthread_cpubind)
  1027. {
  1028. hwloc_obj_t obj =
  1029. hwloc_get_obj_by_depth (config->topology.hwtopology,
  1030. config->pu_depth, cpuid);
  1031. hwloc_bitmap_t set = obj->cpuset;
  1032. int ret;
  1033. hwloc_bitmap_singlify(set);
  1034. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1035. HWLOC_CPUBIND_THREAD);
  1036. if (ret)
  1037. {
  1038. perror("hwloc_set_cpubind");
  1039. STARPU_ABORT();
  1040. }
  1041. }
  1042. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  1043. int ret;
  1044. /* fix the thread on the correct cpu */
  1045. cpu_set_t aff_mask;
  1046. CPU_ZERO(&aff_mask);
  1047. CPU_SET(cpuid, &aff_mask);
  1048. starpu_pthread_t self = pthread_self();
  1049. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  1050. if (ret)
  1051. {
  1052. perror("binding thread");
  1053. STARPU_ABORT();
  1054. }
  1055. #elif defined(_WIN32)
  1056. DWORD mask = 1 << cpuid;
  1057. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  1058. {
  1059. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  1060. }
  1061. #else
  1062. #warning no CPU binding support
  1063. #endif
  1064. #endif
  1065. }
  1066. void
  1067. _starpu_bind_thread_on_cpus (
  1068. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  1069. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  1070. {
  1071. #ifdef STARPU_SIMGRID
  1072. return;
  1073. #endif
  1074. #ifdef STARPU_HAVE_HWLOC
  1075. const struct hwloc_topology_support *support;
  1076. #ifdef STARPU_USE_OPENC
  1077. _starpu_opencl_init();
  1078. #endif
  1079. #ifdef STARPU_USE_CUDA
  1080. _starpu_init_cuda();
  1081. #endif
  1082. _starpu_init_topology(config);
  1083. support = hwloc_topology_get_support(config->topology.hwtopology);
  1084. if (support->cpubind->set_thisthread_cpubind)
  1085. {
  1086. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  1087. int ret;
  1088. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1089. HWLOC_CPUBIND_THREAD);
  1090. if (ret)
  1091. {
  1092. perror("binding thread");
  1093. STARPU_ABORT();
  1094. }
  1095. }
  1096. #else
  1097. #ifdef __GLIBC__
  1098. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  1099. #else
  1100. # warning no parallel worker CPU binding support
  1101. #endif
  1102. #endif
  1103. }
  1104. static void
  1105. _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1106. {
  1107. /* launch one thread per CPU */
  1108. unsigned ram_memory_node;
  1109. /* note that even if the CPU cpu are not used, we always have a RAM
  1110. * node */
  1111. /* TODO : support NUMA ;) */
  1112. ram_memory_node = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
  1113. STARPU_ASSERT(ram_memory_node == STARPU_MAIN_RAM);
  1114. #ifdef STARPU_SIMGRID
  1115. char name[16];
  1116. msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
  1117. STARPU_ASSERT(host);
  1118. _starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
  1119. #endif
  1120. /* We will store all the busid of the different (src, dst)
  1121. * combinations in a matrix which we initialize here. */
  1122. _starpu_initialize_busid_matrix();
  1123. /* Each device is initialized,
  1124. * giving it a memory node and a core bind id.
  1125. */
  1126. /* TODO: STARPU_MAXNUMANODES */
  1127. unsigned numa_init[1] = { 1 };
  1128. unsigned numa_memory_nodes[1] = { ram_memory_node };
  1129. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1130. unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
  1131. unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
  1132. #ifndef STARPU_SIMGRID
  1133. unsigned cuda_bindid[STARPU_MAXCUDADEVS];
  1134. #endif
  1135. #endif
  1136. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1137. unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
  1138. unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
  1139. #ifndef STARPU_SIMGRID
  1140. unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
  1141. #endif
  1142. #endif
  1143. #ifdef STARPU_USE_MIC
  1144. unsigned mic_init[STARPU_MAXMICDEVS] = { };
  1145. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  1146. #ifndef STARPU_SIGMRID
  1147. unsigned mic_bindid[STARPU_MAXMICDEVS];
  1148. #endif
  1149. #endif
  1150. unsigned worker;
  1151. for (worker = 0; worker < config->topology.nworkers; worker++)
  1152. {
  1153. unsigned memory_node = -1;
  1154. struct _starpu_worker *workerarg = &config->workers[worker];
  1155. unsigned devid = workerarg->devid;
  1156. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)) && !defined(STARPU_SIMGRID)
  1157. /* Perhaps the worker has some "favourite" bindings */
  1158. int *preferred_binding = NULL;
  1159. int npreferred = 0;
  1160. #endif
  1161. /* select the memory node that contains worker's memory */
  1162. switch (workerarg->arch)
  1163. {
  1164. case STARPU_CPU_WORKER:
  1165. {
  1166. /* TODO: NUMA */
  1167. int numaid = 0;
  1168. /* "dedicate" a cpu core to that worker */
  1169. if (numa_init[numaid])
  1170. {
  1171. memory_node = numa_memory_nodes[numaid];
  1172. }
  1173. else
  1174. {
  1175. numa_init[numaid] = 1;
  1176. memory_node = numa_memory_nodes[numaid] = _starpu_memory_node_register(STARPU_CPU_RAM, numaid);
  1177. #ifdef STARPU_SIMGRID
  1178. snprintf(name, sizeof(name), "RAM%d", numaid);
  1179. host = _starpu_simgrid_get_host_by_name(name);
  1180. STARPU_ASSERT(host);
  1181. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1182. #endif
  1183. }
  1184. workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
  1185. _starpu_memory_node_add_nworkers(memory_node);
  1186. break;
  1187. }
  1188. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1189. case STARPU_CUDA_WORKER:
  1190. #ifndef STARPU_SIMGRID
  1191. if (may_bind_automatically[STARPU_CUDA_WORKER])
  1192. {
  1193. /* StarPU is allowed to bind threads automatically */
  1194. preferred_binding = _starpu_get_cuda_affinity_vector(devid);
  1195. npreferred = config->topology.nhwpus;
  1196. }
  1197. #endif /* SIMGRID */
  1198. if (cuda_init[devid])
  1199. {
  1200. memory_node = cuda_memory_nodes[devid];
  1201. #ifndef STARPU_SIMGRID
  1202. workerarg->bindid = cuda_bindid[devid];
  1203. #endif /* SIMGRID */
  1204. }
  1205. else
  1206. {
  1207. cuda_init[devid] = 1;
  1208. #ifndef STARPU_SIMGRID
  1209. workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1210. #endif /* SIMGRID */
  1211. memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid);
  1212. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1213. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1214. #ifdef STARPU_SIMGRID
  1215. const char* cuda_memcpy_peer;
  1216. snprintf(name, sizeof(name), "CUDA%d", devid);
  1217. host = _starpu_simgrid_get_host_by_name(name);
  1218. STARPU_ASSERT(host);
  1219. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1220. cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
  1221. #endif /* SIMGRID */
  1222. if (
  1223. #ifdef STARPU_SIMGRID
  1224. cuda_memcpy_peer && atoll(cuda_memcpy_peer)
  1225. #elif defined(HAVE_CUDA_MEMCPY_PEER)
  1226. 1
  1227. #else /* MEMCPY_PEER */
  1228. 0
  1229. #endif /* MEMCPY_PEER */
  1230. )
  1231. {
  1232. unsigned worker2;
  1233. for (worker2 = 0; worker2 < worker; worker2++)
  1234. {
  1235. struct _starpu_worker *workerarg2 = &config->workers[worker2];
  1236. if (workerarg2->arch == STARPU_CUDA_WORKER)
  1237. {
  1238. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  1239. _starpu_register_bus(memory_node2, memory_node);
  1240. _starpu_register_bus(memory_node, memory_node2);
  1241. }
  1242. }
  1243. }
  1244. }
  1245. _starpu_memory_node_add_nworkers(memory_node);
  1246. break;
  1247. #endif
  1248. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1249. case STARPU_OPENCL_WORKER:
  1250. #ifndef STARPU_SIMGRID
  1251. if (may_bind_automatically[STARPU_OPENCL_WORKER])
  1252. {
  1253. /* StarPU is allowed to bind threads automatically */
  1254. preferred_binding = _starpu_get_opencl_affinity_vector(devid);
  1255. npreferred = config->topology.nhwpus;
  1256. }
  1257. #endif /* SIMGRID */
  1258. if (opencl_init[devid])
  1259. {
  1260. memory_node = opencl_memory_nodes[devid];
  1261. #ifndef STARPU_SIMGRID
  1262. workerarg->bindid = opencl_bindid[devid];
  1263. #endif /* SIMGRID */
  1264. }
  1265. else
  1266. {
  1267. opencl_init[devid] = 1;
  1268. #ifndef STARPU_SIMGRID
  1269. workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1270. #endif /* SIMGRID */
  1271. memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid);
  1272. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1273. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1274. #ifdef STARPU_SIMGRID
  1275. snprintf(name, sizeof(name), "OpenCL%d", devid);
  1276. host = _starpu_simgrid_get_host_by_name(name);
  1277. STARPU_ASSERT(host);
  1278. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1279. #endif /* SIMGRID */
  1280. }
  1281. _starpu_memory_node_add_nworkers(memory_node);
  1282. break;
  1283. #endif
  1284. #ifdef STARPU_USE_MIC
  1285. case STARPU_MIC_WORKER:
  1286. if (mic_init[devid])
  1287. {
  1288. memory_node = mic_memory_nodes[devid];
  1289. }
  1290. else
  1291. {
  1292. mic_init[devid] = 1;
  1293. #ifndef STARPU_SIMGRID
  1294. /* TODO */
  1295. //if (may_bind_automatically)
  1296. //{
  1297. // /* StarPU is allowed to bind threads automatically */
  1298. // preferred_binding = _starpu_get_mic_affinity_vector(devid);
  1299. // npreferred = config->topology.nhwpus;
  1300. //}
  1301. mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1302. #endif /* SIMGRID */
  1303. memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid);
  1304. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1305. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1306. }
  1307. workerarg->bindid = mic_bindid[devid];
  1308. _starpu_memory_node_add_nworkers(memory_node);
  1309. break;
  1310. #endif /* STARPU_USE_MIC */
  1311. #ifdef STARPU_USE_SCC
  1312. case STARPU_SCC_WORKER:
  1313. {
  1314. /* Node 0 represents the SCC shared memory when we're on SCC. */
  1315. struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
  1316. descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
  1317. memory_node = ram_memory_node;
  1318. _starpu_memory_node_add_nworkers(memory_node);
  1319. }
  1320. break;
  1321. #endif
  1322. default:
  1323. STARPU_ABORT();
  1324. }
  1325. workerarg->memory_node = memory_node;
  1326. _STARPU_DEBUG("worker %d type %d devid %d bound to cpu %d, STARPU memory node %d\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);
  1327. #ifdef __GLIBC__
  1328. if (workerarg->bindid != -1)
  1329. {
  1330. /* Save the initial cpuset */
  1331. CPU_ZERO(&workerarg->cpu_set);
  1332. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  1333. }
  1334. #endif /* __GLIBC__ */
  1335. #ifdef STARPU_HAVE_HWLOC
  1336. if (workerarg->bindid == -1)
  1337. {
  1338. workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
  1339. }
  1340. else
  1341. {
  1342. /* Put the worker descriptor in the userdata field of the
  1343. * hwloc object describing the CPU */
  1344. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1345. config->pu_depth,
  1346. workerarg->bindid);
  1347. if (worker_obj->userdata == NULL)
  1348. {
  1349. worker_obj->userdata = _starpu_worker_list_new();
  1350. }
  1351. _starpu_worker_list_push_front(worker_obj->userdata, workerarg);
  1352. /* Clear the cpu set and set the cpu */
  1353. workerarg->hwloc_cpu_set = hwloc_bitmap_dup (worker_obj->cpuset);
  1354. }
  1355. #endif
  1356. }
  1357. }
  1358. int
  1359. _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
  1360. {
  1361. int ret;
  1362. unsigned i;
  1363. ret = _starpu_init_machine_config(config, no_mp_config);
  1364. if (ret)
  1365. return ret;
  1366. /* for the data management library */
  1367. _starpu_memory_nodes_init();
  1368. _starpu_init_workers_binding(config, no_mp_config);
  1369. config->cpus_nodeid = -1;
  1370. config->cuda_nodeid = -1;
  1371. config->opencl_nodeid = -1;
  1372. config->mic_nodeid = -1;
  1373. config->scc_nodeid = -1;
  1374. for (i = 0; i < starpu_worker_get_count(); i++)
  1375. {
  1376. switch (starpu_worker_get_type(i))
  1377. {
  1378. case STARPU_CPU_WORKER:
  1379. if (config->cpus_nodeid == -1)
  1380. config->cpus_nodeid = starpu_worker_get_memory_node(i);
  1381. else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
  1382. config->cpus_nodeid = -2;
  1383. break;
  1384. case STARPU_CUDA_WORKER:
  1385. if (config->cuda_nodeid == -1)
  1386. config->cuda_nodeid = starpu_worker_get_memory_node(i);
  1387. else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
  1388. config->cuda_nodeid = -2;
  1389. break;
  1390. case STARPU_OPENCL_WORKER:
  1391. if (config->opencl_nodeid == -1)
  1392. config->opencl_nodeid = starpu_worker_get_memory_node(i);
  1393. else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
  1394. config->opencl_nodeid = -2;
  1395. break;
  1396. case STARPU_MIC_WORKER:
  1397. if (config->mic_nodeid == -1)
  1398. config->mic_nodeid = starpu_worker_get_memory_node(i);
  1399. else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
  1400. config->mic_nodeid = -2;
  1401. break;
  1402. case STARPU_SCC_WORKER:
  1403. if (config->scc_nodeid == -1)
  1404. config->scc_nodeid = starpu_worker_get_memory_node(i);
  1405. else if (config->scc_nodeid != (int) starpu_worker_get_memory_node(i))
  1406. config->scc_nodeid = -2;
  1407. break;
  1408. case STARPU_ANY_WORKER:
  1409. STARPU_ASSERT(0);
  1410. }
  1411. }
  1412. return 0;
  1413. }
  1414. void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  1415. {
  1416. #ifdef STARPU_USE_MIC
  1417. _starpu_deinit_mp_config(config);
  1418. #endif
  1419. /* cleanup StarPU internal data structures */
  1420. _starpu_memory_nodes_deinit();
  1421. _starpu_destroy_machine_config(config);
  1422. }
  1423. void
  1424. starpu_topology_print (FILE *output)
  1425. {
  1426. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1427. struct _starpu_machine_topology *topology = &config->topology;
  1428. unsigned pu;
  1429. unsigned worker;
  1430. unsigned nworkers = starpu_worker_get_count();
  1431. unsigned ncombinedworkers = topology->ncombinedworkers;
  1432. unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
  1433. for (pu = 0; pu < topology->nhwpus; pu++)
  1434. {
  1435. if ((pu % nthreads_per_core) == 0)
  1436. fprintf(output, "core %u", pu / nthreads_per_core);
  1437. fprintf(output, "\tPU %u\t", pu);
  1438. for (worker = 0;
  1439. worker < nworkers + ncombinedworkers;
  1440. worker++)
  1441. {
  1442. if (worker < nworkers)
  1443. {
  1444. struct _starpu_worker *workerarg = &config->workers[worker];
  1445. if (workerarg->bindid == (int) pu)
  1446. {
  1447. char name[256];
  1448. starpu_worker_get_name (worker, name,
  1449. sizeof(name));
  1450. fprintf(output, "%s\t", name);
  1451. }
  1452. }
  1453. else
  1454. {
  1455. int worker_size, i;
  1456. int *combined_workerid;
  1457. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  1458. for (i = 0; i < worker_size; i++)
  1459. {
  1460. if (topology->workers_bindid[combined_workerid[i]] == pu)
  1461. fprintf(output, "comb %u\t", worker-nworkers);
  1462. }
  1463. }
  1464. }
  1465. fprintf(output, "\n");
  1466. }
  1467. }