topology.c 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2015 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015 CNRS
  5. * Copyright (C) 2011 INRIA
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <common/config.h>
  21. #include <core/workers.h>
  22. #include <core/debug.h>
  23. #include <core/topology.h>
  24. #include <drivers/cuda/driver_cuda.h>
  25. #include <drivers/mic/driver_mic_source.h>
  26. #include <drivers/scc/driver_scc_source.h>
  27. #include <drivers/mp_common/source_common.h>
  28. #include <drivers/opencl/driver_opencl.h>
  29. #include <profiling/profiling.h>
  30. #include <datawizard/memory_nodes.h>
  31. #include <common/uthash.h>
  32. #ifdef STARPU_HAVE_HWLOC
  33. #include <hwloc.h>
  34. #ifndef HWLOC_API_VERSION
  35. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  36. #endif
  37. #endif
  38. #ifdef STARPU_HAVE_WINDOWS
  39. #include <windows.h>
  40. #endif
  41. #ifdef STARPU_SIMGRID
  42. #include <core/simgrid.h>
  43. #endif
/* Set to 1 at the end of _starpu_init_topology(), which returns immediately
 * on later calls once this flag is non-zero. */
static unsigned topology_is_initialized = 0;
#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
/* Node of a uthash table of device identifiers, keyed on `gpuid'. */
struct handle_entry
{
	UT_hash_handle hh;	/* uthash bookkeeping */
	unsigned gpuid;		/* hash key: device identifier */
};
# if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
/* Head of the `devices_using_cuda' hash table.  It is looked up when the
 * OpenCL device list is built, to drop devices found in it.
 * NOTE(review): the code that fills this table is not in this part of the
 * file. */
static struct handle_entry *devices_using_cuda;
# endif
/* Set to 1 by _starpu_initialize_workers_deviceid() when neither an
 * environment variable nor an explicit list provided the device ids. */
static unsigned may_bind_automatically = 0;
#endif /* STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_SCC || STARPU_SIMGRID */
  57. /*
  58. * Discover the topology of the machine
  59. */
  60. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  61. static void
  62. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  63. int *current, int *workers_gpuid,
  64. const char *varname, unsigned nhwgpus)
  65. {
  66. char *strval;
  67. unsigned i;
  68. *current = 0;
  69. /* conf->workers_gpuid indicates the successive GPU identifier that
  70. * should be used to bind the workers. It should be either filled
  71. * according to the user's explicit parameters (from starpu_conf) or
  72. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  73. * round-robin policy is used to distributed the workers over the
  74. * cores. */
  75. /* what do we use, explicit value, env. variable, or round-robin ? */
  76. if ((strval = getenv(varname)))
  77. {
  78. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  79. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  80. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  81. * 2". */
  82. unsigned wrap = 0;
  83. unsigned number_of_entries = 0;
  84. char *endptr;
  85. /* we use the content of the STARPU_WORKERS_CUDAID
  86. * env. variable */
  87. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  88. {
  89. if (!wrap)
  90. {
  91. long int val;
  92. val = strtol(strval, &endptr, 10);
  93. if (endptr != strval)
  94. {
  95. workers_gpuid[i] = (unsigned)val;
  96. strval = endptr;
  97. }
  98. else
  99. {
  100. /* there must be at least one entry */
  101. STARPU_ASSERT(i != 0);
  102. number_of_entries = i;
  103. /* there is no more values in the
  104. * string */
  105. wrap = 1;
  106. workers_gpuid[i] = workers_gpuid[0];
  107. }
  108. }
  109. else
  110. {
  111. workers_gpuid[i] =
  112. workers_gpuid[i % number_of_entries];
  113. }
  114. }
  115. }
  116. else if (explicit_workers_gpuid)
  117. {
  118. /* we use the explicit value from the user */
  119. memcpy(workers_gpuid,
  120. explicit_workers_gpuid,
  121. STARPU_NMAXWORKERS*sizeof(unsigned));
  122. }
  123. else
  124. {
  125. /* by default, we take a round robin policy */
  126. if (nhwgpus > 0)
  127. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  128. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  129. /* StarPU can use sampling techniques to bind threads
  130. * correctly
  131. * TODO: use a private value for each kind of device */
  132. may_bind_automatically = 1;
  133. }
  134. }
  135. #endif
  136. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  137. static void
  138. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  139. {
  140. struct _starpu_machine_topology *topology = &config->topology;
  141. struct starpu_conf *uconf = config->conf;
  142. _starpu_initialize_workers_deviceid (
  143. uconf->use_explicit_workers_cuda_gpuid == 0
  144. ? NULL
  145. : (int *)uconf->workers_cuda_gpuid,
  146. &(config->current_cuda_gpuid),
  147. (int *)topology->workers_cuda_gpuid,
  148. "STARPU_WORKERS_CUDAID",
  149. topology->nhwcudagpus);
  150. }
  151. static inline int
  152. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  153. {
  154. unsigned i =
  155. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  156. return (int)config->topology.workers_cuda_gpuid[i];
  157. }
  158. #endif
  159. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  160. static void
  161. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  162. {
  163. struct _starpu_machine_topology *topology = &config->topology;
  164. struct starpu_conf *uconf = config->conf;
  165. _starpu_initialize_workers_deviceid(
  166. uconf->use_explicit_workers_opencl_gpuid == 0
  167. ? NULL
  168. : (int *)uconf->workers_opencl_gpuid,
  169. &(config->current_opencl_gpuid),
  170. (int *)topology->workers_opencl_gpuid,
  171. "STARPU_WORKERS_OPENCLID",
  172. topology->nhwopenclgpus);
  173. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  174. // Detect devices which are already used with CUDA
  175. {
  176. unsigned tmp[STARPU_NMAXWORKERS];
  177. unsigned nb=0;
  178. int i;
  179. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  180. {
  181. struct handle_entry *entry;
  182. int devid = config->topology.workers_opencl_gpuid[i];
  183. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  184. if (entry == NULL)
  185. {
  186. tmp[nb] = topology->workers_opencl_gpuid[i];
  187. nb++;
  188. }
  189. }
  190. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  191. tmp[i] = -1;
  192. memcpy (topology->workers_opencl_gpuid, tmp,
  193. sizeof(unsigned)*STARPU_NMAXWORKERS);
  194. }
  195. #endif /* STARPU_USE_CUDA */
  196. {
  197. // Detect identical devices
  198. struct handle_entry *devices_already_used = NULL;
  199. unsigned tmp[STARPU_NMAXWORKERS];
  200. unsigned nb=0;
  201. int i;
  202. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  203. {
  204. int devid = topology->workers_opencl_gpuid[i];
  205. struct handle_entry *entry;
  206. HASH_FIND_INT(devices_already_used, &devid, entry);
  207. if (entry == NULL)
  208. {
  209. struct handle_entry *entry2;
  210. entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
  211. STARPU_ASSERT(entry2 != NULL);
  212. entry2->gpuid = devid;
  213. HASH_ADD_INT(devices_already_used, gpuid,
  214. entry2);
  215. tmp[nb] = devid;
  216. nb ++;
  217. }
  218. }
  219. struct handle_entry *entry, *tempo;
  220. HASH_ITER(hh, devices_already_used, entry, tempo)
  221. {
  222. HASH_DEL(devices_already_used, entry);
  223. free(entry);
  224. }
  225. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  226. tmp[i] = -1;
  227. memcpy (topology->workers_opencl_gpuid, tmp,
  228. sizeof(unsigned)*STARPU_NMAXWORKERS);
  229. }
  230. }
  231. static inline int
  232. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  233. {
  234. unsigned i =
  235. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  236. return (int)config->topology.workers_opencl_gpuid[i];
  237. }
  238. #endif
  239. #if 0
  240. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  241. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  242. {
  243. struct _starpu_machine_topology *topology = &config->topology;
  244. struct starpu_conf *uconf = config->conf;
  245. _starpu_initialize_workers_deviceid(
  246. uconf->use_explicit_workers_mic_deviceid == 0
  247. ? NULL
  248. : (int *)config->user_conf->workers_mic_deviceid,
  249. &(config->current_mic_deviceid),
  250. (int *)topology->workers_mic_deviceid,
  251. "STARPU_WORKERS_MICID",
  252. topology->nhwmiccores);
  253. }
  254. #endif
  255. #endif
  256. #ifdef STARPU_USE_SCC
  257. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  258. {
  259. struct _starpu_machine_topology *topology = &config->topology;
  260. struct starpu_conf *uconf = config->conf;
  261. _starpu_initialize_workers_deviceid(
  262. uconf->use_explicit_workers_scc_deviceid == 0
  263. ? NULL
  264. : (int *) uconf->workers_scc_deviceid,
  265. &(config->current_scc_deviceid),
  266. (int *)topology->workers_scc_deviceid,
  267. "STARPU_WORKERS_SCCID",
  268. topology->nhwscc);
  269. }
  270. #endif /* STARPU_USE_SCC */
  271. #if 0
  272. #ifdef STARPU_USE_MIC
  273. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  274. {
  275. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  276. return (int)config->topology.workers_mic_deviceid[i];
  277. }
  278. #endif
  279. #endif
  280. #ifdef STARPU_USE_SCC
  281. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  282. {
  283. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  284. return (int)config->topology.workers_scc_deviceid[i];
  285. }
  286. #endif
  287. #ifdef STARPU_USE_MIC
  288. static void
  289. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  290. {
  291. /* Discover the topology of the mic node identifier by MIC_IDX. That
  292. * means, make this StarPU instance aware of the number of cores available
  293. * on this MIC device. Update the `nhwmiccores' topology field
  294. * accordingly. */
  295. struct _starpu_machine_topology *topology = &config->topology;
  296. int nbcores;
  297. _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
  298. topology->nhwmiccores[mic_idx] = nbcores;
  299. }
  300. static int
  301. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  302. COIENGINE *coi_handle, COIPROCESS *coi_process)
  303. {
  304. /* Initialize the MIC node of index MIC_IDX. */
  305. struct starpu_conf *user_conf = config->conf;
  306. char ***argv = _starpu_get_argv();
  307. const char *suffixes[] = {"-mic", "_mic", NULL};
  308. /* Environment variables to send to the Sink, it informs it what kind
  309. * of node it is (architecture and type) as there is no way to discover
  310. * it itself */
  311. char mic_idx_env[32];
  312. sprintf(mic_idx_env, "_STARPU_MIC_DEVID=%d", mic_idx);
  313. /* XXX: this is currently necessary so that the remote process does not
  314. * segfault. */
  315. char nb_mic_env[32];
  316. sprintf(nb_mic_env, "_STARPU_MIC_NB=%d", 2);
  317. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  318. char mic_sink_program_path[1024];
  319. /* Let's get the helper program to run on the MIC device */
  320. int mic_file_found =
  321. _starpu_src_common_locate_file (mic_sink_program_path,
  322. getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  323. getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  324. user_conf->mic_sink_program_path,
  325. (argv ? (*argv)[0] : NULL),
  326. suffixes);
  327. if (0 != mic_file_found)
  328. {
  329. fprintf(stderr, "No MIC program specified, use the environment\n"
  330. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  331. "or the field 'starpu_conf.mic_sink_program_path'\n"
  332. "to define it.\n");
  333. return -1;
  334. }
  335. COIRESULT res;
  336. /* Let's get the handle which let us manage the remote MIC device */
  337. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  338. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  339. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  340. /* We launch the helper on the MIC device, which will wait for us
  341. * to give it work to do.
  342. * As we will communicate further with the device throught scif we
  343. * don't need to keep the process pointer */
  344. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  345. mic_sink_env, 1, NULL, 0, NULL,
  346. coi_process);
  347. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  348. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  349. /* Let's create the node structure, we'll communicate with the peer
  350. * through scif thanks to it */
  351. mic_nodes[mic_idx] =
  352. _starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
  353. return 0;
  354. }
  355. #endif
  356. static void
  357. _starpu_init_topology (struct _starpu_machine_config *config)
  358. {
  359. /* Discover the topology, meaning finding all the available PUs for
  360. the compiled drivers. These drivers MUST have been initialized
  361. before calling this function. The discovered topology is filled in
  362. CONFIG. */
  363. struct _starpu_machine_topology *topology = &config->topology;
  364. if (topology_is_initialized)
  365. return;
  366. topology->nhwcpus = 0;
  367. topology->nhwpus = 0;
  368. #ifndef STARPU_SIMGRID
  369. #ifdef STARPU_HAVE_HWLOC
  370. hwloc_topology_init(&topology->hwtopology);
  371. hwloc_topology_load(topology->hwtopology);
  372. #endif
  373. #endif
  374. #ifdef STARPU_SIMGRID
  375. config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  376. #elif defined(STARPU_HAVE_HWLOC)
  377. /* Discover the CPUs relying on the hwloc interface and fills CONFIG
  378. * accordingly. */
  379. config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
  380. HWLOC_OBJ_CORE);
  381. config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
  382. HWLOC_OBJ_PU);
  383. /* Would be very odd */
  384. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  385. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  386. {
  387. /* unknown, using logical procesors as fallback */
  388. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  389. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  390. HWLOC_OBJ_PU);
  391. }
  392. topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  393. config->cpu_depth);
  394. topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  395. config->pu_depth);
  396. #elif defined(HAVE_SYSCONF)
  397. /* Discover the CPUs relying on the sysconf(3) function and fills
  398. * CONFIG accordingly. */
  399. config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  400. #elif defined(_WIN32)
  401. /* Discover the CPUs on Cygwin and MinGW systems. */
  402. SYSTEM_INFO sysinfo;
  403. GetSystemInfo(&sysinfo);
  404. config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  405. #else
  406. #warning no way to know number of cores, assuming 1
  407. config->topology.nhwcpus = config->topology.nhwpus = 1;
  408. #endif
  409. _starpu_cuda_discover_devices(config);
  410. _starpu_opencl_discover_devices(config);
  411. #ifdef STARPU_USE_SCC
  412. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  413. #endif
  414. topology_is_initialized = 1;
  415. }
  416. /*
  417. * Bind workers on the different processors
  418. */
  419. static void
  420. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  421. {
  422. char *strval;
  423. unsigned i;
  424. struct _starpu_machine_topology *topology = &config->topology;
  425. config->current_bindid = 0;
  426. /* conf->workers_bindid indicates the successive logical PU identifier that
  427. * should be used to bind the workers. It should be either filled
  428. * according to the user's explicit parameters (from starpu_conf) or
  429. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  430. * round-robin policy is used to distributed the workers over the
  431. * cores. */
  432. /* what do we use, explicit value, env. variable, or round-robin ? */
  433. if ((strval = getenv("STARPU_WORKERS_CPUID")))
  434. {
  435. /* STARPU_WORKERS_CPUID certainly contains less entries than
  436. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  437. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  438. * 2". */
  439. unsigned wrap = 0;
  440. unsigned number_of_entries = 0;
  441. char *endptr;
  442. /* we use the content of the STARPU_WORKERS_CPUID
  443. * env. variable */
  444. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  445. {
  446. if (!wrap)
  447. {
  448. long int val;
  449. val = strtol(strval, &endptr, 10);
  450. if (endptr != strval)
  451. {
  452. topology->workers_bindid[i] =
  453. (unsigned)(val % topology->nhwpus);
  454. strval = endptr;
  455. }
  456. else
  457. {
  458. /* there must be at least one entry */
  459. STARPU_ASSERT(i != 0);
  460. number_of_entries = i;
  461. /* there is no more values in the
  462. * string */
  463. wrap = 1;
  464. topology->workers_bindid[i] =
  465. topology->workers_bindid[0];
  466. }
  467. }
  468. else
  469. {
  470. topology->workers_bindid[i] =
  471. topology->workers_bindid[i % number_of_entries];
  472. }
  473. }
  474. }
  475. else if (config->conf->use_explicit_workers_bindid)
  476. {
  477. /* we use the explicit value from the user */
  478. memcpy(topology->workers_bindid,
  479. config->conf->workers_bindid,
  480. STARPU_NMAXWORKERS*sizeof(unsigned));
  481. }
  482. else
  483. {
  484. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  485. int k;
  486. int nbindids=0;
  487. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  488. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  489. i = 0; /* PU number currently assigned */
  490. k = 0; /* Number of threads already put on the current core */
  491. while(nbindids < STARPU_NMAXWORKERS)
  492. {
  493. if (k >= nth_per_core)
  494. {
  495. /* We have already put enough workers on this
  496. * core, skip remaining PUs from this core, and
  497. * proceed with next core */
  498. i += nhyperthreads-nth_per_core;
  499. k = 0;
  500. continue;
  501. }
  502. /* Add a worker to this core, by using this logical PU */
  503. topology->workers_bindid[nbindids++] =
  504. (unsigned)(i % topology->nhwpus);
  505. k++;
  506. i++;
  507. }
  508. }
  509. }
  510. /* This function gets the identifier of the next core on which to bind a
  511. * worker. In case a list of preferred cores was specified (logical indexes),
  512. * we look for a an available core among the list if possible, otherwise a
  513. * round-robin policy is used. */
  514. static inline int
  515. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  516. int *preferred_binding, int npreferred)
  517. {
  518. struct _starpu_machine_topology *topology = &config->topology;
  519. unsigned found = 0;
  520. int current_preferred;
  521. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  522. /* loop over the preference list */
  523. for (current_preferred = 0;
  524. current_preferred < npreferred;
  525. current_preferred++)
  526. {
  527. if (found)
  528. break;
  529. /* Try to get this core */
  530. unsigned requested_core = preferred_binding[current_preferred];
  531. /* can we bind the worker on the preferred core ? */
  532. unsigned ind;
  533. /* Look at the remaining cores to be bound to */
  534. for (ind = config->current_bindid;
  535. ind < topology->nhwpus / nhyperthreads;
  536. ind++)
  537. {
  538. if (topology->workers_bindid[ind] == requested_core * nhyperthreads)
  539. {
  540. /* the cpu is available, we use it ! In order
  541. * to make sure that it will not be used again
  542. * later on, we exchange it with the next bindid we were supposed to use */
  543. topology->workers_bindid[ind] =
  544. topology->workers_bindid[config->current_bindid];
  545. topology->workers_bindid[config->current_bindid] = requested_core * nhyperthreads;
  546. found = 1;
  547. break;
  548. }
  549. }
  550. }
  551. unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
  552. return (int)topology->workers_bindid[i];
  553. }
  554. unsigned
  555. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  556. {
  557. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  558. _starpu_opencl_init();
  559. #endif
  560. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  561. _starpu_init_cuda();
  562. #endif
  563. _starpu_init_topology(config);
  564. return config->topology.nhwcpus;
  565. }
  566. unsigned
  567. _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
  568. {
  569. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  570. _starpu_opencl_init();
  571. #endif
  572. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  573. _starpu_init_cuda();
  574. #endif
  575. _starpu_init_topology(config);
  576. return config->topology.nhwpus;
  577. }
  578. #ifdef STARPU_USE_MIC
  579. static void
  580. _starpu_init_mic_config (struct _starpu_machine_config *config,
  581. struct starpu_conf *user_conf,
  582. unsigned mic_idx)
  583. {
  584. // Configure the MIC device of index MIC_IDX.
  585. struct _starpu_machine_topology *topology = &config->topology;
  586. topology->nhwmiccores[mic_idx] = 0;
  587. _starpu_init_mic_topology (config, mic_idx);
  588. int nmiccores;
  589. nmiccores = starpu_get_env_number("STARPU_NMIC");
  590. /* STARPU_NMIC is not set. Did the user specify anything ? */
  591. if (nmiccores == -1 && user_conf)
  592. nmiccores = user_conf->nmic;
  593. if (nmiccores != 0)
  594. {
  595. if (nmiccores == -1)
  596. {
  597. /* Nothing was specified, so let's use the number of
  598. * detected mic cores. ! */
  599. nmiccores = topology->nhwmiccores[mic_idx];
  600. }
  601. else
  602. {
  603. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  604. {
  605. /* The user requires more MIC devices than there is available */
  606. fprintf(stderr,
  607. "# Warning: %d MIC devices requested. Only %d available.\n",
  608. nmiccores, topology->nhwmiccores[mic_idx]);
  609. nmiccores = topology->nhwmiccores[mic_idx];
  610. }
  611. }
  612. }
  613. topology->nmiccores[mic_idx] = nmiccores;
  614. STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  615. "topology->nmiccores[mic_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
  616. mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  617. /* _starpu_initialize_workers_mic_deviceid (config); */
  618. unsigned miccore_id;
  619. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  620. {
  621. int worker_idx = topology->nworkers + miccore_id;
  622. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  623. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device *) malloc(sizeof(struct starpu_perfmodel_device));
  624. config->workers[worker_idx].perf_arch.ndevices = 1;
  625. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  626. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  627. config->workers[worker_idx].perf_arch.devices[0].ncores = 0;
  628. config->workers[worker_idx].devid = mic_idx;
  629. config->workers[worker_idx].subworkerid = miccore_id;
  630. config->workers[worker_idx].worker_mask = STARPU_MIC;
  631. config->worker_mask |= STARPU_MIC;
  632. }
  633. topology->nworkers += topology->nmiccores[mic_idx];
  634. }
  635. #ifdef STARPU_USE_MIC
  636. static COIENGINE handles[2];
  637. static COIPROCESS process[2];
  638. #endif
  639. static void
  640. _starpu_init_mp_config (struct _starpu_machine_config *config,
  641. struct starpu_conf *user_conf)
  642. {
  643. /* Discover and configure the mp topology. That means:
  644. * - discover the number of mp nodes;
  645. * - initialize each discovered node;
  646. * - discover the local topology (number of PUs/devices) of each node;
  647. * - configure the workers accordingly.
  648. */
  649. struct _starpu_machine_topology *topology = &config->topology;
  650. // We currently only support MIC at this level.
  651. #ifdef STARPU_USE_MIC
  652. /* Discover and initialize the number of MIC nodes through the mp
  653. * infrastructure. */
  654. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  655. int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
  656. if (-1 == reqmicdevices)
  657. reqmicdevices = nhwmicdevices;
  658. topology->nmicdevices = 0;
  659. unsigned i;
  660. for (i = 0; i < STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices); i++)
  661. if (0 == _starpu_init_mic_node (config, i, &handles[i], &process[i]))
  662. topology->nmicdevices++;
  663. for (i = 0; i < topology->nmicdevices; i++)
  664. _starpu_init_mic_config (config, user_conf, i);
  665. #endif
  666. }
/* Shut down the MIC node of index MIC_IDX: send it the STARPU_EXIT
 * command, destroy its COI process and release its `mic_nodes' entry. */
static void
_starpu_deinit_mic_node (unsigned mic_idx)
{
	/* Ask the sink to exit before its process is destroyed */
	_starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_EXIT, NULL, 0);
	COIProcessDestroy(process[mic_idx], -1, 0, NULL, NULL);
	_starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
}
  674. static void
  675. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  676. {
  677. struct _starpu_machine_topology *topology = &config->topology;
  678. unsigned i;
  679. for (i = 0; i < topology->nmicdevices; i++)
  680. _starpu_deinit_mic_node (i);
  681. _starpu_mic_clear_kernels();
  682. }
  683. #endif
  684. static int
  685. _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  686. {
  687. int i;
  688. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  689. config->workers[i].workerid = i;
  690. struct _starpu_machine_topology *topology = &config->topology;
  691. topology->nworkers = 0;
  692. topology->ncombinedworkers = 0;
  693. topology->nsched_ctxs = 0;
  694. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  695. _starpu_opencl_init();
  696. #endif
  697. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  698. _starpu_init_cuda();
  699. #endif
  700. _starpu_init_topology(config);
  701. _starpu_initialize_workers_bindid(config);
  702. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  703. int ncuda = config->conf->ncuda;
  704. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  705. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  706. #ifndef STARPU_NON_BLOCKING_DRIVERS
  707. if (nworker_per_cuda > 1)
  708. {
  709. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  710. nworker_per_cuda = 1;
  711. }
  712. #endif
  713. if (ncuda != 0)
  714. {
  715. /* The user did not disable CUDA. We need to initialize CUDA
  716. * early to count the number of devices */
  717. _starpu_init_cuda();
  718. int nb_devices = _starpu_get_cuda_device_count();
  719. if (ncuda == -1)
  720. {
  721. /* Nothing was specified, so let's choose ! */
  722. ncuda = nb_devices;
  723. }
  724. else
  725. {
  726. if (ncuda > nb_devices)
  727. {
  728. /* The user requires more CUDA devices than
  729. * there is available */
  730. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  731. ncuda = nb_devices;
  732. }
  733. }
  734. }
  735. /* Now we know how many CUDA devices will be used */
  736. topology->ncudagpus = ncuda;
  737. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  738. _starpu_initialize_workers_cuda_gpuid(config);
  739. unsigned cudagpu;
  740. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  741. {
  742. int devid = _starpu_get_next_cuda_gpuid(config);
  743. for (i = 0; i < nworker_per_cuda; i++)
  744. {
  745. int worker_idx = topology->nworkers + cudagpu * nworker_per_cuda + i;
  746. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  747. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  748. config->workers[worker_idx].perf_arch.ndevices = 1;
  749. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  750. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  751. // TODO: fix perfmodels etc.
  752. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  753. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  754. config->workers[worker_idx].devid = devid;
  755. config->workers[worker_idx].subworkerid = i;
  756. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  757. config->worker_mask |= STARPU_CUDA;
  758. struct handle_entry *entry;
  759. entry = (struct handle_entry *) malloc(sizeof(*entry));
  760. STARPU_ASSERT(entry != NULL);
  761. entry->gpuid = devid;
  762. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  763. }
  764. }
  765. topology->nworkers += topology->ncudagpus * nworker_per_cuda;
  766. #endif
  767. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  768. int nopencl = config->conf->nopencl;
  769. if (nopencl != 0)
  770. {
  771. /* The user did not disable OPENCL. We need to initialize
  772. * OpenCL early to count the number of devices */
  773. _starpu_opencl_init();
  774. int nb_devices;
  775. nb_devices = _starpu_opencl_get_device_count();
  776. if (nopencl == -1)
  777. {
  778. /* Nothing was specified, so let's choose ! */
  779. nopencl = nb_devices;
  780. if (nopencl > STARPU_MAXOPENCLDEVS)
  781. {
  782. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  783. nopencl = STARPU_MAXOPENCLDEVS;
  784. }
  785. }
  786. else
  787. {
  788. /* Let's make sure this value is OK. */
  789. if (nopencl > nb_devices)
  790. {
  791. /* The user requires more OpenCL devices than
  792. * there is available */
  793. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  794. nopencl = nb_devices;
  795. }
  796. /* Let's make sure this value is OK. */
  797. if (nopencl > STARPU_MAXOPENCLDEVS)
  798. {
  799. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  800. nopencl = STARPU_MAXOPENCLDEVS;
  801. }
  802. }
  803. }
  804. topology->nopenclgpus = nopencl;
  805. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  806. _starpu_initialize_workers_opencl_gpuid(config);
  807. unsigned openclgpu;
  808. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  809. {
  810. int worker_idx = topology->nworkers + openclgpu;
  811. int devid = _starpu_get_next_opencl_gpuid(config);
  812. if (devid == -1)
  813. { // There is no more devices left
  814. topology->nopenclgpus = openclgpu;
  815. break;
  816. }
  817. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  818. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  819. config->workers[worker_idx].perf_arch.ndevices = 1;
  820. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  821. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  822. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  823. config->workers[worker_idx].subworkerid = 0;
  824. config->workers[worker_idx].devid = devid;
  825. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  826. config->worker_mask |= STARPU_OPENCL;
  827. }
  828. topology->nworkers += topology->nopenclgpus;
  829. #endif
  830. #ifdef STARPU_USE_SCC
  831. int nscc = config->conf->nscc;
  832. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  833. if (nscc != 0)
  834. {
  835. /* The user did not disable SCC. We need to count
  836. * the number of devices */
  837. int nb_devices = nb_scc_nodes;
  838. if (nscc == -1)
  839. {
  840. /* Nothing was specified, so let's choose ! */
  841. nscc = nb_devices;
  842. if (nscc > STARPU_MAXSCCDEVS)
  843. {
  844. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  845. nscc = STARPU_MAXSCCDEVS;
  846. }
  847. }
  848. else
  849. {
  850. /* Let's make sure this value is OK. */
  851. if (nscc > nb_devices)
  852. {
  853. /* The user requires more SCC devices than there is available */
  854. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  855. nscc = nb_devices;
  856. }
  857. /* Let's make sure this value is OK. */
  858. if (nscc > STARPU_MAXSCCDEVS)
  859. {
  860. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  861. nscc = STARPU_MAXSCCDEVS;
  862. }
  863. }
  864. }
  865. /* Now we know how many SCC devices will be used */
  866. topology->nsccdevices = nscc;
  867. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  868. _starpu_initialize_workers_scc_deviceid(config);
  869. unsigned sccdev;
  870. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  871. {
  872. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  873. int devid = _starpu_get_next_scc_deviceid(config);
  874. config->workers[topology->nworkers + sccdev].perf_arch.devices = (struct starpu_perfmodel_device)malloc(sizeof(struct starpu_perfmodel_device));
  875. config->workers[topology->nworkers + sccdev].perf_arch.ndevices = 1;
  876. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].type = STARPU_SCC_WORKER;
  877. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].devid = sccdev;
  878. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].ncore = 1;
  879. config->workers[topology->nworkers + sccdev].subworkerid = 0;
  880. config->workers[topology->nworkers + sccdev].devid = devid;
  881. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  882. config->worker_mask |= STARPU_SCC;
  883. }
  884. for (; sccdev < nb_scc_nodes; ++sccdev)
  885. _starpu_scc_exit_useless_node(sccdev);
  886. topology->nworkers += topology->nsccdevices;
  887. #endif /* STARPU_USE_SCC */
  888. /* Unless not requested, we need to complete configuration with the
  889. * ones of the mp nodes. */
  890. #ifdef STARPU_USE_MIC
  891. if (! no_mp_config)
  892. _starpu_init_mp_config (config, config->conf);
  893. #endif
  894. /* we put the CPU section after the accelerator : in case there was an
  895. * accelerator found, we devote one cpu */
  896. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  897. int ncpu = config->conf->ncpus;
  898. if (ncpu != 0)
  899. {
  900. if (ncpu == -1)
  901. {
  902. unsigned mic_busy_cpus = 0;
  903. unsigned j = 0;
  904. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  905. mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
  906. unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
  907. + topology->nopenclgpus + topology->nsccdevices;
  908. long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
  909. if (avail_cpus < 0)
  910. avail_cpus = 0;
  911. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  912. avail_cpus *= nth_per_core;
  913. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  914. }
  915. else
  916. {
  917. if (ncpu > STARPU_MAXCPUS)
  918. {
  919. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  920. ncpu = STARPU_MAXCPUS;
  921. }
  922. }
  923. }
  924. topology->ncpus = ncpu;
  925. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  926. unsigned cpu;
  927. for (cpu = 0; cpu < topology->ncpus; cpu++)
  928. {
  929. int worker_idx = topology->nworkers + cpu;
  930. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  931. config->workers[worker_idx].perf_arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
  932. config->workers[worker_idx].perf_arch.ndevices = 1;
  933. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
  934. config->workers[worker_idx].perf_arch.devices[0].devid = 0;
  935. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  936. config->workers[worker_idx].subworkerid = 0;
  937. config->workers[worker_idx].devid = cpu;
  938. config->workers[worker_idx].worker_mask = STARPU_CPU;
  939. config->worker_mask |= STARPU_CPU;
  940. }
  941. topology->nworkers += topology->ncpus;
  942. #endif
  943. if (topology->nworkers == 0)
  944. {
  945. _STARPU_DEBUG("No worker found, aborting ...\n");
  946. return -ENODEV;
  947. }
  948. return 0;
  949. }
  950. void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
  951. {
  952. _starpu_close_debug_logfile();
  953. unsigned worker;
  954. for (worker = 0; worker < config->topology.nworkers; worker++)
  955. {
  956. struct _starpu_worker *workerarg = &config->workers[worker];
  957. free(workerarg->perf_arch.devices);
  958. #ifdef STARPU_HAVE_HWLOC
  959. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  960. if (workerarg->bindid != -1)
  961. {
  962. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  963. config->pu_depth,
  964. workerarg->bindid);
  965. if (worker_obj->userdata)
  966. {
  967. _starpu_worker_list_delete(worker_obj->userdata);
  968. worker_obj->userdata = NULL;
  969. }
  970. }
  971. #endif
  972. }
  973. unsigned combined_worker_id;
  974. for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++)
  975. {
  976. struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
  977. free(combined_worker->perf_arch.devices);
  978. }
  979. #ifdef STARPU_HAVE_HWLOC
  980. hwloc_topology_destroy(config->topology.hwtopology);
  981. #endif
  982. topology_is_initialized = 0;
  983. #ifdef STARPU_USE_CUDA
  984. struct handle_entry *entry, *tmp;
  985. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  986. {
  987. HASH_DEL(devices_using_cuda, entry);
  988. free(entry);
  989. }
  990. devices_using_cuda = NULL;
  991. #endif
  992. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  993. may_bind_automatically = 0;
  994. #endif
  995. }
  996. void
  997. _starpu_bind_thread_on_cpu (
  998. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  999. int cpuid STARPU_ATTRIBUTE_UNUSED)
  1000. {
  1001. #ifdef STARPU_SIMGRID
  1002. return;
  1003. #else
  1004. if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
  1005. return;
  1006. if (cpuid < 0)
  1007. return;
  1008. #ifdef STARPU_HAVE_HWLOC
  1009. const struct hwloc_topology_support *support;
  1010. #ifdef STARPU_USE_OPENCL
  1011. _starpu_opencl_init();
  1012. #endif
  1013. #ifdef STARPU_USE_CUDA
  1014. _starpu_init_cuda();
  1015. #endif
  1016. _starpu_init_topology(config);
  1017. support = hwloc_topology_get_support (config->topology.hwtopology);
  1018. if (support->cpubind->set_thisthread_cpubind)
  1019. {
  1020. hwloc_obj_t obj =
  1021. hwloc_get_obj_by_depth (config->topology.hwtopology,
  1022. config->pu_depth, cpuid);
  1023. hwloc_bitmap_t set = obj->cpuset;
  1024. int ret;
  1025. hwloc_bitmap_singlify(set);
  1026. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1027. HWLOC_CPUBIND_THREAD);
  1028. if (ret)
  1029. {
  1030. perror("hwloc_set_cpubind");
  1031. STARPU_ABORT();
  1032. }
  1033. }
  1034. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  1035. int ret;
  1036. /* fix the thread on the correct cpu */
  1037. cpu_set_t aff_mask;
  1038. CPU_ZERO(&aff_mask);
  1039. CPU_SET(cpuid, &aff_mask);
  1040. starpu_pthread_t self = pthread_self();
  1041. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  1042. if (ret)
  1043. {
  1044. perror("binding thread");
  1045. STARPU_ABORT();
  1046. }
  1047. #elif defined(_WIN32)
  1048. DWORD mask = 1 << cpuid;
  1049. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  1050. {
  1051. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  1052. }
  1053. #else
  1054. #warning no CPU binding support
  1055. #endif
  1056. #endif
  1057. }
  1058. void
  1059. _starpu_bind_thread_on_cpus (
  1060. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  1061. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  1062. {
  1063. #ifdef STARPU_SIMGRID
  1064. return;
  1065. #endif
  1066. #ifdef STARPU_HAVE_HWLOC
  1067. const struct hwloc_topology_support *support;
  1068. #ifdef STARPU_USE_OPENC
  1069. _starpu_opencl_init();
  1070. #endif
  1071. #ifdef STARPU_USE_CUDA
  1072. _starpu_init_cuda();
  1073. #endif
  1074. _starpu_init_topology(config);
  1075. support = hwloc_topology_get_support(config->topology.hwtopology);
  1076. if (support->cpubind->set_thisthread_cpubind)
  1077. {
  1078. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  1079. int ret;
  1080. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1081. HWLOC_CPUBIND_THREAD);
  1082. if (ret)
  1083. {
  1084. perror("binding thread");
  1085. STARPU_ABORT();
  1086. }
  1087. }
  1088. #else
  1089. #ifdef __GLIBC__
  1090. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  1091. #else
  1092. # warning no parallel worker CPU binding support
  1093. #endif
  1094. #endif
  1095. }
  1096. static void
  1097. _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1098. {
  1099. /* launch one thread per CPU */
  1100. unsigned ram_memory_node;
  1101. /* note that even if the CPU cpu are not used, we always have a RAM
  1102. * node */
  1103. /* TODO : support NUMA ;) */
  1104. ram_memory_node = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
  1105. STARPU_ASSERT(ram_memory_node == STARPU_MAIN_RAM);
  1106. #ifdef STARPU_SIMGRID
  1107. char name[16];
  1108. msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
  1109. STARPU_ASSERT(host);
  1110. _starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
  1111. #endif
  1112. /* We will store all the busid of the different (src, dst)
  1113. * combinations in a matrix which we initialize here. */
  1114. _starpu_initialize_busid_matrix();
  1115. /* Each device is initialized,
  1116. * giving it a memory node and a core bind id.
  1117. */
  1118. /* TODO: STARPU_MAXNUMANODES */
  1119. unsigned numa_init[1] = { 1 };
  1120. unsigned numa_memory_nodes[1] = { ram_memory_node };
  1121. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1122. unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
  1123. unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
  1124. #ifndef STARPU_SIMGRID
  1125. unsigned cuda_bindid[STARPU_MAXCUDADEVS];
  1126. #endif
  1127. #endif
  1128. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1129. unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
  1130. unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
  1131. #ifndef STARPU_SIMGRID
  1132. unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
  1133. #endif
  1134. #endif
  1135. #ifdef STARPU_USE_MIC
  1136. unsigned mic_init[STARPU_MAXMICDEVS] = { };
  1137. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  1138. #ifndef STARPU_SIGMRID
  1139. unsigned mic_bindid[STARPU_MAXMICDEVS];
  1140. #endif
  1141. #endif
  1142. unsigned worker;
  1143. for (worker = 0; worker < config->topology.nworkers; worker++)
  1144. {
  1145. unsigned memory_node = -1;
  1146. struct _starpu_worker *workerarg = &config->workers[worker];
  1147. unsigned devid = workerarg->devid;
  1148. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)) && !defined(STARPU_SIMGRID)
  1149. /* Perhaps the worker has some "favourite" bindings */
  1150. int *preferred_binding = NULL;
  1151. int npreferred = 0;
  1152. #endif
  1153. /* select the memory node that contains worker's memory */
  1154. switch (workerarg->arch)
  1155. {
  1156. case STARPU_CPU_WORKER:
  1157. {
  1158. /* TODO: NUMA */
  1159. int numaid = 0;
  1160. /* "dedicate" a cpu core to that worker */
  1161. if (numa_init[numaid])
  1162. {
  1163. memory_node = numa_memory_nodes[numaid];
  1164. }
  1165. else
  1166. {
  1167. numa_init[numaid] = 1;
  1168. memory_node = numa_memory_nodes[numaid] = _starpu_memory_node_register(STARPU_CPU_RAM, numaid);
  1169. #ifdef STARPU_SIMGRID
  1170. snprintf(name, sizeof(name), "RAM%d", numaid);
  1171. host = _starpu_simgrid_get_host_by_name(name);
  1172. STARPU_ASSERT(host);
  1173. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1174. #endif
  1175. }
  1176. workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
  1177. _starpu_memory_node_add_nworkers(memory_node);
  1178. break;
  1179. }
  1180. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1181. case STARPU_CUDA_WORKER:
  1182. #ifndef STARPU_SIMGRID
  1183. if (may_bind_automatically)
  1184. {
  1185. /* StarPU is allowed to bind threads automatically */
  1186. preferred_binding = _starpu_get_cuda_affinity_vector(devid);
  1187. npreferred = config->topology.nhwpus;
  1188. }
  1189. #endif /* SIMGRID */
  1190. if (cuda_init[devid])
  1191. {
  1192. memory_node = cuda_memory_nodes[devid];
  1193. #ifndef STARPU_SIMGRID
  1194. workerarg->bindid = cuda_bindid[devid];
  1195. #endif /* SIMGRID */
  1196. }
  1197. else
  1198. {
  1199. cuda_init[devid] = 1;
  1200. #ifndef STARPU_SIMGRID
  1201. workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1202. #endif /* SIMGRID */
  1203. memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid);
  1204. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1205. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1206. #ifdef STARPU_SIMGRID
  1207. const char* cuda_memcpy_peer;
  1208. snprintf(name, sizeof(name), "CUDA%d", devid);
  1209. host = _starpu_simgrid_get_host_by_name(name);
  1210. STARPU_ASSERT(host);
  1211. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1212. cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
  1213. #endif /* SIMGRID */
  1214. if (
  1215. #ifdef STARPU_SIMGRID
  1216. cuda_memcpy_peer && atoll(cuda_memcpy_peer)
  1217. #elif defined(HAVE_CUDA_MEMCPY_PEER)
  1218. 1
  1219. #else /* MEMCPY_PEER */
  1220. 0
  1221. #endif /* MEMCPY_PEER */
  1222. )
  1223. {
  1224. unsigned worker2;
  1225. for (worker2 = 0; worker2 < worker; worker2++)
  1226. {
  1227. struct _starpu_worker *workerarg2 = &config->workers[worker2];
  1228. if (workerarg2->arch == STARPU_CUDA_WORKER)
  1229. {
  1230. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  1231. _starpu_register_bus(memory_node2, memory_node);
  1232. _starpu_register_bus(memory_node, memory_node2);
  1233. }
  1234. }
  1235. }
  1236. }
  1237. _starpu_memory_node_add_nworkers(memory_node);
  1238. break;
  1239. #endif
  1240. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1241. case STARPU_OPENCL_WORKER:
  1242. #ifndef STARPU_SIMGRID
  1243. if (may_bind_automatically)
  1244. {
  1245. /* StarPU is allowed to bind threads automatically */
  1246. preferred_binding = _starpu_get_opencl_affinity_vector(devid);
  1247. npreferred = config->topology.nhwpus;
  1248. }
  1249. #endif /* SIMGRID */
  1250. if (opencl_init[devid])
  1251. {
  1252. memory_node = opencl_memory_nodes[devid];
  1253. #ifndef STARPU_SIMGRID
  1254. workerarg->bindid = opencl_bindid[devid];
  1255. #endif /* SIMGRID */
  1256. }
  1257. else
  1258. {
  1259. opencl_init[devid] = 1;
  1260. #ifndef STARPU_SIMGRID
  1261. workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1262. #endif /* SIMGRID */
  1263. memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid);
  1264. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1265. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1266. #ifdef STARPU_SIMGRID
  1267. snprintf(name, sizeof(name), "OpenCL%d", devid);
  1268. host = _starpu_simgrid_get_host_by_name(name);
  1269. STARPU_ASSERT(host);
  1270. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1271. #endif /* SIMGRID */
  1272. }
  1273. _starpu_memory_node_add_nworkers(memory_node);
  1274. break;
  1275. #endif
  1276. #ifdef STARPU_USE_MIC
  1277. case STARPU_MIC_WORKER:
  1278. if (mic_init[devid])
  1279. {
  1280. memory_node = mic_memory_nodes[devid];
  1281. }
  1282. else
  1283. {
  1284. mic_init[devid] = 1;
  1285. #ifndef STARPU_SIMGRID
  1286. /* TODO */
  1287. //if (may_bind_automatically)
  1288. //{
  1289. // /* StarPU is allowed to bind threads automatically */
  1290. // preferred_binding = _starpu_get_mic_affinity_vector(devid);
  1291. // npreferred = config->topology.nhwpus;
  1292. //}
  1293. mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1294. #endif /* SIMGRID */
  1295. memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid);
  1296. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1297. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1298. }
  1299. workerarg->bindid = mic_bindid[devid];
  1300. _starpu_memory_node_add_nworkers(memory_node);
  1301. break;
  1302. #endif /* STARPU_USE_MIC */
  1303. #ifdef STARPU_USE_SCC
  1304. case STARPU_SCC_WORKER:
  1305. {
  1306. /* Node 0 represents the SCC shared memory when we're on SCC. */
  1307. struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
  1308. descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
  1309. memory_node = ram_memory_node;
  1310. _starpu_memory_node_add_nworkers(memory_node);
  1311. }
  1312. break;
  1313. #endif
  1314. default:
  1315. STARPU_ABORT();
  1316. }
  1317. workerarg->memory_node = memory_node;
  1318. _STARPU_DEBUG("worker %d type %d devid %d bound to cpu %d, STARPU memory node %d\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);
  1319. #ifdef __GLIBC__
  1320. if (workerarg->bindid != -1)
  1321. {
  1322. /* Save the initial cpuset */
  1323. CPU_ZERO(&workerarg->cpu_set);
  1324. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  1325. }
  1326. #endif /* __GLIBC__ */
  1327. #ifdef STARPU_HAVE_HWLOC
  1328. if (workerarg->bindid == -1)
  1329. {
  1330. workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
  1331. }
  1332. else
  1333. {
  1334. /* Put the worker descriptor in the userdata field of the
  1335. * hwloc object describing the CPU */
  1336. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1337. config->pu_depth,
  1338. workerarg->bindid);
  1339. if (worker_obj->userdata == NULL)
  1340. {
  1341. worker_obj->userdata = _starpu_worker_list_new();
  1342. }
  1343. _starpu_worker_list_push_front(worker_obj->userdata, workerarg);
  1344. /* Clear the cpu set and set the cpu */
  1345. workerarg->hwloc_cpu_set = hwloc_bitmap_dup (worker_obj->cpuset);
  1346. }
  1347. #endif
  1348. }
  1349. }
  1350. int
  1351. _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
  1352. {
  1353. int ret;
  1354. unsigned i;
  1355. ret = _starpu_init_machine_config(config, no_mp_config);
  1356. if (ret)
  1357. return ret;
  1358. /* for the data management library */
  1359. _starpu_memory_nodes_init();
  1360. _starpu_init_workers_binding(config, no_mp_config);
  1361. config->cpus_nodeid = -1;
  1362. config->cuda_nodeid = -1;
  1363. config->opencl_nodeid = -1;
  1364. config->mic_nodeid = -1;
  1365. config->scc_nodeid = -1;
  1366. for (i = 0; i < starpu_worker_get_count(); i++)
  1367. {
  1368. switch (starpu_worker_get_type(i))
  1369. {
  1370. case STARPU_CPU_WORKER:
  1371. if (config->cpus_nodeid == -1)
  1372. config->cpus_nodeid = starpu_worker_get_memory_node(i);
  1373. else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
  1374. config->cpus_nodeid = -2;
  1375. break;
  1376. case STARPU_CUDA_WORKER:
  1377. if (config->cuda_nodeid == -1)
  1378. config->cuda_nodeid = starpu_worker_get_memory_node(i);
  1379. else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
  1380. config->cuda_nodeid = -2;
  1381. break;
  1382. case STARPU_OPENCL_WORKER:
  1383. if (config->opencl_nodeid == -1)
  1384. config->opencl_nodeid = starpu_worker_get_memory_node(i);
  1385. else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
  1386. config->opencl_nodeid = -2;
  1387. break;
  1388. case STARPU_MIC_WORKER:
  1389. if (config->mic_nodeid == -1)
  1390. config->mic_nodeid = starpu_worker_get_memory_node(i);
  1391. else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
  1392. config->mic_nodeid = -2;
  1393. break;
  1394. case STARPU_SCC_WORKER:
  1395. if (config->scc_nodeid == -1)
  1396. config->scc_nodeid = starpu_worker_get_memory_node(i);
  1397. else if (config->scc_nodeid != (int) starpu_worker_get_memory_node(i))
  1398. config->scc_nodeid = -2;
  1399. break;
  1400. case STARPU_ANY_WORKER:
  1401. STARPU_ASSERT(0);
  1402. }
  1403. }
  1404. return 0;
  1405. }
  1406. void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  1407. {
  1408. #ifdef STARPU_USE_MIC
  1409. _starpu_deinit_mp_config(config);
  1410. #endif
  1411. /* cleanup StarPU internal data structures */
  1412. _starpu_memory_nodes_deinit();
  1413. _starpu_destroy_machine_config(config);
  1414. }
  1415. void
  1416. starpu_topology_print (FILE *output)
  1417. {
  1418. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1419. struct _starpu_machine_topology *topology = &config->topology;
  1420. unsigned pu;
  1421. unsigned worker;
  1422. unsigned nworkers = starpu_worker_get_count();
  1423. unsigned ncombinedworkers = topology->ncombinedworkers;
  1424. unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
  1425. for (pu = 0; pu < topology->nhwpus; pu++)
  1426. {
  1427. if ((pu % nthreads_per_core) == 0)
  1428. fprintf(output, "core %u", pu / nthreads_per_core);
  1429. fprintf(output, "\tPU %u\t", pu);
  1430. for (worker = 0;
  1431. worker < nworkers + ncombinedworkers;
  1432. worker++)
  1433. {
  1434. if (worker < nworkers)
  1435. {
  1436. struct _starpu_worker *workerarg = &config->workers[worker];
  1437. if (workerarg->bindid == (int) pu)
  1438. {
  1439. char name[256];
  1440. starpu_worker_get_name (worker, name,
  1441. sizeof(name));
  1442. fprintf(output, "%s\t", name);
  1443. }
  1444. }
  1445. else
  1446. {
  1447. int worker_size, i;
  1448. int *combined_workerid;
  1449. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  1450. for (i = 0; i < worker_size; i++)
  1451. {
  1452. if (topology->workers_bindid[combined_workerid[i]] == pu)
  1453. fprintf(output, "comb %u\t", worker-nworkers);
  1454. }
  1455. }
  1456. }
  1457. fprintf(output, "\n");
  1458. }
  1459. }