topology.c

  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2017 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 CNRS
  5. * Copyright (C) 2011, 2016, 2017 INRIA
  6. * Copyright (C) 2016 Uppsala University
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <common/config.h>
  22. #include <core/workers.h>
  23. #include <core/debug.h>
  24. #include <core/topology.h>
  25. #include <drivers/cuda/driver_cuda.h>
  26. #include <drivers/mic/driver_mic_source.h>
  27. #include <drivers/scc/driver_scc_source.h>
  28. #include <drivers/mpi/driver_mpi_source.h>
  29. #include <drivers/mpi/driver_mpi_common.h>
  30. #include <drivers/mp_common/source_common.h>
  31. #include <drivers/opencl/driver_opencl.h>
  32. #include <profiling/profiling.h>
  33. #include <datawizard/datastats.h>
  34. #include <datawizard/memory_nodes.h>
  35. #include <common/uthash.h>
  36. #ifdef STARPU_HAVE_HWLOC
  37. #include <hwloc.h>
  38. #ifndef HWLOC_API_VERSION
  39. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  40. #endif
  41. #endif
  42. #ifdef STARPU_HAVE_WINDOWS
  43. #include <windows.h>
  44. #endif
  45. #ifdef STARPU_SIMGRID
  46. #include <core/simgrid.h>
  47. #endif
  48. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  49. #include <hwloc/cuda.h>
  50. #endif
  51. static unsigned topology_is_initialized = 0;
  52. static int nobind;
  53. /* For checking whether two workers share the same PU, indexed by PU number */
  54. static int cpu_worker[STARPU_MAXCPUS];
  55. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  56. struct handle_entry
  57. {
  58. UT_hash_handle hh;
  59. unsigned gpuid;
  60. };
  61. # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  62. /* Entry in the `devices_using_cuda' hash table. */
  63. static struct handle_entry *devices_using_cuda;
  64. # endif
  65. static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };
  66. #endif // defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  67. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  68. static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
  69. #endif
  70. #ifdef STARPU_USE_MIC
  71. static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  72. #endif
  73. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  74. struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
  75. #endif
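/* Return the _starpu_worker structure matching the driver description D (worker type and device id), or NULL if no such worker exists. */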
  76. struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  77. {
  78. unsigned nworkers = starpu_worker_get_count();
  79. unsigned workerid;
  80. for (workerid = 0; workerid < nworkers; workerid++)
  81. {
  82. if (starpu_worker_get_type(workerid) == d->type)
  83. {
  84. struct _starpu_worker *worker;
  85. worker = _starpu_get_worker_struct(workerid);
  86. switch (d->type)
  87. {
  88. #ifdef STARPU_USE_CPU
  89. case STARPU_CPU_WORKER:
  90. if (worker->devid == d->id.cpu_id)
  91. return worker;
  92. break;
  93. #endif
  94. #ifdef STARPU_USE_OPENCL
  95. case STARPU_OPENCL_WORKER:
  96. {
  97. cl_device_id device;
  98. starpu_opencl_get_device(worker->devid, &device);
  99. if (device == d->id.opencl_id)
  100. return worker;
  101. break;
  102. }
  103. #endif
  104. #ifdef STARPU_USE_CUDA
  105. case STARPU_CUDA_WORKER:
  106. {
  107. if (worker->devid == d->id.cuda_id)
  108. return worker;
  109. break;
  110. }
  111. #endif
  112. default:
  113. (void) worker;
  114. _STARPU_DEBUG("Invalid device type\n");
  115. return NULL;
  116. }
  117. }
  118. }
  119. return NULL;
  120. }
  121. /*
  122. * Discover the topology of the machine
  123. */
  124. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  125. static void
  126. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  127. int *current, int *workers_gpuid,
  128. const char *varname, unsigned nhwgpus,
  129. enum starpu_worker_archtype type)
  130. {
  131. char *strval;
  132. unsigned i;
  133. *current = 0;
  134. /* conf->workers_gpuid indicates the successive GPU identifiers that
  135. * should be used to bind the workers. It should be filled either
  136. * according to the user's explicit parameters (from starpu_conf) or
  137. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  138. * round-robin policy is used to distribute the workers over the
  139. * cores. */
  140. /* what do we use: the explicit value, the env. variable, or round-robin? */
  141. if ((strval = starpu_getenv(varname)))
  142. {
  143. /* STARPU_WORKERS_CUDAID certainly contains fewer entries than
  144. * STARPU_NMAXWORKERS, so we reuse its entries in a round-robin
  145. * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  146. * 2". */
  147. unsigned wrap = 0;
  148. unsigned number_of_entries = 0;
  149. char *endptr;
  150. /* we use the content of the STARPU_WORKERS_CUDAID
  151. * env. variable */
  152. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  153. {
  154. if (!wrap)
  155. {
  156. long int val;
  157. val = strtol(strval, &endptr, 10);
  158. if (endptr != strval)
  159. {
  160. workers_gpuid[i] = (unsigned)val;
  161. strval = endptr;
  162. }
  163. else
  164. {
  165. /* there must be at least one entry */
  166. STARPU_ASSERT(i != 0);
  167. number_of_entries = i;
  168. /* there are no more values in the
  169. * string */
  170. wrap = 1;
  171. workers_gpuid[i] = workers_gpuid[0];
  172. }
  173. }
  174. else
  175. {
  176. workers_gpuid[i] =
  177. workers_gpuid[i % number_of_entries];
  178. }
  179. }
  180. }
  181. else if (explicit_workers_gpuid)
  182. {
  183. /* we use the explicit value from the user */
  184. memcpy(workers_gpuid,
  185. explicit_workers_gpuid,
  186. STARPU_NMAXWORKERS*sizeof(unsigned));
  187. }
  188. else
  189. {
  190. /* by default, we take a round robin policy */
  191. if (nhwgpus > 0)
  192. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  193. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  194. /* StarPU can use sampling techniques to bind threads
  195. * correctly */
  196. may_bind_automatically[type] = 1;
  197. }
  198. }
  199. #endif
  200. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
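/* Fill topology->workers_cuda_gpuid, either from the user's starpu_conf, from the STARPU_WORKERS_CUDAID environment variable, or with a round-robin default. */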
  201. static void
  202. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  203. {
  204. struct _starpu_machine_topology *topology = &config->topology;
  205. struct starpu_conf *uconf = &config->conf;
  206. _starpu_initialize_workers_deviceid (
  207. uconf->use_explicit_workers_cuda_gpuid == 0
  208. ? NULL
  209. : (int *)uconf->workers_cuda_gpuid,
  210. &(config->current_cuda_gpuid),
  211. (int *)topology->workers_cuda_gpuid,
  212. "STARPU_WORKERS_CUDAID",
  213. topology->nhwcudagpus,
  214. STARPU_CUDA_WORKER);
  215. }
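/* Pick the CUDA device id to use for the next CUDA worker, cycling over the configured list. */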
  216. static inline int
  217. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  218. {
  219. unsigned i =
  220. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  221. return (int)config->topology.workers_cuda_gpuid[i];
  222. }
  223. #endif
  224. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  225. static void
  226. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  227. {
  228. struct _starpu_machine_topology *topology = &config->topology;
  229. struct starpu_conf *uconf = &config->conf;
  230. _starpu_initialize_workers_deviceid(
  231. uconf->use_explicit_workers_opencl_gpuid == 0
  232. ? NULL
  233. : (int *)uconf->workers_opencl_gpuid,
  234. &(config->current_opencl_gpuid),
  235. (int *)topology->workers_opencl_gpuid,
  236. "STARPU_WORKERS_OPENCLID",
  237. topology->nhwopenclgpus,
  238. STARPU_OPENCL_WORKER);
  239. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  240. // Detect devices which are already used with CUDA
  241. {
  242. unsigned tmp[STARPU_NMAXWORKERS];
  243. unsigned nb=0;
  244. int i;
  245. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  246. {
  247. struct handle_entry *entry;
  248. int devid = config->topology.workers_opencl_gpuid[i];
  249. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  250. if (entry == NULL)
  251. {
  252. tmp[nb] = topology->workers_opencl_gpuid[i];
  253. nb++;
  254. }
  255. }
  256. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  257. tmp[i] = -1;
  258. memcpy (topology->workers_opencl_gpuid, tmp,
  259. sizeof(unsigned)*STARPU_NMAXWORKERS);
  260. }
  261. #endif /* STARPU_USE_CUDA || STARPU_SIMGRID */
  262. {
  263. // Detect identical devices
  264. struct handle_entry *devices_already_used = NULL;
  265. unsigned tmp[STARPU_NMAXWORKERS];
  266. unsigned nb=0;
  267. int i;
  268. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  269. {
  270. int devid = topology->workers_opencl_gpuid[i];
  271. struct handle_entry *entry;
  272. HASH_FIND_INT(devices_already_used, &devid, entry);
  273. if (entry == NULL)
  274. {
  275. struct handle_entry *entry2;
  276. _STARPU_MALLOC(entry2, sizeof(*entry2));
  277. entry2->gpuid = devid;
  278. HASH_ADD_INT(devices_already_used, gpuid,
  279. entry2);
  280. tmp[nb] = devid;
  281. nb ++;
  282. }
  283. }
  284. struct handle_entry *entry, *tempo;
  285. HASH_ITER(hh, devices_already_used, entry, tempo)
  286. {
  287. HASH_DEL(devices_already_used, entry);
  288. free(entry);
  289. }
  290. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  291. tmp[i] = -1;
  292. memcpy (topology->workers_opencl_gpuid, tmp,
  293. sizeof(unsigned)*STARPU_NMAXWORKERS);
  294. }
  295. }
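/* Pick the OpenCL device id to use for the next OpenCL worker, cycling over the configured list. */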
  296. static inline int
  297. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  298. {
  299. unsigned i =
  300. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  301. return (int)config->topology.workers_opencl_gpuid[i];
  302. }
  303. #endif
  304. #if 0
  305. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  306. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  307. {
  308. struct _starpu_machine_topology *topology = &config->topology;
  309. struct starpu_conf *uconf = &config->conf;
  310. _starpu_initialize_workers_deviceid(
  311. uconf->use_explicit_workers_mic_deviceid == 0
  312. ? NULL
  313. : (int *)config->user_conf->workers_mic_deviceid,
  314. &(config->current_mic_deviceid),
  315. (int *)topology->workers_mic_deviceid,
  316. "STARPU_WORKERS_MICID",
  317. topology->nhwmiccores,
  318. STARPU_MIC_WORKER);
  319. }
  320. #endif
  321. #endif
  322. #ifdef STARPU_USE_SCC
  323. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  324. {
  325. struct _starpu_machine_topology *topology = &config->topology;
  326. struct starpu_conf *uconf = &config->conf;
  327. _starpu_initialize_workers_deviceid(
  328. uconf->use_explicit_workers_scc_deviceid == 0
  329. ? NULL
  330. : (int *) uconf->workers_scc_deviceid,
  331. &(config->current_scc_deviceid),
  332. (int *)topology->workers_scc_deviceid,
  333. "STARPU_WORKERS_SCCID",
  334. topology->nhwscc,
  335. STARPU_SCC_WORKER);
  336. }
  337. #endif /* STARPU_USE_SCC */
  338. #if 0
  339. #ifdef STARPU_USE_MIC
  340. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  341. {
  342. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  343. return (int)config->topology.workers_mic_deviceid[i];
  344. }
  345. #endif
  346. #endif
  347. #ifdef STARPU_USE_SCC
  348. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  349. {
  350. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  351. return (int)config->topology.workers_scc_deviceid[i];
  352. }
  353. #endif
  354. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  355. static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
  356. {
  357. unsigned i = ((config->current_mpi_deviceid++) % config->topology.nmpidevices);
  358. return (int)config->topology.workers_mpi_ms_deviceid[i];
  359. }
  360. static void
  361. _starpu_init_mpi_topology (struct _starpu_machine_config *config, long mpi_idx)
  362. {
  363. /* Discover the topology of the MPI node identified by MPI_IDX, that
  364. * is, make this StarPU instance aware of the number of cores available
  365. * on this MPI device. Update the `nhwmpicores' topology field
  366. * accordingly. */
  367. struct _starpu_machine_topology *topology = &config->topology;
  368. int nbcores;
  369. _starpu_src_common_sink_nbcores (mpi_ms_nodes[mpi_idx], &nbcores);
  370. topology->nhwmpicores[mpi_idx] = nbcores;
  371. }
  372. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  373. #ifdef STARPU_USE_MIC
  374. static void
  375. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  376. {
  377. /* Discover the topology of the MIC node identified by MIC_IDX, that
  378. * is, make this StarPU instance aware of the number of cores available
  379. * on this MIC device. Update the `nhwmiccores' topology field
  380. * accordingly. */
  381. struct _starpu_machine_topology *topology = &config->topology;
  382. int nbcores;
  383. _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
  384. topology->nhwmiccores[mic_idx] = nbcores;
  385. }
  386. static int
  387. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  388. COIENGINE *coi_handle, COIPROCESS *coi_process)
  389. {
  390. /* Initialize the MIC node of index MIC_IDX. */
  391. struct starpu_conf *user_conf = &config->conf;
  392. char ***argv = _starpu_get_argv();
  393. const char *suffixes[] = {"-mic", "_mic", NULL};
  394. /* Environment variables to send to the sink, to inform it of what kind
  395. * of node it is (architecture and type), since it has no way to discover
  396. * this by itself */
  397. char mic_idx_env[32];
  398. sprintf(mic_idx_env, "_STARPU_MIC_DEVID=%d", mic_idx);
  399. /* XXX: this is currently necessary so that the remote process does not
  400. * segfault. */
  401. char nb_mic_env[32];
  402. sprintf(nb_mic_env, "_STARPU_MIC_NB=%d", 2);
  403. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  404. char mic_sink_program_path[1024];
  405. /* Let's get the helper program to run on the MIC device */
  406. int mic_file_found =
  407. _starpu_src_common_locate_file (mic_sink_program_path,
  408. starpu_getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  409. starpu_getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  410. user_conf->mic_sink_program_path,
  411. (argv ? (*argv)[0] : NULL),
  412. suffixes);
  413. if (0 != mic_file_found)
  414. {
  415. _STARPU_MSG("No MIC program specified, use the environment\n"
  416. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  417. "variable STARPU_MIC_SINK_PROGRAM_PATH, or the field\n"
  418. "'starpu_conf.mic_sink_program_path' to define it.\n");
  419. return -1;
  420. }
  421. COIRESULT res;
  422. /* Let's get the handle which lets us manage the remote MIC device */
  423. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  424. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  425. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  426. /* We launch the helper on the MIC device, which will wait for us
  427. * to give it work to do.
  428. * As we will communicate further with the device through SCIF, we
  429. * don't need to keep the process pointer */
  430. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  431. mic_sink_env, 1, NULL, 0, NULL,
  432. coi_process);
  433. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  434. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  435. /* Let's create the node structure; we'll communicate with the peer
  436. * through SCIF thanks to it */
  437. mic_nodes[mic_idx] =
  438. _starpu_mp_common_node_create(STARPU_NODE_MIC_SOURCE, mic_idx);
  439. return 0;
  440. }
  441. #endif
  442. #ifndef STARPU_SIMGRID
  443. #ifdef STARPU_HAVE_HWLOC
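/* Recursively attach a _starpu_hwloc_userdata structure to each object of the hwloc topology. */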
  444. static void
  445. _starpu_allocate_topology_userdata(hwloc_obj_t obj)
  446. {
  447. unsigned i;
  448. _STARPU_CALLOC(obj->userdata, 1, sizeof(struct _starpu_hwloc_userdata));
  449. for (i = 0; i < obj->arity; i++)
  450. _starpu_allocate_topology_userdata(obj->children[i]);
  451. }
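/* Recursively free the userdata structures attached by _starpu_allocate_topology_userdata(). */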
  452. static void
  453. _starpu_deallocate_topology_userdata(hwloc_obj_t obj)
  454. {
  455. unsigned i;
  456. struct _starpu_hwloc_userdata *data = obj->userdata;
  457. STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1);
  458. free(data);
  459. for (i = 0; i < obj->arity; i++)
  460. _starpu_deallocate_topology_userdata(obj->children[i]);
  461. }
  462. #endif
  463. #endif
  464. static void
  465. _starpu_init_topology (struct _starpu_machine_config *config)
  466. {
  467. /* Discover the topology, meaning finding all the available PUs for
  468. the compiled drivers. These drivers MUST have been initialized
  469. before calling this function. The discovered topology is filled in
  470. CONFIG. */
  471. struct _starpu_machine_topology *topology = &config->topology;
  472. if (topology_is_initialized)
  473. return;
  474. nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
  475. topology->nhwcpus = 0;
  476. topology->nhwpus = 0;
  477. #ifndef STARPU_SIMGRID
  478. #ifdef STARPU_HAVE_HWLOC
  479. hwloc_topology_init(&topology->hwtopology);
  480. _starpu_topology_filter(topology->hwtopology);
  481. hwloc_topology_load(topology->hwtopology);
  482. _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
  483. #endif
  484. #endif
  485. #ifdef STARPU_SIMGRID
  486. config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  487. #elif defined(STARPU_HAVE_HWLOC)
  488. /* Discover the CPUs relying on the hwloc interface and fill CONFIG
  489. * accordingly. */
  490. config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
  491. HWLOC_OBJ_CORE);
  492. config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
  493. HWLOC_OBJ_PU);
  494. /* Would be very odd */
  495. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  496. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  497. {
  498. /* unknown, using logical processors as fallback */
  499. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  500. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  501. HWLOC_OBJ_PU);
  502. }
  503. topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  504. config->cpu_depth);
  505. topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  506. config->pu_depth);
  507. #elif defined(HAVE_SYSCONF)
  508. /* Discover the CPUs relying on the sysconf(3) function and fill
  509. * CONFIG accordingly. */
  510. config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  511. #elif defined(_WIN32)
  512. /* Discover the CPUs on Cygwin and MinGW systems. */
  513. SYSTEM_INFO sysinfo;
  514. GetSystemInfo(&sysinfo);
  515. config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  516. #else
  517. #warning no way to know number of cores, assuming 1
  518. config->topology.nhwcpus = config->topology.nhwpus = 1;
  519. #endif
  520. _starpu_cuda_discover_devices(config);
  521. _starpu_opencl_discover_devices(config);
  522. #ifdef STARPU_USE_SCC
  523. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  524. #endif
  525. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  526. config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
  527. #endif
  528. topology_is_initialized = 1;
  529. }
  530. /*
  531. * Bind workers on the different processors
  532. */
  533. static void
  534. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  535. {
  536. char *strval;
  537. unsigned i;
  538. struct _starpu_machine_topology *topology = &config->topology;
  539. config->current_bindid = 0;
  540. /* conf->workers_bindid indicates the successive logical PU identifiers that
  541. * should be used to bind the workers. It should be filled either
  542. * according to the user's explicit parameters (from starpu_conf) or
  543. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  544. * round-robin policy is used to distribute the workers over the
  545. * cores. */
  546. /* what do we use: the explicit value, the env. variable, or round-robin? */
  547. if ((strval = starpu_getenv("STARPU_WORKERS_CPUID")))
  548. {
  549. /* STARPU_WORKERS_CPUID certainly contains fewer entries than
  550. * STARPU_NMAXWORKERS, so we reuse its entries in a round-robin
  551. * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  552. * 2". */
  553. unsigned wrap = 0;
  554. unsigned number_of_entries = 0;
  555. char *endptr;
  556. /* we use the content of the STARPU_WORKERS_CPUID
  557. * env. variable */
  558. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  559. {
  560. if (!wrap)
  561. {
  562. long int val;
  563. val = strtol(strval, &endptr, 10);
  564. if (endptr != strval)
  565. {
  566. topology->workers_bindid[i] =
  567. (unsigned)(val % topology->nhwpus);
  568. strval = endptr;
  569. if (*strval == '-')
  570. {
  571. /* range of values */
  572. long int endval;
  573. strval++;
  574. if (*strval && *strval != ' ' && *strval != ',')
  575. {
  576. endval = strtol(strval, &endptr, 10);
  577. strval = endptr;
  578. }
  579. else
  580. {
  581. endval = topology->nhwpus-1;
  582. if (*strval)
  583. strval++;
  584. }
  585. for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
  586. {
  587. i++;
  588. topology->workers_bindid[i] =
  589. (unsigned)(val % topology->nhwpus);
  590. }
  591. }
  592. if (*strval == ',')
  593. strval++;
  594. }
  595. else
  596. {
  597. /* there must be at least one entry */
  598. STARPU_ASSERT(i != 0);
  599. number_of_entries = i;
  600. /* there are no more values in the
  601. * string */
  602. wrap = 1;
  603. topology->workers_bindid[i] =
  604. topology->workers_bindid[0];
  605. }
  606. }
  607. else
  608. {
  609. topology->workers_bindid[i] =
  610. topology->workers_bindid[i % number_of_entries];
  611. }
  612. }
  613. }
  614. else if (config->conf.use_explicit_workers_bindid)
  615. {
  616. /* we use the explicit value from the user */
  617. memcpy(topology->workers_bindid,
  618. config->conf.workers_bindid,
  619. STARPU_NMAXWORKERS*sizeof(unsigned));
  620. }
  621. else
  622. {
  623. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  624. int k;
  625. int nbindids=0;
  626. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  627. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  628. i = 0; /* PU number currently assigned */
  629. k = 0; /* Number of threads already put on the current core */
  630. while(nbindids < STARPU_NMAXWORKERS)
  631. {
  632. if (k >= nth_per_core)
  633. {
  634. /* We have already put enough workers on this
  635. * core, skip remaining PUs from this core, and
  636. * proceed with next core */
  637. i += nhyperthreads-nth_per_core;
  638. k = 0;
  639. continue;
  640. }
  641. /* Add a worker to this core, by using this logical PU */
  642. topology->workers_bindid[nbindids++] =
  643. (unsigned)(i % topology->nhwpus);
  644. k++;
  645. i++;
  646. }
  647. }
  648. for (i = 0; i < STARPU_MAXCPUS;i++)
  649. cpu_worker[i] = STARPU_NOWORKERID;
  650. /* no binding yet */
  651. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  652. }
  653. /* This function gets the identifier of the next core on which to bind a
  654. * worker. In case a list of preferred cores was specified (logical indexes),
  655. * we look for an available core among the list if possible, otherwise a
  656. * round-robin policy is used. */
  657. static inline int
  658. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  659. int *preferred_binding, int npreferred)
  660. {
  661. struct _starpu_machine_topology *topology = &config->topology;
  662. int current_preferred;
  663. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  664. unsigned i;
  665. if (npreferred)
  666. {
  667. STARPU_ASSERT_MSG(preferred_binding, "Passing NULL pointer for parameter preferred_binding with a non-0 value of parameter npreferred");
  668. }
  669. /* loop over the preference list */
  670. for (current_preferred = 0;
  671. current_preferred < npreferred;
  672. current_preferred++)
  673. {
  674. /* Try to get this core */
  675. unsigned requested_core = preferred_binding[current_preferred];
  676. unsigned requested_bindid = requested_core * nhyperthreads;
  677. /* can we bind the worker on the preferred core ? */
  678. unsigned ind;
  679. /* Look at the remaining cores to be bound to */
  680. for (ind = 0;
  681. ind < topology->nhwpus / nhyperthreads;
  682. ind++)
  683. {
  684. if (topology->workers_bindid[ind] == requested_bindid && !config->currently_bound[ind])
  685. {
  686. /* the cpu is available, we use it ! */
  687. config->currently_bound[ind] = 1;
  688. return requested_bindid;
  689. }
  690. }
  691. }
  692. for (i = config->current_bindid; i < topology->nhwpus / nhyperthreads; i++)
  693. if (!config->currently_bound[i])
  694. /* Found a cpu ready for use, use it! */
  695. break;
  696. STARPU_ASSERT(i < topology->nhwpus / nhyperthreads);
  697. int bindid = topology->workers_bindid[i];
  698. config->currently_bound[i] = 1;
  699. i++;
  700. if (i == topology->nhwpus / nhyperthreads)
  701. {
  702. /* Finished binding on all cpus, restart from start in
  703. * case the user really wants overloading */
  704. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  705. i = 0;
  706. }
  707. config->current_bindid = i;
  708. return bindid;
  709. }
  710. unsigned
  711. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  712. {
  713. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  714. _starpu_opencl_init();
  715. #endif
  716. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  717. _starpu_init_cuda();
  718. #endif
  719. _starpu_init_topology(config);
  720. return config->topology.nhwcpus;
  721. }
  722. unsigned
  723. _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
  724. {
  725. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  726. _starpu_opencl_init();
  727. #endif
  728. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  729. _starpu_init_cuda();
  730. #endif
  731. _starpu_init_topology(config);
  732. return config->topology.nhwpus;
  733. }
  734. #ifdef STARPU_HAVE_HWLOC
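/* Ask hwloc to also report I/O devices (such as GPUs) when loading the topology. */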
  735. void _starpu_topology_filter(hwloc_topology_t topology)
  736. {
  737. #if HWLOC_API_VERSION >= 0x20000
  738. hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  739. #else
  740. hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
  741. #endif
  742. }
  743. #endif
  744. #ifdef STARPU_USE_MIC
  745. static void
  746. _starpu_init_mic_config (struct _starpu_machine_config *config,
  747. struct starpu_conf *user_conf,
  748. unsigned mic_idx)
  749. {
  750. // Configure the MIC device of index MIC_IDX.
  751. struct _starpu_machine_topology *topology = &config->topology;
  752. topology->nhwmiccores[mic_idx] = 0;
  753. _starpu_init_mic_topology (config, mic_idx);
  754. int nmiccores;
  755. nmiccores = starpu_get_env_number("STARPU_NMICTHREADS");
  756. if (nmiccores == -1)
  757. {
  758. /* Nothing was specified, so let's use the number of
  759. * detected MIC cores. */
  760. nmiccores = topology->nhwmiccores[mic_idx];
  761. }
  762. else
  763. {
  764. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  765. {
  766. /* The user requested more MIC cores than are available */
  767. _STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
  768. nmiccores = topology->nhwmiccores[mic_idx];
  769. }
  770. }
  771. topology->nmiccores[mic_idx] = nmiccores;
  772. STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  773. "topology->nmiccores[mic_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
  774. mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  775. /* _starpu_initialize_workers_mic_deviceid (config); */
  776. mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
  777. mic_worker_set[mic_idx].nworkers = topology->nmiccores[mic_idx];
  778. unsigned miccore_id;
  779. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  780. {
  781. int worker_idx = topology->nworkers + miccore_id;
  782. config->workers[worker_idx].set = &mic_worker_set[mic_idx];
  783. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  784. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  785. config->workers[worker_idx].perf_arch.ndevices = 1;
  786. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  787. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  788. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  789. config->workers[worker_idx].devid = mic_idx;
  790. config->workers[worker_idx].subworkerid = miccore_id;
  791. config->workers[worker_idx].worker_mask = STARPU_MIC;
  792. config->worker_mask |= STARPU_MIC;
  793. }
  794. topology->nworkers += topology->nmiccores[mic_idx];
  795. }
  796. static COIENGINE mic_handles[STARPU_MAXMICDEVS];
  797. COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
  798. #endif
  799. #ifdef STARPU_USE_MPI_MASTER_SLAVE
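/* Configure the MPI master-slave device of index MPI_IDX: discover how many cores it provides and register the corresponding workers. */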
  800. static void
  801. _starpu_init_mpi_config (struct _starpu_machine_config *config,
  802. struct starpu_conf *user_conf,
  803. unsigned mpi_idx)
  804. {
  805. struct _starpu_machine_topology *topology = &config->topology;
  806. topology->nhwmpicores[mpi_idx] = 0;
  807. _starpu_init_mpi_topology (config, mpi_idx);
  808. int nmpicores;
  809. nmpicores = starpu_get_env_number("STARPU_NMPIMSTHREADS");
  810. if (nmpicores == -1)
  811. {
  812. /* Nothing was specified, so let's use the number of
  813. * detected MPI cores. */
  814. nmpicores = topology->nhwmpicores[mpi_idx];
  815. }
  816. else
  817. {
  818. if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
  819. {
  820. /* The user requested more MPI cores than are available */
  821. _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
  822. nmpicores, topology->nhwmpicores[mpi_idx]);
  823. nmpicores = topology->nhwmpicores[mpi_idx];
  824. }
  825. }
  826. topology->nmpicores[mpi_idx] = nmpicores;
  827. STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  828. "topology->nmpicores[mpi_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
  829. mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
  830. mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
  831. mpi_worker_set[mpi_idx].nworkers = topology->nmpicores[mpi_idx];
  832. unsigned mpicore_id;
  833. for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
  834. {
  835. int worker_idx = topology->nworkers + mpicore_id;
  836. config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
  837. config->workers[worker_idx].arch = STARPU_MPI_MS_WORKER;
  838. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  839. config->workers[worker_idx].perf_arch.ndevices = 1;
  840. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MPI_MS_WORKER;
  841. config->workers[worker_idx].perf_arch.devices[0].devid = mpi_idx;
  842. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  843. config->workers[worker_idx].devid = mpi_idx;
  844. config->workers[worker_idx].subworkerid = mpicore_id;
  845. config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
  846. config->worker_mask |= STARPU_MPI_MS;
  847. }
  848. mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
  849. topology->nworkers += topology->nmpicores[mpi_idx];
  850. }
  851. #endif
  852. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  853. static void
  854. _starpu_init_mp_config (struct _starpu_machine_config *config,
  855. struct starpu_conf *user_conf, int no_mp_config)
  856. {
  857. /* Discover and configure the mp topology. That means:
  858. * - discover the number of mp nodes;
  859. * - initialize each discovered node;
  860. * - discover the local topology (number of PUs/devices) of each node;
  861. * - configure the workers accordingly.
  862. */
  863. #ifdef STARPU_USE_MIC
  864. if (!no_mp_config)
  865. {
  866. struct _starpu_machine_topology *topology = &config->topology;
  867. /* Discover and initialize the number of MIC nodes through the mp
  868. * infrastructure. */
  869. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  870. int reqmicdevices = starpu_get_env_number("STARPU_NMIC");
  871. if (reqmicdevices == -1 && user_conf)
  872. reqmicdevices = user_conf->nmic;
  873. if (reqmicdevices == -1)
  874. /* Nothing was specified, so let's use the number of
  875. * detected MIC devices. */
  876. reqmicdevices = nhwmicdevices;
  877. if (reqmicdevices != -1)
  878. {
  879. if ((unsigned) reqmicdevices > nhwmicdevices)
  880. {
  881. /* The user requested more MIC devices than are available */
  882. _STARPU_MSG("# Warning: %d MIC devices requested. Only %u available.\n", reqmicdevices, nhwmicdevices);
  883. reqmicdevices = nhwmicdevices;
  884. }
  885. }
  886. topology->nmicdevices = 0;
  887. unsigned i;
  888. for (i = 0; i < (unsigned) reqmicdevices; i++)
  889. if (0 == _starpu_init_mic_node (config, i, &mic_handles[i], &_starpu_mic_process[i]))
  890. topology->nmicdevices++;
  891. for (i = 0; i < topology->nmicdevices; i++)
  892. _starpu_init_mic_config (config, user_conf, i);
  893. }
  894. #endif
  895. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  896. {
  897. struct _starpu_machine_topology *topology = &config->topology;
  898. /* Discover and initialize the number of MPI nodes through the mp
  899. * infrastructure. */
  900. unsigned nhwmpidevices = _starpu_mpi_src_get_device_count();
  901. int reqmpidevices = starpu_get_env_number("STARPU_NMPI_MS");
  902. if (reqmpidevices == -1 && user_conf)
  903. reqmpidevices = user_conf->nmpi_ms;
  904. if (reqmpidevices == -1)
  905. /* Nothing was specified, so let's use the number of
  906. * detected MPI devices. */
  907. reqmpidevices = nhwmpidevices;
  908. if (reqmpidevices != -1)
  909. {
  910. if ((unsigned) reqmpidevices > nhwmpidevices)
  911. {
  912. /* The user requested more MPI devices than are available */
  913. _STARPU_MSG("# Warning: %d MPI Master-Slave devices requested. Only %u available.\n",
  914. reqmpidevices, nhwmpidevices);
  915. reqmpidevices = nhwmpidevices;
  916. }
  917. }
  918. topology->nmpidevices = reqmpidevices;
  919. /* if the user doesn't want to use MPI slaves, we close the slave processes */
  920. if (no_mp_config && topology->nmpidevices == 0)
  921. {
  922. _starpu_mpi_common_mp_deinit();
  923. exit(0);
  924. }
  925. if (!no_mp_config)
  926. {
  927. unsigned i;
  928. for (i = 0; i < topology->nmpidevices; i++)
  929. mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
  930. for (i = 0; i < topology->nmpidevices; i++)
  931. _starpu_init_mpi_config (config, user_conf, i);
  932. }
  933. }
  934. #endif
  935. }
  936. #endif
  937. #ifdef STARPU_USE_MIC
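/* Ask the MIC device MIC_IDX to exit, then destroy its COI process and its node structure. */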
  938. static void
  939. _starpu_deinit_mic_node (unsigned mic_idx)
  940. {
  941. _starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
  942. COIProcessDestroy(_starpu_mic_process[mic_idx], -1, 0, NULL, NULL);
  943. _starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
  944. }
  945. #endif
  946. #ifdef STARPU_USE_MPI_MASTER_SLAVE
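/* Ask the MPI slave DEVID to exit and destroy the corresponding node structure. */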
  947. static void _starpu_deinit_mpi_node(int devid)
  948. {
  949. _starpu_mp_common_send_command(mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);
  950. _starpu_mp_common_node_destroy(mpi_ms_nodes[devid]);
  951. }
  952. #endif
  953. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  954. static void
  955. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  956. {
  957. struct _starpu_machine_topology *topology = &config->topology;
  958. unsigned i;
  959. #ifdef STARPU_USE_MIC
  960. for (i = 0; i < topology->nmicdevices; i++)
  961. _starpu_deinit_mic_node (i);
  962. _starpu_mic_clear_kernels();
  963. #endif
  964. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  965. for (i = 0; i < topology->nmpidevices; i++)
  966. _starpu_deinit_mpi_node (i);
  967. #endif
  968. }
  969. #endif
  970. #ifdef STARPU_HAVE_HWLOC
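/* Recursively count the GPUs located below each hwloc object and store the result in the object's userdata. */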
  971. static unsigned
  972. _starpu_topology_count_ngpus(hwloc_obj_t obj)
  973. {
  974. struct _starpu_hwloc_userdata *data = obj->userdata;
  975. unsigned n = data->ngpus;
  976. unsigned i;
  977. for (i = 0; i < obj->arity; i++)
  978. n += _starpu_topology_count_ngpus(obj->children[i]);
  979. data->ngpus = n;
  980. #ifdef STARPU_VERBOSE
  981. {
  982. char name[64];
  983. hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
  984. _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
  985. }
  986. #endif
  987. return n;
  988. }
  989. #endif
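/* Discover the devices available to the enabled drivers and register the corresponding workers in CONFIG. */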
  990. static int
  991. _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  992. {
  993. int i;
  994. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  995. {
  996. config->workers[i].workerid = i;
  997. config->workers[i].set = NULL;
  998. }
  999. struct _starpu_machine_topology *topology = &config->topology;
  1000. topology->nworkers = 0;
  1001. topology->ncombinedworkers = 0;
  1002. topology->nsched_ctxs = 0;
  1003. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1004. _starpu_opencl_init();
  1005. #endif
  1006. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1007. _starpu_init_cuda();
  1008. #endif
  1009. _starpu_init_topology(config);
  1010. _starpu_initialize_workers_bindid(config);
  1011. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1012. for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++)
  1013. cuda_worker_set[i].workers = NULL;
  1014. #endif
  1015. #ifdef STARPU_USE_MIC
  1016. for (i = 0; i < (int) (sizeof(mic_worker_set)/sizeof(mic_worker_set[0])); i++)
  1017. mic_worker_set[i].workers = NULL;
  1018. #endif
  1019. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1020. for (i = 0; i < (int) (sizeof(mpi_worker_set)/sizeof(mpi_worker_set[0])); i++)
  1021. mpi_worker_set[i].workers = NULL;
  1022. #endif
  1023. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1024. int ncuda = config->conf.ncuda;
  1025. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  1026. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  1027. STARPU_ASSERT_MSG(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS);
  1028. #ifndef STARPU_NON_BLOCKING_DRIVERS
  1029. if (nworker_per_cuda > 1)
  1030. {
  1031. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  1032. nworker_per_cuda = 1;
  1033. }
  1034. #endif
  1035. if (ncuda != 0)
  1036. {
  1037. /* The user did not disable CUDA. We need to initialize CUDA
  1038. * early to count the number of devices */
  1039. _starpu_init_cuda();
  1040. int nb_devices = _starpu_get_cuda_device_count();
  1041. if (ncuda == -1)
  1042. {
  1043. /* Nothing was specified, so let's choose ! */
  1044. ncuda = nb_devices;
  1045. }
  1046. else
  1047. {
  1048. if (ncuda > nb_devices)
  1049. {
  1050. /* The user requested more CUDA devices than
  1051. * are available */
  1052. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  1053. ncuda = nb_devices;
  1054. }
  1055. }
  1056. }
  1057. /* Now we know how many CUDA devices will be used */
  1058. topology->ncudagpus = ncuda;
  1059. topology->nworkerpercuda = nworker_per_cuda;
  1060. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  1061. _starpu_initialize_workers_cuda_gpuid(config);
  1062. /* allow having one worker per stream */
  1063. topology->cuda_th_per_stream = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1);
  1064. topology->cuda_th_per_dev = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_DEV", -1);
  1065. /* per device by default */
  1066. if (topology->cuda_th_per_dev == -1)
  1067. {
  1068. if (topology->cuda_th_per_stream == 1)
  1069. topology->cuda_th_per_dev = 0;
  1070. else
  1071. topology->cuda_th_per_dev = 1;
  1072. }
  1073. /* Not per stream by default */
  1074. if (topology->cuda_th_per_stream == -1)
  1075. {
  1076. topology->cuda_th_per_stream = 0;
  1077. }
  1078. STARPU_ASSERT_MSG(topology->cuda_th_per_dev != 1 || topology->cuda_th_per_stream != 1, "It does not make sense to set both STARPU_CUDA_THREAD_PER_WORKER and STARPU_CUDA_THREAD_PER_DEV to 1, please choose either per worker or per device or none");
  1079. if (!topology->cuda_th_per_dev)
  1080. {
  1081. cuda_worker_set[0].workers = &config->workers[topology->nworkers];
  1082. cuda_worker_set[0].nworkers = topology->ncudagpus * nworker_per_cuda;
  1083. }
  1084. unsigned cudagpu;
  1085. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  1086. {
  1087. int devid = _starpu_get_next_cuda_gpuid(config);
  1088. int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
  1089. struct _starpu_worker_set *worker_set;
  1090. if (topology->cuda_th_per_dev)
  1091. {
  1092. worker_set = &cuda_worker_set[devid];
  1093. worker_set->workers = &config->workers[worker_idx0];
  1094. worker_set->nworkers = nworker_per_cuda;
  1095. }
  1096. else
  1097. {
  1098. /* Same worker set for all devices */
  1099. worker_set = &cuda_worker_set[0];
  1100. }
  1101. for (i = 0; i < nworker_per_cuda; i++)
  1102. {
  1103. int worker_idx = worker_idx0 + i;
  1104. if(topology->cuda_th_per_stream)
  1105. {
  1106. /* Just one worker in the set */
  1107. _STARPU_CALLOC(config->workers[worker_idx].set, 1, sizeof(struct _starpu_worker_set));
  1108. config->workers[worker_idx].set->workers = &config->workers[worker_idx];
  1109. config->workers[worker_idx].set->nworkers = 1;
  1110. }
  1111. else
  1112. config->workers[worker_idx].set = worker_set;
  1113. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  1114. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1115. config->workers[worker_idx].perf_arch.ndevices = 1;
  1116. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  1117. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1118. // TODO: fix perfmodels etc.
  1119. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  1120. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1121. config->workers[worker_idx].devid = devid;
  1122. config->workers[worker_idx].subworkerid = i;
  1123. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  1124. config->worker_mask |= STARPU_CUDA;
  1125. struct handle_entry *entry;
  1126. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  1127. if (!entry)
  1128. {
  1129. _STARPU_MALLOC(entry, sizeof(*entry));
  1130. entry->gpuid = devid;
  1131. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  1132. }
  1133. }
  1134. #ifndef STARPU_SIMGRID
  1135. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  1136. {
  1137. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, devid);
  1138. if (obj)
  1139. {
  1140. struct _starpu_hwloc_userdata *data = obj->userdata;
  1141. data->ngpus++;
  1142. }
  1143. else
  1144. {
  1145. _STARPU_DISP("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid);
  1146. }
  1147. }
  1148. #endif
  1149. #endif
  1150. }
  1151. topology->nworkers += topology->ncudagpus * nworker_per_cuda;
  1152. #endif
  1153. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1154. int nopencl = config->conf.nopencl;
  1155. if (nopencl != 0)
  1156. {
  1157. /* The user did not disable OPENCL. We need to initialize
  1158. * OpenCL early to count the number of devices */
  1159. _starpu_opencl_init();
  1160. int nb_devices;
  1161. nb_devices = _starpu_opencl_get_device_count();
  1162. if (nopencl == -1)
  1163. {
  1164. /* Nothing was specified, so let's choose ! */
  1165. nopencl = nb_devices;
  1166. if (nopencl > STARPU_MAXOPENCLDEVS)
  1167. {
  1168. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  1169. nopencl = STARPU_MAXOPENCLDEVS;
  1170. }
  1171. }
  1172. else
  1173. {
  1174. /* Let's make sure this value is OK. */
  1175. if (nopencl > nb_devices)
  1176. {
  1177. /* The user requested more OpenCL devices than
  1178. * are available */
  1179. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  1180. nopencl = nb_devices;
  1181. }
  1182. /* Let's make sure this value is OK. */
  1183. if (nopencl > STARPU_MAXOPENCLDEVS)
  1184. {
  1185. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  1186. nopencl = STARPU_MAXOPENCLDEVS;
  1187. }
  1188. }
  1189. }
  1190. topology->nopenclgpus = nopencl;
  1191. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  1192. _starpu_initialize_workers_opencl_gpuid(config);
  1193. unsigned openclgpu;
  1194. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  1195. {
  1196. int worker_idx = topology->nworkers + openclgpu;
  1197. int devid = _starpu_get_next_opencl_gpuid(config);
  1198. if (devid == -1)
  1199. {
  1200. // There are no more devices left
  1201. topology->nopenclgpus = openclgpu;
  1202. break;
  1203. }
  1204. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  1205. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1206. config->workers[worker_idx].perf_arch.ndevices = 1;
  1207. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  1208. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1209. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1210. config->workers[worker_idx].subworkerid = 0;
  1211. config->workers[worker_idx].devid = devid;
  1212. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  1213. config->worker_mask |= STARPU_OPENCL;
  1214. }
  1215. topology->nworkers += topology->nopenclgpus;
  1216. #endif
  1217. #ifdef STARPU_USE_SCC
  1218. int nscc = config->conf.nscc;
  1219. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  1220. if (nscc != 0)
  1221. {
  1222. /* The user did not disable SCC. We need to count
  1223. * the number of devices */
  1224. int nb_devices = nb_scc_nodes;
  1225. if (nscc == -1)
  1226. {
  1227. /* Nothing was specified, so let's choose ! */
  1228. nscc = nb_devices;
  1229. if (nscc > STARPU_MAXSCCDEVS)
  1230. {
  1231. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  1232. nscc = STARPU_MAXSCCDEVS;
  1233. }
  1234. }
  1235. else
  1236. {
  1237. /* Let's make sure this value is OK. */
  1238. if (nscc > nb_devices)
  1239. {
  1240. /* The user requires more SCC devices than there is available */
  1241. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  1242. nscc = nb_devices;
  1243. }
  1244. /* Let's make sure this value is OK. */
  1245. if (nscc > STARPU_MAXSCCDEVS)
  1246. {
  1247. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  1248. nscc = STARPU_MAXSCCDEVS;
  1249. }
  1250. }
  1251. }
  1252. /* Now we know how many SCC devices will be used */
  1253. topology->nsccdevices = nscc;
  1254. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  1255. _starpu_initialize_workers_scc_deviceid(config);
  1256. unsigned sccdev;
  1257. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  1258. {
  1259. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  1260. int devid = _starpu_get_next_scc_deviceid(config);
  1261. _STARPU_MALLOC(config->workers[topology->nworkers + sccdev].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1262. config->workers[topology->nworkers + sccdev].perf_arch.ndevices = 1;
  1263. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].type = STARPU_SCC_WORKER;
  1264. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].devid = sccdev;
  1265. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].ncores = 1;
  1266. config->workers[topology->nworkers + sccdev].subworkerid = 0;
  1267. config->workers[topology->nworkers + sccdev].devid = devid;
  1268. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  1269. config->worker_mask |= STARPU_SCC;
  1270. }
  1271. for (; sccdev < nb_scc_nodes; ++sccdev)
  1272. _starpu_scc_exit_useless_node(sccdev);
  1273. topology->nworkers += topology->nsccdevices;
  1274. #endif /* STARPU_USE_SCC */
#if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
	_starpu_init_mp_config(config, &config->conf, no_mp_config);
#endif
	/* We put the CPU section after the accelerators: in case an
	 * accelerator was found, we devote one CPU core to it. */
#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
	int ncpu = config->conf.ncpus;

	if (ncpu != 0)
	{
		if (ncpu == -1)
		{
			unsigned mic_busy_cpus = 0;
			int j = 0;
			for (j = 0; j < STARPU_MAXMICDEVS; j++)
				mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);

			unsigned mpi_ms_busy_cpus = 0;
#ifdef STARPU_USE_MPI_MASTER_SLAVE
#ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
			for (j = 0; j < STARPU_MAXMPIDEVS; j++)
				mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
#else
			mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
#endif
#endif /* STARPU_USE_MPI_MASTER_SLAVE */

			unsigned cuda_busy_cpus = 0;
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			cuda_busy_cpus =
				topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ?
					(topology->ncudagpus ? 1 : 0) :
				topology->cuda_th_per_stream ?
					(nworker_per_cuda * topology->ncudagpus) :
					topology->ncudagpus;
#endif

			unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
				+ cuda_busy_cpus
				+ topology->nopenclgpus + topology->nsccdevices;

			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
			if (avail_cpus < 0)
				avail_cpus = 0;
			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
			avail_cpus *= nth_per_core;
			ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
		}
		else
		{
			if (ncpu > STARPU_MAXCPUS)
			{
				_STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
				ncpu = STARPU_MAXCPUS;
			}
		}
	}

	topology->ncpus = ncpu;
	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);

	unsigned cpu;
	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
	for (cpu = 0; cpu < topology->ncpus; cpu++)
	{
		int worker_idx = topology->nworkers + cpu;
		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
		_STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
		config->workers[worker_idx].perf_arch.ndevices = 1;
		config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
		config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : cpu;
		config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
		config->workers[worker_idx].subworkerid = 0;
		config->workers[worker_idx].devid = cpu;
		config->workers[worker_idx].worker_mask = STARPU_CPU;
		config->worker_mask |= STARPU_CPU;
	}

	topology->nworkers += topology->ncpus;
#endif
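
	/* Illustrative sketch (not code executed here): the ncpu value computed
	 * above can likewise be capped from the application side, either with
	 * the STARPU_NCPU environment variable or via starpu_conf, e.g.:
	 *
	 *	struct starpu_conf conf;
	 *	starpu_conf_init(&conf);
	 *	conf.ncpus = 4;		// use at most four CPU workers
	 *	starpu_init(&conf);
	 *
	 * STARPU_NTHREADS_PER_CORE, read above, additionally multiplies the
	 * number of workers created per available core. Names are taken from
	 * the public StarPU API; treat this as a sketch. */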
	if (topology->nworkers == 0)
	{
		_STARPU_DEBUG("No worker found, aborting ...\n");
		return -ENODEV;
	}

	return 0;
}
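
/* Usage sketch (outside this file): since the function above makes
 * initialization fail with -ENODEV when no worker at all could be created,
 * callers of the public API typically check for that value, e.g.:
 *
 *	int ret = starpu_init(NULL);
 *	if (ret == -ENODEV)
 *		return 77;	// no worker available, skip the test
 *	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 *
 * This mirrors the convention used in the StarPU examples; it is an
 * illustration, not code belonging to topology.c. */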
void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
{
	_starpu_close_debug_logfile();

	unsigned worker;
	for (worker = 0; worker < config->topology.nworkers; worker++)
	{
		struct _starpu_worker *workerarg = &config->workers[worker];
		int bindid = workerarg->bindid;
		free(workerarg->perf_arch.devices);
#ifdef STARPU_HAVE_HWLOC
		hwloc_bitmap_free(workerarg->hwloc_cpu_set);
		if (bindid != -1)
		{
			hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
									config->pu_depth,
									bindid);
			struct _starpu_hwloc_userdata *data = worker_obj->userdata;
			if (data->worker_list)
			{
				_starpu_worker_list_delete(data->worker_list);
				data->worker_list = NULL;
			}
		}
#endif
		if (bindid != -1)
		{
			free(config->bindid_workers[bindid].workerids);
			config->bindid_workers[bindid].workerids = NULL;
		}
	}
	free(config->bindid_workers);
	config->bindid_workers = NULL;
	config->nbindid = 0;

	unsigned combined_worker_id;
	for (combined_worker_id = 0; combined_worker_id < config->topology.ncombinedworkers; combined_worker_id++)
	{
		struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
#ifdef STARPU_HAVE_HWLOC
		hwloc_bitmap_free(combined_worker->hwloc_cpu_set);
#endif
		free(combined_worker->perf_arch.devices);
	}

#ifdef STARPU_HAVE_HWLOC
	_starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology));
	hwloc_topology_destroy(config->topology.hwtopology);
#endif

	topology_is_initialized = 0;
#ifdef STARPU_USE_CUDA
	struct handle_entry *entry, *tmp;
	HASH_ITER(hh, devices_using_cuda, entry, tmp)
	{
		HASH_DEL(devices_using_cuda, entry);
		free(entry);
	}
	devices_using_cuda = NULL;
#endif
#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
	int i;
	for (i = 0; i < STARPU_NARCH; i++)
		may_bind_automatically[i] = 0;
#endif
}
void
_starpu_bind_thread_on_cpu(
	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
	int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
{
#ifdef STARPU_SIMGRID
	return;
#else
	if (nobind > 0)
		return;
	if (cpuid < 0)
		return;

	if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
	{
		int previous = cpu_worker[cpuid];
		if (previous != STARPU_NOWORKERID && previous != workerid)
			_STARPU_DISP("Warning: both workers %d and %d are bound to the same PU %d, this will strongly degrade performance\n", previous, workerid, cpuid);
		else
			cpu_worker[cpuid] = workerid;
	}

#ifdef STARPU_HAVE_HWLOC
	const struct hwloc_topology_support *support;

#ifdef STARPU_USE_OPENCL
	_starpu_opencl_init();
#endif
#ifdef STARPU_USE_CUDA
	_starpu_init_cuda();
#endif
	_starpu_init_topology(config);

	support = hwloc_topology_get_support(config->topology.hwtopology);
	if (support->cpubind->set_thisthread_cpubind)
	{
		hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
							 config->pu_depth, cpuid);
		hwloc_bitmap_t set = obj->cpuset;
		int ret;

		hwloc_bitmap_singlify(set);
		ret = hwloc_set_cpubind(config->topology.hwtopology, set,
					HWLOC_CPUBIND_THREAD);
		if (ret)
		{
			perror("hwloc_set_cpubind");
			STARPU_ABORT();
		}
	}

#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
	int ret;
	/* fix the thread on the correct cpu */
	cpu_set_t aff_mask;
	CPU_ZERO(&aff_mask);
	CPU_SET(cpuid, &aff_mask);

	starpu_pthread_t self = starpu_pthread_self();

	ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
	if (ret)
	{
		const char *msg = strerror(ret);
		_STARPU_MSG("pthread_setaffinity_np: %s\n", msg);
		STARPU_ABORT();
	}

#elif defined(_WIN32)
	DWORD mask = 1 << cpuid;
	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
	{
		_STARPU_ERROR("SetThreadAffinityMask(%lx) failed\n", mask);
	}
#else
#warning no CPU binding support
#endif
#endif
}
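
/* Minimal standalone sketch of the non-hwloc binding path used above,
 * assuming a Linux target with pthread_setaffinity_np(); it is not StarPU
 * code, just the underlying mechanism shown in isolation:
 *
 *	#define _GNU_SOURCE
 *	#include <pthread.h>
 *	#include <sched.h>
 *
 *	static int bind_self_to_cpu(int cpuid)
 *	{
 *		cpu_set_t mask;
 *		CPU_ZERO(&mask);
 *		CPU_SET(cpuid, &mask);
 *		// returns 0 on success, an errno value otherwise
 *		return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
 *	}
 */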
void
_starpu_bind_thread_on_cpus(
	struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
	struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
{
#ifdef STARPU_SIMGRID
	return;
#endif
#ifdef STARPU_HAVE_HWLOC
	const struct hwloc_topology_support *support;

#ifdef STARPU_USE_OPENCL
	_starpu_opencl_init();
#endif
#ifdef STARPU_USE_CUDA
	_starpu_init_cuda();
#endif
	_starpu_init_topology(config);

	support = hwloc_topology_get_support(config->topology.hwtopology);
	if (support->cpubind->set_thisthread_cpubind)
	{
		hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
		int ret;

		ret = hwloc_set_cpubind(config->topology.hwtopology, set,
					HWLOC_CPUBIND_THREAD);
		if (ret)
		{
			perror("binding thread");
			STARPU_ABORT();
		}
	}
#else
#ifdef __GLIBC__
	sched_setaffinity(0, sizeof(combined_worker->cpu_set), &combined_worker->cpu_set);
#else
# warning no parallel worker CPU binding support
#endif
#endif
}
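
/* Companion sketch for the hwloc path above: binding the calling thread to a
 * set of PUs (identified by OS index) with a freshly loaded topology. This is
 * an illustration only, not StarPU code:
 *
 *	#include <hwloc.h>
 *
 *	static int bind_self_to_pus(const unsigned *pus, unsigned npus)
 *	{
 *		hwloc_topology_t topo;
 *		hwloc_bitmap_t set = hwloc_bitmap_alloc();
 *		unsigned i;
 *		int ret;
 *		hwloc_topology_init(&topo);
 *		hwloc_topology_load(topo);
 *		for (i = 0; i < npus; i++)
 *			hwloc_bitmap_set(set, pus[i]);
 *		ret = hwloc_set_cpubind(topo, set, HWLOC_CPUBIND_THREAD);
 *		hwloc_bitmap_free(set);
 *		hwloc_topology_destroy(topo);
 *		return ret;
 *	}
 */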
static void
_starpu_init_workers_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
{
	/* launch one thread per CPU */
	unsigned ram_memory_node;

	/* note that even if the CPU workers are not used, we always have a RAM
	 * node */
	/* TODO: support NUMA ;) */
	ram_memory_node = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
	STARPU_ASSERT(ram_memory_node == STARPU_MAIN_RAM);

#ifdef STARPU_SIMGRID
	char name[16];
	msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
	STARPU_ASSERT(host);
	_starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
#endif

	/* We will store the bus ids of all the different (src, dst)
	 * combinations in a matrix which we initialize here. */
	_starpu_initialize_busid_matrix();

	/* Each device is initialized,
	 * giving it a memory node and a core bind id.
	 */
	/* TODO: STARPU_MAXNUMANODES */
	unsigned numa_init[1] = { 1 };
	unsigned numa_memory_nodes[1] = { ram_memory_node };
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
	unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
	unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
	unsigned cuda_bindid[STARPU_MAXCUDADEVS];
	int cuda_globalbindid = -1;
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
	unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
	unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
	unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
#endif
#ifdef STARPU_USE_MIC
	unsigned mic_init[STARPU_MAXMICDEVS] = { };
	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
	unsigned mic_bindid[STARPU_MAXMICDEVS];
#endif
#ifdef STARPU_USE_MPI_MASTER_SLAVE
	unsigned mpi_init[STARPU_MAXMPIDEVS] = { };
	unsigned mpi_memory_nodes[STARPU_MAXMPIDEVS];
	unsigned mpi_bindid[STARPU_MAXMPIDEVS];
#endif
	unsigned bindid;

	for (bindid = 0; bindid < config->nbindid; bindid++)
	{
		free(config->bindid_workers[bindid].workerids);
		config->bindid_workers[bindid].workerids = NULL;
		config->bindid_workers[bindid].nworkers = 0;
	}
	unsigned worker;
	for (worker = 0; worker < config->topology.nworkers; worker++)
	{
		unsigned memory_node = -1;
		struct _starpu_worker *workerarg = &config->workers[worker];
		unsigned devid = workerarg->devid;
#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
		/* Perhaps the worker has some "favourite" bindings */
		int *preferred_binding = NULL;
		int npreferred = 0;
#endif
		/* select the memory node that contains worker's memory */
		switch (workerarg->arch)
		{
			case STARPU_CPU_WORKER:
			{
				/* TODO: NUMA */
				int numaid = 0;
				/* "dedicate" a cpu core to that worker */
				if (numa_init[numaid])
				{
					memory_node = numa_memory_nodes[numaid];
				}
				else
				{
					numa_init[numaid] = 1;
					memory_node = numa_memory_nodes[numaid] = _starpu_memory_node_register(STARPU_CPU_RAM, numaid);
#ifdef STARPU_SIMGRID
					snprintf(name, sizeof(name), "RAM%d", numaid);
					host = _starpu_simgrid_get_host_by_name(name);
					STARPU_ASSERT(host);
					_starpu_simgrid_memory_node_set_host(memory_node, host);
#endif
				}
				workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
				_starpu_memory_node_add_nworkers(memory_node);
				_starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(workerarg, memory_node);
				break;
			}
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			case STARPU_CUDA_WORKER:
#ifndef STARPU_SIMGRID
				if (may_bind_automatically[STARPU_CUDA_WORKER])
				{
					/* StarPU is allowed to bind threads automatically */
					preferred_binding = _starpu_get_cuda_affinity_vector(devid);
					npreferred = config->topology.nhwpus;
				}
#endif /* SIMGRID */
				if (cuda_init[devid])
				{
					memory_node = cuda_memory_nodes[devid];
					if (config->topology.cuda_th_per_stream == 0)
						workerarg->bindid = cuda_bindid[devid];
					else
						workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
				}
				else
				{
					cuda_init[devid] = 1;
					if (config->topology.cuda_th_per_dev == 0 && config->topology.cuda_th_per_stream == 0)
					{
						if (cuda_globalbindid == -1)
							cuda_globalbindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
						workerarg->bindid = cuda_bindid[devid] = cuda_globalbindid;
					}
					else
						workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
					memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid);

					_starpu_cuda_bus_ids[0][devid+1] = _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
					_starpu_cuda_bus_ids[devid+1][0] = _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
#ifdef STARPU_SIMGRID
					const char *cuda_memcpy_peer;
					snprintf(name, sizeof(name), "CUDA%u", devid);
					host = _starpu_simgrid_get_host_by_name(name);
					STARPU_ASSERT(host);
					_starpu_simgrid_memory_node_set_host(memory_node, host);
					cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
#endif /* SIMGRID */
					if (
#ifdef STARPU_SIMGRID
						cuda_memcpy_peer && atoll(cuda_memcpy_peer)
#elif defined(HAVE_CUDA_MEMCPY_PEER)
						1
#else /* MEMCPY_PEER */
						0
#endif /* MEMCPY_PEER */
					   )
					{
						unsigned worker2;
						for (worker2 = 0; worker2 < worker; worker2++)
						{
							struct _starpu_worker *workerarg2 = &config->workers[worker2];
							int devid2 = workerarg2->devid;
							if (workerarg2->arch == STARPU_CUDA_WORKER)
							{
								unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
								_starpu_cuda_bus_ids[devid2][devid] = _starpu_register_bus(memory_node2, memory_node);
								_starpu_cuda_bus_ids[devid][devid2] = _starpu_register_bus(memory_node, memory_node2);
#ifndef STARPU_SIMGRID
#if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
								{
									hwloc_obj_t obj, obj2, ancestor;
									obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid);
									obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2);
									ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2);
									if (ancestor)
									{
										struct _starpu_hwloc_userdata *data = ancestor->userdata;
#ifdef STARPU_VERBOSE
										{
											char name[64];
											hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0);
											_STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus);
										}
#endif
										starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2][devid], data->ngpus);
										starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid][devid2], data->ngpus);
									}
								}
#endif
#endif
							}
						}
					}
				}
				_starpu_memory_node_add_nworkers(memory_node);
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
				break;
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			case STARPU_OPENCL_WORKER:
#ifndef STARPU_SIMGRID
				if (may_bind_automatically[STARPU_OPENCL_WORKER])
				{
					/* StarPU is allowed to bind threads automatically */
					preferred_binding = _starpu_get_opencl_affinity_vector(devid);
					npreferred = config->topology.nhwpus;
				}
#endif /* SIMGRID */
				if (opencl_init[devid])
				{
					memory_node = opencl_memory_nodes[devid];
#ifndef STARPU_SIMGRID
					workerarg->bindid = opencl_bindid[devid];
#endif /* SIMGRID */
				}
				else
				{
					opencl_init[devid] = 1;
					workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
					memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid);
					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
#ifdef STARPU_SIMGRID
					snprintf(name, sizeof(name), "OpenCL%u", devid);
					host = _starpu_simgrid_get_host_by_name(name);
					STARPU_ASSERT(host);
					_starpu_simgrid_memory_node_set_host(memory_node, host);
#endif /* SIMGRID */
				}
				_starpu_memory_node_add_nworkers(memory_node);
				_starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(workerarg, memory_node);
				break;
#endif
#ifdef STARPU_USE_MIC
			case STARPU_MIC_WORKER:
				if (mic_init[devid])
				{
					memory_node = mic_memory_nodes[devid];
				}
				else
				{
					mic_init[devid] = 1;
					/* TODO */
					//if (may_bind_automatically)
					//{
					//	/* StarPU is allowed to bind threads automatically */
					//	preferred_binding = _starpu_get_mic_affinity_vector(devid);
					//	npreferred = config->topology.nhwpus;
					//}
					mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
					memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid);
					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
				}
				workerarg->bindid = mic_bindid[devid];
				_starpu_memory_node_add_nworkers(memory_node);
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
				break;
#endif /* STARPU_USE_MIC */
#ifdef STARPU_USE_SCC
			case STARPU_SCC_WORKER:
			{
				/* Node 0 represents the SCC shared memory when we're on SCC. */
				struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
				descr->nodes[ram_memory_node] = STARPU_SCC_SHM;

				memory_node = ram_memory_node;
				_starpu_memory_node_add_nworkers(memory_node);
				_starpu_worker_drives_memory_node(workerarg, STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(workerarg, memory_node);
			}
			break;
#endif /* STARPU_USE_SCC */
#ifdef STARPU_USE_MPI_MASTER_SLAVE
			case STARPU_MPI_MS_WORKER:
			{
				if (mpi_init[devid])
				{
					memory_node = mpi_memory_nodes[devid];
				}
				else
				{
					mpi_init[devid] = 1;
					mpi_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
					memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid);
					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
				}
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], STARPU_MAIN_RAM);
				_starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
#ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
				/* The single MPI driver thread can manage all slave memories
				 * when multiple-thread support is disabled */
				unsigned findworker;
				for (findworker = 0; findworker < worker; findworker++)
				{
					struct _starpu_worker *findworkerarg = &config->workers[findworker];
					if (findworkerarg->arch == STARPU_MPI_MS_WORKER)
					{
						_starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node);
						_starpu_worker_drives_memory_node(findworkerarg, memory_node);
					}
				}
#endif
				workerarg->bindid = mpi_bindid[devid];
				_starpu_memory_node_add_nworkers(memory_node);
				break;
			}
#endif /* STARPU_USE_MPI_MASTER_SLAVE */
			default:
				STARPU_ABORT();
		}

		workerarg->memory_node = memory_node;

		_STARPU_DEBUG("worker %d type %d devid %u bound to cpu %d, STARPU memory node %u\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);

#ifdef __GLIBC__
		if (workerarg->bindid != -1)
		{
			/* Save the initial cpuset */
			CPU_ZERO(&workerarg->cpu_set);
			CPU_SET(workerarg->bindid, &workerarg->cpu_set);
		}
#endif /* __GLIBC__ */
#ifdef STARPU_HAVE_HWLOC
		if (workerarg->bindid == -1)
		{
			workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
		}
		else
		{
			/* Put the worker descriptor in the userdata field of the
			 * hwloc object describing the CPU */
			hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
									config->pu_depth,
									workerarg->bindid);
			struct _starpu_hwloc_userdata *data = worker_obj->userdata;
			if (data->worker_list == NULL)
				data->worker_list = _starpu_worker_list_new();
			_starpu_worker_list_push_front(data->worker_list, workerarg);

			/* The worker's cpuset is a copy of the PU's cpuset */
			workerarg->hwloc_cpu_set = hwloc_bitmap_dup(worker_obj->cpuset);
		}
#endif
		if (workerarg->bindid != -1)
		{
			bindid = workerarg->bindid;
			unsigned old_nbindid = config->nbindid;
			if (bindid >= old_nbindid)
			{
				/* More room needed */
				if (!old_nbindid)
					config->nbindid = STARPU_NMAXWORKERS;
				else
					config->nbindid = 2 * old_nbindid;
				_STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0]));
				memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0]));
			}
			/* Add a slot for this worker; don't bother amortizing the cost,
			 * there are usually very few workers sharing the same bindid */
			config->bindid_workers[bindid].nworkers++;
			_STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0]));
			config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker;
		}
	}

#ifdef STARPU_SIMGRID
	_starpu_simgrid_count_ngpus();
#else
#ifdef STARPU_HAVE_HWLOC
	_starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology));
#endif
#endif
}
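
/* Illustrative sketch (application side): once the binding above has been
 * performed, the worker/memory-node mapping can be inspected through the
 * public API, e.g.:
 *
 *	unsigned w, nw = starpu_worker_get_count();
 *	for (w = 0; w < nw; w++)
 *	{
 *		char name[64];
 *		starpu_worker_get_name(w, name, sizeof(name));
 *		printf("%s -> memory node %u\n", name, starpu_worker_get_memory_node(w));
 *	}
 *
 * Function names are taken from the public StarPU headers; this is an
 * example, not part of the internal initialization. */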
int
_starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config)
{
	int ret;
	unsigned i;

	ret = _starpu_init_machine_config(config, no_mp_config);
	if (ret)
		return ret;

	/* for the data management library */
	_starpu_memory_nodes_init();
	_starpu_datastats_init();

	_starpu_init_workers_binding(config, no_mp_config);

	config->cpus_nodeid = -1;
	config->cuda_nodeid = -1;
	config->opencl_nodeid = -1;
	config->mic_nodeid = -1;
	config->scc_nodeid = -1;
	config->mpi_nodeid = -1;
	for (i = 0; i < starpu_worker_get_count(); i++)
	{
		switch (starpu_worker_get_type(i))
		{
			case STARPU_CPU_WORKER:
				if (config->cpus_nodeid == -1)
					config->cpus_nodeid = starpu_worker_get_memory_node(i);
				else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
					config->cpus_nodeid = -2;
				break;
			case STARPU_CUDA_WORKER:
				if (config->cuda_nodeid == -1)
					config->cuda_nodeid = starpu_worker_get_memory_node(i);
				else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
					config->cuda_nodeid = -2;
				break;
			case STARPU_OPENCL_WORKER:
				if (config->opencl_nodeid == -1)
					config->opencl_nodeid = starpu_worker_get_memory_node(i);
				else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
					config->opencl_nodeid = -2;
				break;
			case STARPU_MIC_WORKER:
				if (config->mic_nodeid == -1)
					config->mic_nodeid = starpu_worker_get_memory_node(i);
				else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
					config->mic_nodeid = -2;
				break;
			case STARPU_SCC_WORKER:
				if (config->scc_nodeid == -1)
					config->scc_nodeid = starpu_worker_get_memory_node(i);
				else if (config->scc_nodeid != (int) starpu_worker_get_memory_node(i))
					config->scc_nodeid = -2;
				break;
			case STARPU_MPI_MS_WORKER:
				if (config->mpi_nodeid == -1)
					config->mpi_nodeid = starpu_worker_get_memory_node(i);
				else if (config->mpi_nodeid != (int) starpu_worker_get_memory_node(i))
					config->mpi_nodeid = -2;
				break;
			case STARPU_ANY_WORKER:
				STARPU_ASSERT(0);
		}
	}

	return 0;
}
void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
{
#if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
	_starpu_deinit_mp_config(config);
#endif

	/* cleanup StarPU internal data structures */
	_starpu_memory_nodes_deinit();

	_starpu_destroy_machine_config(config);
}
void
starpu_topology_print(FILE *output)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	struct _starpu_machine_topology *topology = &config->topology;

	unsigned pu;
	unsigned worker;
	unsigned nworkers = starpu_worker_get_count();
	unsigned ncombinedworkers = topology->ncombinedworkers;
	unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;

#ifdef STARPU_HAVE_HWLOC
	hwloc_topology_t topo = topology->hwtopology;
	hwloc_obj_t pu_obj;
	hwloc_obj_t last_numa_obj = NULL, numa_obj;
	hwloc_obj_t last_package_obj = NULL, package_obj;
#endif

	for (pu = 0; pu < topology->nhwpus; pu++)
	{
#ifdef STARPU_HAVE_HWLOC
		pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu);
		numa_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NODE, pu_obj);
		if (numa_obj != last_numa_obj)
		{
			fprintf(output, "numa %u", numa_obj->logical_index);
			last_numa_obj = numa_obj;
		}
		fprintf(output, "\t");
		package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj);
		if (package_obj != last_package_obj)
		{
			fprintf(output, "pack %u", package_obj->logical_index);
			last_package_obj = package_obj;
		}
		fprintf(output, "\t");
#endif
		if ((pu % nthreads_per_core) == 0)
			fprintf(output, "core %u", pu / nthreads_per_core);
		fprintf(output, "\tPU %u\t", pu);
		for (worker = 0;
		     worker < nworkers + ncombinedworkers;
		     worker++)
		{
			if (worker < nworkers)
			{
				struct _starpu_worker *workerarg = &config->workers[worker];

				if (workerarg->bindid == (int) pu)
				{
					char name[256];
					starpu_worker_get_name(worker, name, sizeof(name));
					fprintf(output, "%s\t", name);
				}
			}
			else
			{
				int worker_size, i;
				int *combined_workerid;
				starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
				for (i = 0; i < worker_size; i++)
				{
					if (topology->workers_bindid[combined_workerid[i]] == pu)
						fprintf(output, "comb %u\t", worker - nworkers);
				}
			}
		}
		fprintf(output, "\n");
	}
}
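
/* Usage sketch: starpu_topology_print() is a public entry point; after
 * starpu_init() an application can dump the PU/worker binding table with:
 *
 *	starpu_init(NULL);
 *	starpu_topology_print(stdout);
 *	starpu_shutdown();
 *
 * Illustration only; the exact output format is the one produced above. */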