topology.c

  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2017 Inria
  4. * Copyright (C) 2009-2018 Université de Bordeaux
  5. * Copyright (C) 2010-2017 CNRS
  6. * Copyright (C) 2013 Thibaut Lambert
  7. * Copyright (C) 2016 Uppsala University
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <core/workers.h>
  24. #include <core/debug.h>
  25. #include <core/topology.h>
  26. #include <drivers/cuda/driver_cuda.h>
  27. #include <drivers/mic/driver_mic_source.h>
  28. #include <drivers/scc/driver_scc_source.h>
  29. #include <drivers/mpi/driver_mpi_source.h>
  30. #include <drivers/mpi/driver_mpi_common.h>
  31. #include <drivers/mp_common/source_common.h>
  32. #include <drivers/opencl/driver_opencl.h>
  33. #include <drivers/opencl/driver_opencl_utils.h>
  34. #include <profiling/profiling.h>
  35. #include <datawizard/datastats.h>
  36. #include <datawizard/memory_nodes.h>
  37. #include <common/uthash.h>
  38. #ifdef STARPU_HAVE_HWLOC
  39. #include <hwloc.h>
  40. #ifndef HWLOC_API_VERSION
  41. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  42. #endif
  43. #if HWLOC_API_VERSION < 0x00010b00
  44. #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
  45. #endif
  46. #endif
  47. #ifdef STARPU_HAVE_WINDOWS
  48. #include <windows.h>
  49. #endif
  50. #ifdef STARPU_SIMGRID
  51. #include <core/simgrid.h>
  52. #endif
  53. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  54. #include <hwloc/cuda.h>
  55. #endif
  56. #if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_OPENCL)
  57. #include <hwloc/opencl.h>
  58. #endif
  59. static unsigned topology_is_initialized = 0;
  60. static int nobind;
  61. /* For checking whether two workers share the same PU, indexed by PU number */
  62. static int cpu_worker[STARPU_MAXCPUS];
  63. static unsigned nb_numa_nodes = 0;
  64. static int numa_memory_nodes_to_hwloclogid[STARPU_MAXNUMANODES]; /* indexed by StarPU NUMA node, to convert to the hwloc logical id */
  65. static int numa_memory_nodes_to_physicalid[STARPU_MAXNUMANODES]; /* indexed by StarPU NUMA node, to convert to the physical id */
  66. static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
  67. static int _starpu_get_logical_numa_node_worker(unsigned workerid);
  68. #define STARPU_NUMA_UNINITIALIZED (-2)
  69. #define STARPU_NUMA_MAIN_RAM (-1)
  70. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  71. struct handle_entry
  72. {
  73. UT_hash_handle hh;
  74. unsigned gpuid;
  75. };
  76. # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  77. /* Entry in the `devices_using_cuda' hash table. */
  78. static struct handle_entry *devices_using_cuda;
  79. # endif
  80. static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };
  81. #endif // defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  82. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  83. static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
  84. #endif
  85. #ifdef STARPU_USE_MIC
  86. static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  87. #endif
  88. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  89. struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
  90. #endif
  91. int starpu_memory_nodes_get_numa_count(void)
  92. {
  93. return nb_numa_nodes;
  94. }
  95. #if defined(STARPU_HAVE_HWLOC)
  96. static hwloc_obj_t numa_get_obj(hwloc_obj_t obj)
  97. {
  98. #if HWLOC_API_VERSION >= 0x00020000
  99. while (obj->memory_first_child == NULL)
  100. {
  101. obj = obj->parent;
  102. if (!obj)
  103. return NULL;
  104. }
  105. return obj->memory_first_child;
  106. #else
  107. while (obj->type != HWLOC_OBJ_NUMANODE)
  108. {
  109. obj = obj->parent;
  110. /* If we don't find a "node" obj before the root, this means
  111. * hwloc does not know whether there are numa nodes or not, so
  112. * we should not use a per-node sampling in that case. */
  113. if (!obj)
  114. return NULL;
  115. }
  116. return obj;
  117. #endif
  118. }
  119. static int numa_get_logical_id(hwloc_obj_t obj)
  120. {
  121. STARPU_ASSERT(obj);
  122. obj = numa_get_obj(obj);
  123. if (!obj)
  124. return 0;
  125. return obj->logical_index;
  126. }
  127. static int numa_get_physical_id(hwloc_obj_t obj)
  128. {
  129. STARPU_ASSERT(obj);
  130. obj = numa_get_obj(obj);
  131. if (!obj)
  132. return 0;
  133. return obj->os_index;
  134. }
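/* Note: when no NUMA object can be found above the given object, both helpers
 * above fall back to index 0, so callers always get a usable index. */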
  135. #endif
  136. static int _starpu_get_logical_numa_node_worker(unsigned workerid)
  137. {
  138. #if defined(STARPU_HAVE_HWLOC)
  139. if (starpu_get_env_number_default("STARPU_USE_NUMA", 0))
  140. {
  141. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  142. struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
  143. struct _starpu_machine_topology *topology = &config->topology ;
  144. hwloc_obj_t obj;
  145. switch(worker->arch)
  146. {
  147. case STARPU_CPU_WORKER:
  148. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  149. break;
  150. default:
  151. STARPU_ABORT();
  152. }
  153. return numa_get_logical_id(obj);
  154. }
  155. else
  156. #endif
  157. {
  158. (void) workerid; /* unused */
  159. return STARPU_NUMA_MAIN_RAM;
  160. }
  161. }
  162. static int _starpu_get_physical_numa_node_worker(unsigned workerid)
  163. {
  164. #if defined(STARPU_HAVE_HWLOC)
  165. if (starpu_get_env_number_default("STARPU_USE_NUMA", 0))
  166. {
  167. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  168. struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
  169. struct _starpu_machine_topology *topology = &config->topology ;
  170. hwloc_obj_t obj;
  171. switch(worker->arch)
  172. {
  173. case STARPU_CPU_WORKER:
  174. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  175. break;
  176. default:
  177. STARPU_ABORT();
  178. }
  179. return numa_get_physical_id(obj);
  180. }
  181. else
  182. #endif
  183. {
  184. (void) workerid; /* unused */
  185. return STARPU_NUMA_MAIN_RAM;
  186. }
  187. }
  188. struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  189. {
  190. unsigned nworkers = starpu_worker_get_count();
  191. unsigned workerid;
  192. for (workerid = 0; workerid < nworkers; workerid++)
  193. {
  194. if (starpu_worker_get_type(workerid) == d->type)
  195. {
  196. struct _starpu_worker *worker;
  197. worker = _starpu_get_worker_struct(workerid);
  198. switch (d->type)
  199. {
  200. #ifdef STARPU_USE_CPU
  201. case STARPU_CPU_WORKER:
  202. if (worker->devid == d->id.cpu_id)
  203. return worker;
  204. break;
  205. #endif
  206. #ifdef STARPU_USE_OPENCL
  207. case STARPU_OPENCL_WORKER:
  208. {
  209. cl_device_id device;
  210. starpu_opencl_get_device(worker->devid, &device);
  211. if (device == d->id.opencl_id)
  212. return worker;
  213. break;
  214. }
  215. #endif
  216. #ifdef STARPU_USE_CUDA
  217. case STARPU_CUDA_WORKER:
  218. {
  219. if (worker->devid == d->id.cuda_id)
  220. return worker;
  221. break;
  222. }
  223. #endif
  224. default:
  225. (void) worker;
  226. _STARPU_DEBUG("Invalid device type\n");
  227. return NULL;
  228. }
  229. }
  230. }
  231. return NULL;
  232. }
  233. /*
  234. * Discover the topology of the machine
  235. */
  236. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  237. static void
  238. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  239. int *current, int *workers_gpuid,
  240. const char *varname, unsigned nhwgpus,
  241. enum starpu_worker_archtype type)
  242. {
  243. char *strval;
  244. unsigned i;
  245. *current = 0;
  246. /* conf->workers_gpuid indicates the successive GPU identifiers that
  247. * should be used to bind the workers. It should be either filled
  248. * according to the user's explicit parameters (from starpu_conf) or
  249. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  250. * round-robin policy is used to distribute the workers over the
  251. * cores. */
  252. /* what do we use, explicit value, env. variable, or round-robin ? */
  253. strval = starpu_getenv(varname);
  254. if (strval)
  255. {
  256. /* STARPU_WORKERS_CUDAID certainly contains fewer entries than
  257. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  258. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  259. * 2". */
  260. unsigned wrap = 0;
  261. unsigned number_of_entries = 0;
  262. char *endptr;
  263. /* we use the content of the STARPU_WORKERS_CUDAID
  264. * env. variable */
  265. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  266. {
  267. if (!wrap)
  268. {
  269. long int val;
  270. val = strtol(strval, &endptr, 10);
  271. if (endptr != strval)
  272. {
  273. workers_gpuid[i] = (unsigned)val;
  274. strval = endptr;
  275. }
  276. else
  277. {
  278. /* there must be at least one entry */
  279. STARPU_ASSERT(i != 0);
  280. number_of_entries = i;
  281. /* there are no more values in the
  282. * string */
  283. wrap = 1;
  284. workers_gpuid[i] = workers_gpuid[0];
  285. }
  286. }
  287. else
  288. {
  289. workers_gpuid[i] =
  290. workers_gpuid[i % number_of_entries];
  291. }
  292. }
  293. }
  294. else if (explicit_workers_gpuid)
  295. {
  296. /* we use the explicit value from the user */
  297. memcpy(workers_gpuid,
  298. explicit_workers_gpuid,
  299. STARPU_NMAXWORKERS*sizeof(unsigned));
  300. }
  301. else
  302. {
  303. /* by default, we take a round robin policy */
  304. if (nhwgpus > 0)
  305. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  306. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  307. /* StarPU can use sampling techniques to bind threads
  308. * correctly */
  309. may_bind_automatically[type] = 1;
  310. }
  311. }
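/* To summarize the precedence implemented above: the environment variable
 * named by VARNAME wins, then the explicit list from starpu_conf, and
 * finally a plain round-robin distribution over the NHWGPUS devices. */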
  312. #endif
  313. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  314. static void
  315. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  316. {
  317. struct _starpu_machine_topology *topology = &config->topology;
  318. struct starpu_conf *uconf = &config->conf;
  319. _starpu_initialize_workers_deviceid (
  320. uconf->use_explicit_workers_cuda_gpuid == 0
  321. ? NULL
  322. : (int *)uconf->workers_cuda_gpuid,
  323. &(config->current_cuda_gpuid),
  324. (int *)topology->workers_cuda_gpuid,
  325. "STARPU_WORKERS_CUDAID",
  326. topology->nhwcudagpus,
  327. STARPU_CUDA_WORKER);
  328. }
  329. static inline int
  330. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  331. {
  332. unsigned i =
  333. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  334. return (int)config->topology.workers_cuda_gpuid[i];
  335. }
  336. #endif
  337. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  338. static void
  339. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  340. {
  341. struct _starpu_machine_topology *topology = &config->topology;
  342. struct starpu_conf *uconf = &config->conf;
  343. _starpu_initialize_workers_deviceid(
  344. uconf->use_explicit_workers_opencl_gpuid == 0
  345. ? NULL
  346. : (int *)uconf->workers_opencl_gpuid,
  347. &(config->current_opencl_gpuid),
  348. (int *)topology->workers_opencl_gpuid,
  349. "STARPU_WORKERS_OPENCLID",
  350. topology->nhwopenclgpus,
  351. STARPU_OPENCL_WORKER);
  352. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  353. // Detect devices which are already used with CUDA
  354. {
  355. unsigned tmp[STARPU_NMAXWORKERS];
  356. unsigned nb=0;
  357. int i;
  358. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  359. {
  360. struct handle_entry *entry;
  361. int devid = config->topology.workers_opencl_gpuid[i];
  362. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  363. if (entry == NULL)
  364. {
  365. tmp[nb] = topology->workers_opencl_gpuid[i];
  366. nb++;
  367. }
  368. }
  369. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  370. tmp[i] = -1;
  371. memcpy (topology->workers_opencl_gpuid, tmp,
  372. sizeof(unsigned)*STARPU_NMAXWORKERS);
  373. }
  374. #endif /* STARPU_USE_CUDA || STARPU_SIMGRID */
  375. {
  376. // Detect identical devices
  377. struct handle_entry *devices_already_used = NULL;
  378. unsigned tmp[STARPU_NMAXWORKERS];
  379. unsigned nb=0;
  380. int i;
  381. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  382. {
  383. int devid = topology->workers_opencl_gpuid[i];
  384. struct handle_entry *entry;
  385. HASH_FIND_INT(devices_already_used, &devid, entry);
  386. if (entry == NULL)
  387. {
  388. struct handle_entry *entry2;
  389. _STARPU_MALLOC(entry2, sizeof(*entry2));
  390. entry2->gpuid = devid;
  391. HASH_ADD_INT(devices_already_used, gpuid,
  392. entry2);
  393. tmp[nb] = devid;
  394. nb ++;
  395. }
  396. }
  397. struct handle_entry *entry, *tempo;
  398. HASH_ITER(hh, devices_already_used, entry, tempo)
  399. {
  400. HASH_DEL(devices_already_used, entry);
  401. free(entry);
  402. }
  403. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  404. tmp[i] = -1;
  405. memcpy (topology->workers_opencl_gpuid, tmp,
  406. sizeof(unsigned)*STARPU_NMAXWORKERS);
  407. }
  408. }
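/* At this point the OpenCL device list has been filtered: devices already
 * driven by CUDA and duplicated entries have been dropped, and the unused
 * trailing slots are set to -1. */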
  409. static inline int
  410. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  411. {
  412. unsigned i =
  413. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  414. return (int)config->topology.workers_opencl_gpuid[i];
  415. }
  416. #endif
  417. #if 0
  418. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  419. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  420. {
  421. struct _starpu_machine_topology *topology = &config->topology;
  422. struct starpu_conf *uconf = &config->conf;
  423. _starpu_initialize_workers_deviceid(
  424. uconf->use_explicit_workers_mic_deviceid == 0
  425. ? NULL
  426. : (int *)config->user_conf->workers_mic_deviceid,
  427. &(config->current_mic_deviceid),
  428. (int *)topology->workers_mic_deviceid,
  429. "STARPU_WORKERS_MICID",
  430. topology->nhwmiccores,
  431. STARPU_MIC_WORKER);
  432. }
  433. #endif
  434. #endif
  435. #ifdef STARPU_USE_SCC
  436. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  437. {
  438. struct _starpu_machine_topology *topology = &config->topology;
  439. struct starpu_conf *uconf = &config->conf;
  440. _starpu_initialize_workers_deviceid(
  441. uconf->use_explicit_workers_scc_deviceid == 0
  442. ? NULL
  443. : (int *) uconf->workers_scc_deviceid,
  444. &(config->current_scc_deviceid),
  445. (int *)topology->workers_scc_deviceid,
  446. "STARPU_WORKERS_SCCID",
  447. topology->nhwscc,
  448. STARPU_SCC_WORKER);
  449. }
  450. #endif /* STARPU_USE_SCC */
  451. #if 0
  452. #ifdef STARPU_USE_MIC
  453. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  454. {
  455. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  456. return (int)config->topology.workers_mic_deviceid[i];
  457. }
  458. #endif
  459. #endif
  460. #ifdef STARPU_USE_SCC
  461. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  462. {
  463. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  464. return (int)config->topology.workers_scc_deviceid[i];
  465. }
  466. #endif
  467. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  468. static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
  469. {
  470. unsigned i = ((config->current_mpi_deviceid++) % config->topology.nmpidevices);
  471. return (int)config->topology.workers_mpi_ms_deviceid[i];
  472. }
  473. static void
  474. _starpu_init_mpi_topology (struct _starpu_machine_config *config, long mpi_idx)
  475. {
  476. /* Discover the topology of the MPI node identified by MPI_IDX. That
  477. * is, make this StarPU instance aware of the number of cores available
  478. * on this MPI device. Update the `nhwmpicores' topology field
  479. * accordingly. */
  480. struct _starpu_machine_topology *topology = &config->topology;
  481. int nbcores;
  482. _starpu_src_common_sink_nbcores (_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
  483. topology->nhwmpicores[mpi_idx] = nbcores;
  484. }
  485. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  486. #ifdef STARPU_USE_MIC
  487. static void
  488. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  489. {
  490. /* Discover the topology of the MIC node identified by MIC_IDX. That
  491. * is, make this StarPU instance aware of the number of cores available
  492. * on this MIC device. Update the `nhwmiccores' topology field
  493. * accordingly. */
  494. struct _starpu_machine_topology *topology = &config->topology;
  495. int nbcores;
  496. _starpu_src_common_sink_nbcores (_starpu_mic_nodes[mic_idx], &nbcores);
  497. topology->nhwmiccores[mic_idx] = nbcores;
  498. }
  499. static int
  500. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  501. COIENGINE *coi_handle, COIPROCESS *coi_process)
  502. {
  503. /* Initialize the MIC node of index MIC_IDX. */
  504. struct starpu_conf *user_conf = &config->conf;
  505. char ***argv = _starpu_get_argv();
  506. const char *suffixes[] = {"-mic", "_mic", NULL};
  507. /* Environment variables to send to the Sink; they inform it what kind
  508. * of node it is (architecture and type), as it has no way to discover
  509. * this by itself */
  510. char mic_idx_env[32];
  511. snprintf(mic_idx_env, sizeof(mic_idx_env), "_STARPU_MIC_DEVID=%d", mic_idx);
  512. /* XXX: this is currently necessary so that the remote process does not
  513. * segfault. */
  514. char nb_mic_env[32];
  515. snprintf(nb_mic_env, sizeof(nb_mic_env), "_STARPU_MIC_NB=%d", 2);
  516. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  517. char mic_sink_program_path[1024];
  518. /* Let's get the helper program to run on the MIC device */
  519. int mic_file_found =
  520. _starpu_src_common_locate_file (mic_sink_program_path,
  521. sizeof(mic_sink_program_path),
  522. starpu_getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  523. starpu_getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  524. user_conf->mic_sink_program_path,
  525. (argv ? (*argv)[0] : NULL),
  526. suffixes);
  527. if (0 != mic_file_found)
  528. {
  529. _STARPU_MSG("No MIC program specified, use the environment\n"
  530. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  531. "or the field 'starpu_conf.mic_sink_program_path'\n"
  532. "to define it.\n");
  533. return -1;
  534. }
  535. COIRESULT res;
  536. /* Let's get the handle which lets us manage the remote MIC device */
  537. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  538. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  539. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  540. /* We launch the helper on the MIC device, which will wait for us
  541. * to give it work to do.
  542. * As we will communicate further with the device through SCIF, we
  543. * don't need to keep the process pointer */
  544. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  545. mic_sink_env, 1, NULL, 0, NULL,
  546. coi_process);
  547. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  548. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  549. /* Let's create the node structure, we'll communicate with the peer
  550. * through SCIF thanks to it */
  551. _starpu_mic_nodes[mic_idx] =
  552. _starpu_mp_common_node_create(STARPU_NODE_MIC_SOURCE, mic_idx);
  553. return 0;
  554. }
  555. #endif
  556. #ifndef STARPU_SIMGRID
  557. #ifdef STARPU_HAVE_HWLOC
  558. static void
  559. _starpu_allocate_topology_userdata(hwloc_obj_t obj)
  560. {
  561. unsigned i;
  562. _STARPU_CALLOC(obj->userdata, 1, sizeof(struct _starpu_hwloc_userdata));
  563. for (i = 0; i < obj->arity; i++)
  564. _starpu_allocate_topology_userdata(obj->children[i]);
  565. #if HWLOC_API_VERSION >= 0x00020000
  566. hwloc_obj_t child;
  567. for (child = obj->io_first_child; child; child = child->next_sibling)
  568. _starpu_allocate_topology_userdata(child);
  569. #endif
  570. }
  571. static void
  572. _starpu_deallocate_topology_userdata(hwloc_obj_t obj)
  573. {
  574. unsigned i;
  575. struct _starpu_hwloc_userdata *data = obj->userdata;
  576. STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1);
  577. free(data);
  578. for (i = 0; i < obj->arity; i++)
  579. _starpu_deallocate_topology_userdata(obj->children[i]);
  580. #if HWLOC_API_VERSION >= 0x00020000
  581. hwloc_obj_t child;
  582. for (child = obj->io_first_child; child; child = child->next_sibling)
  583. _starpu_deallocate_topology_userdata(child);
  584. #endif
  585. }
  586. #endif
  587. #endif
  588. static void
  589. _starpu_init_topology (struct _starpu_machine_config *config)
  590. {
  591. /* Discover the topology, meaning finding all the available PUs for
  592. the compiled drivers. These drivers MUST have been initialized
  593. before calling this function. The discovered topology is filled in
  594. CONFIG. */
  595. struct _starpu_machine_topology *topology = &config->topology;
  596. if (topology_is_initialized)
  597. return;
  598. nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
  599. topology->nhwcpus = 0;
  600. topology->nhwpus = 0;
  601. #ifndef STARPU_SIMGRID
  602. #ifdef STARPU_HAVE_HWLOC
  603. hwloc_topology_init(&topology->hwtopology);
  604. _starpu_topology_filter(topology->hwtopology);
  605. hwloc_topology_load(topology->hwtopology);
  606. _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
  607. #endif
  608. #endif
  609. #ifdef STARPU_SIMGRID
  610. config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  611. #elif defined(STARPU_HAVE_HWLOC)
  612. /* Discover the CPUs relying on the hwloc interface and fill CONFIG
  613. * accordingly. */
  614. config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
  615. HWLOC_OBJ_CORE);
  616. config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
  617. HWLOC_OBJ_PU);
  618. /* Would be very odd */
  619. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  620. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  621. {
  622. /* unknown, using logical processors as fallback */
  623. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  624. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  625. HWLOC_OBJ_PU);
  626. }
  627. topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  628. config->cpu_depth);
  629. topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  630. config->pu_depth);
  631. #elif defined(HAVE_SYSCONF)
  632. /* Discover the CPUs relying on the sysconf(3) function and fill
  633. * CONFIG accordingly. */
  634. config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  635. #elif defined(_WIN32)
  636. /* Discover the CPUs on Cygwin and MinGW systems. */
  637. SYSTEM_INFO sysinfo;
  638. GetSystemInfo(&sysinfo);
  639. config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  640. #else
  641. #warning no way to know number of cores, assuming 1
  642. config->topology.nhwcpus = config->topology.nhwpus = 1;
  643. #endif
  644. _starpu_cuda_discover_devices(config);
  645. _starpu_opencl_discover_devices(config);
  646. #ifdef STARPU_USE_SCC
  647. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  648. #endif
  649. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  650. config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
  651. #endif
  652. topology_is_initialized = 1;
  653. }
  654. /*
  655. * Bind workers on the different processors
  656. */
  657. static void
  658. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  659. {
  660. char *strval;
  661. unsigned i;
  662. struct _starpu_machine_topology *topology = &config->topology;
  663. config->current_bindid = 0;
  664. /* conf->workers_bindid indicates the successive logical PU identifiers that
  665. * should be used to bind the workers. It should be either filled
  666. * according to the user's explicit parameters (from starpu_conf) or
  667. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  668. * round-robin policy is used to distribute the workers over the
  669. * cores. */
  670. /* what do we use, explicit value, env. variable, or round-robin ? */
  671. strval = starpu_getenv("STARPU_WORKERS_CPUID");
  672. if (strval)
  673. {
  674. /* STARPU_WORKERS_CPUID certainly contains fewer entries than
  675. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  676. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  677. * 2". */
  678. unsigned wrap = 0;
  679. unsigned number_of_entries = 0;
  680. char *endptr;
  681. /* we use the content of the STARPU_WORKERS_CPUID
  682. * env. variable */
  683. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  684. {
  685. if (!wrap)
  686. {
  687. long int val;
  688. val = strtol(strval, &endptr, 10);
  689. if (endptr != strval)
  690. {
  691. topology->workers_bindid[i] =
  692. (unsigned)(val % topology->nhwpus);
  693. strval = endptr;
  694. if (*strval == '-')
  695. {
  696. /* range of values */
  697. long int endval;
  698. strval++;
  699. if (*strval && *strval != ' ' && *strval != ',')
  700. {
  701. endval = strtol(strval, &endptr, 10);
  702. strval = endptr;
  703. }
  704. else
  705. {
  706. endval = topology->nhwpus-1;
  707. if (*strval)
  708. strval++;
  709. }
  710. for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
  711. {
  712. i++;
  713. topology->workers_bindid[i] =
  714. (unsigned)(val % topology->nhwpus);
  715. }
  716. }
  717. if (*strval == ',')
  718. strval++;
  719. }
  720. else
  721. {
  722. /* there must be at least one entry */
  723. STARPU_ASSERT(i != 0);
  724. number_of_entries = i;
  725. /* there are no more values in the
  726. * string */
  727. wrap = 1;
  728. topology->workers_bindid[i] =
  729. topology->workers_bindid[0];
  730. }
  731. }
  732. else
  733. {
  734. topology->workers_bindid[i] =
  735. topology->workers_bindid[i % number_of_entries];
  736. }
  737. }
  738. }
  739. else if (config->conf.use_explicit_workers_bindid)
  740. {
  741. /* we use the explicit value from the user */
  742. memcpy(topology->workers_bindid,
  743. config->conf.workers_bindid,
  744. STARPU_NMAXWORKERS*sizeof(unsigned));
  745. }
  746. else
  747. {
  748. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  749. int k;
  750. int nbindids=0;
  751. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  752. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  753. i = 0; /* PU number currently assigned */
  754. k = 0; /* Number of threads already put on the current core */
  755. while(nbindids < STARPU_NMAXWORKERS)
  756. {
  757. if (k >= nth_per_core)
  758. {
  759. /* We have already put enough workers on this
  760. * core, skip remaining PUs from this core, and
  761. * proceed with next core */
  762. i += nhyperthreads-nth_per_core;
  763. k = 0;
  764. continue;
  765. }
  766. /* Add a worker to this core, by using this logical PU */
  767. topology->workers_bindid[nbindids++] =
  768. (unsigned)(i % topology->nhwpus);
  769. k++;
  770. i++;
  771. }
  772. }
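/* Example of the default policy (assuming 4 cores with 2 hardware threads
 * each and STARPU_NTHREADS_PER_CORE=1): workers are bound to PUs 0, 2, 4, 6,
 * i.e. one logical PU per physical core, skipping the extra hyperthreads. */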
  773. for (i = 0; i < STARPU_MAXCPUS;i++)
  774. cpu_worker[i] = STARPU_NOWORKERID;
  775. /* no binding yet */
  776. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  777. }
  778. /* This function gets the identifier of the next core on which to bind a
  779. * worker. In case a list of preferred cores was specified (logical indexes),
  780. * we look for an available core in the list if possible; otherwise a
  781. * round-robin policy is used. */
  782. static inline int
  783. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  784. int *preferred_binding, int npreferred)
  785. {
  786. struct _starpu_machine_topology *topology = &config->topology;
  787. int current_preferred;
  788. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  789. unsigned i;
  790. if (npreferred)
  791. {
  792. STARPU_ASSERT_MSG(preferred_binding, "Passing NULL pointer for parameter preferred_binding with a non-0 value of parameter npreferred");
  793. }
  794. /* loop over the preference list */
  795. for (current_preferred = 0;
  796. current_preferred < npreferred;
  797. current_preferred++)
  798. {
  799. /* Try to get this core */
  800. unsigned requested_core = preferred_binding[current_preferred];
  801. unsigned requested_bindid = requested_core * nhyperthreads;
  802. /* can we bind the worker on the preferred core ? */
  803. unsigned ind;
  804. /* Look at the remaining cores to be bound to */
  805. for (ind = 0;
  806. ind < topology->nhwpus / nhyperthreads;
  807. ind++)
  808. {
  809. if (topology->workers_bindid[ind] == requested_bindid && !config->currently_bound[ind])
  810. {
  811. /* the cpu is available, we use it ! */
  812. config->currently_bound[ind] = 1;
  813. return requested_bindid;
  814. }
  815. }
  816. }
  817. for (i = config->current_bindid; i < topology->nhwpus / nhyperthreads; i++)
  818. if (!config->currently_bound[i])
  819. /* Found a cpu ready for use, use it! */
  820. break;
  821. STARPU_ASSERT(i < topology->nhwpus / nhyperthreads);
  822. int bindid = topology->workers_bindid[i];
  823. config->currently_bound[i] = 1;
  824. i++;
  825. if (i == topology->nhwpus / nhyperthreads)
  826. {
  827. /* Finished binding on all cpus, restart from start in
  828. * case the user really wants overloading */
  829. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  830. i = 0;
  831. }
  832. config->current_bindid = i;
  833. return bindid;
  834. }
  835. unsigned
  836. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  837. {
  838. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  839. _starpu_opencl_init();
  840. #endif
  841. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  842. _starpu_init_cuda();
  843. #endif
  844. _starpu_init_topology(config);
  845. return config->topology.nhwcpus;
  846. }
  847. unsigned
  848. _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
  849. {
  850. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  851. _starpu_opencl_init();
  852. #endif
  853. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  854. _starpu_init_cuda();
  855. #endif
  856. _starpu_init_topology(config);
  857. return config->topology.nhwpus;
  858. }
  859. unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  860. {
  861. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  862. _starpu_opencl_init();
  863. #endif
  864. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  865. _starpu_init_cuda();
  866. #endif
  867. _starpu_init_topology(config);
  868. int res;
  869. #if defined(STARPU_HAVE_HWLOC)
  870. if (starpu_get_env_number_default("STARPU_USE_NUMA", 0))
  871. {
  872. struct _starpu_machine_topology *topology = &config->topology ;
  873. int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NUMANODE) ;
  874. res = nnumanodes > 0 ? nnumanodes : 1 ;
  875. }
  876. else
  877. #endif
  878. {
  879. res = 1;
  880. }
  881. STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered is higher than the maximum accepted! Use the configure option --enable-maxnumanodes=xxx to increase the maximum number of supported NUMA nodes.\n");
  882. return res;
  883. }
  884. //TODO: turn this into an array lookup
  885. int starpu_memory_nodes_numa_hwloclogid_to_id(int logid)
  886. {
  887. unsigned n;
  888. for (n = 0; n < nb_numa_nodes; n++)
  889. if (numa_memory_nodes_to_hwloclogid[n] == logid)
  890. return n;
  891. return -1;
  892. }
  893. int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id)
  894. {
  895. STARPU_ASSERT(id < STARPU_MAXNUMANODES);
  896. return numa_memory_nodes_to_hwloclogid[id];
  897. }
  898. int starpu_memory_nodes_numa_devid_to_id(unsigned id)
  899. {
  900. STARPU_ASSERT(id < STARPU_MAXNUMANODES);
  901. return numa_memory_nodes_to_physicalid[id];
  902. }
  903. //TODO: turn this into an array lookup
  904. int starpu_memory_nodes_numa_id_to_devid(int osid)
  905. {
  906. unsigned n;
  907. for (n = 0; n < nb_numa_nodes; n++)
  908. if (numa_memory_nodes_to_physicalid[n] == osid)
  909. return n;
  910. return -1;
  911. }
  912. #ifdef STARPU_HAVE_HWLOC
  913. void _starpu_topology_filter(hwloc_topology_t topology)
  914. {
  915. #if HWLOC_API_VERSION >= 0x20000
  916. hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  917. #else
  918. hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
  919. #endif
  920. }
  921. #endif
  922. #ifdef STARPU_USE_MIC
  923. static void
  924. _starpu_init_mic_config (struct _starpu_machine_config *config,
  925. struct starpu_conf *user_conf,
  926. unsigned mic_idx)
  927. {
  928. // Configure the MIC device of index MIC_IDX.
  929. struct _starpu_machine_topology *topology = &config->topology;
  930. topology->nhwmiccores[mic_idx] = 0;
  931. _starpu_init_mic_topology (config, mic_idx);
  932. int nmiccores;
  933. nmiccores = starpu_get_env_number("STARPU_NMICTHREADS");
  934. if (nmiccores == -1)
  935. {
  936. /* Nothing was specified, so let's use the number of
  937. * detected MIC cores. */
  938. nmiccores = topology->nhwmiccores[mic_idx];
  939. }
  940. else
  941. {
  942. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  943. {
  944. /* The user requires more MIC cores than are available */
  945. _STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
  946. nmiccores = topology->nhwmiccores[mic_idx];
  947. }
  948. }
  949. topology->nmiccores[mic_idx] = nmiccores;
  950. STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  951. "topology->nmiccores[mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
  952. mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  953. /* _starpu_initialize_workers_mic_deviceid (config); */
  954. mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
  955. mic_worker_set[mic_idx].nworkers = topology->nmiccores[mic_idx];
  956. unsigned miccore_id;
  957. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  958. {
  959. int worker_idx = topology->nworkers + miccore_id;
  960. config->workers[worker_idx].set = &mic_worker_set[mic_idx];
  961. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  962. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  963. config->workers[worker_idx].perf_arch.ndevices = 1;
  964. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  965. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  966. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  967. config->workers[worker_idx].devid = mic_idx;
  968. config->workers[worker_idx].subworkerid = miccore_id;
  969. config->workers[worker_idx].worker_mask = STARPU_MIC;
  970. config->worker_mask |= STARPU_MIC;
  971. }
  972. _starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
  973. topology->nworkers += topology->nmiccores[mic_idx];
  974. }
  975. static COIENGINE mic_handles[STARPU_MAXMICDEVS];
  976. COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
  977. #endif
  978. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  979. static void
  980. _starpu_init_mpi_config (struct _starpu_machine_config *config,
  981. struct starpu_conf *user_conf,
  982. unsigned mpi_idx)
  983. {
  984. struct _starpu_machine_topology *topology = &config->topology;
  985. topology->nhwmpicores[mpi_idx] = 0;
  986. _starpu_init_mpi_topology (config, mpi_idx);
  987. int nmpicores;
  988. nmpicores = starpu_get_env_number("STARPU_NMPIMSTHREADS");
  989. if (nmpicores == -1)
  990. {
  991. /* Nothing was specified, so let's use the number of
  992. * detected MPI cores. */
  993. nmpicores = topology->nhwmpicores[mpi_idx];
  994. }
  995. else
  996. {
  997. if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
  998. {
  999. /* The user requires more MPI cores than are available */
  1000. _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
  1001. nmpicores, topology->nhwmpicores[mpi_idx]);
  1002. nmpicores = topology->nhwmpicores[mpi_idx];
  1003. }
  1004. }
  1005. topology->nmpicores[mpi_idx] = nmpicores;
  1006. STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  1007. "topology->nmpicores[mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
  1008. mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
  1009. mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
  1010. mpi_worker_set[mpi_idx].nworkers = topology->nmpicores[mpi_idx];
  1011. unsigned mpicore_id;
  1012. for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
  1013. {
  1014. int worker_idx = topology->nworkers + mpicore_id;
  1015. config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
  1016. config->workers[worker_idx].arch = STARPU_MPI_MS_WORKER;
  1017. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1018. config->workers[worker_idx].perf_arch.ndevices = 1;
  1019. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MPI_MS_WORKER;
  1020. config->workers[worker_idx].perf_arch.devices[0].devid = mpi_idx;
  1021. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1022. config->workers[worker_idx].devid = mpi_idx;
  1023. config->workers[worker_idx].subworkerid = mpicore_id;
  1024. config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
  1025. config->worker_mask |= STARPU_MPI_MS;
  1026. }
  1027. _starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
  1028. topology->nworkers += topology->nmpicores[mpi_idx];
  1029. }
  1030. #endif
  1031. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1032. static void
  1033. _starpu_init_mp_config (struct _starpu_machine_config *config,
  1034. struct starpu_conf *user_conf, int no_mp_config)
  1035. {
  1036. /* Discover and configure the mp topology. That means:
  1037. * - discover the number of mp nodes;
  1038. * - initialize each discovered node;
  1039. * - discover the local topology (number of PUs/devices) of each node;
  1040. * - configure the workers accordingly.
  1041. */
  1042. #ifdef STARPU_USE_MIC
  1043. if (!no_mp_config)
  1044. {
  1045. struct _starpu_machine_topology *topology = &config->topology;
  1046. /* Discover and initialize the number of MIC nodes through the mp
  1047. * infrastructure. */
  1048. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  1049. int reqmicdevices = starpu_get_env_number("STARPU_NMIC");
  1050. if (reqmicdevices == -1 && user_conf)
  1051. reqmicdevices = user_conf->nmic;
  1052. if (reqmicdevices == -1)
  1053. /* Nothing was specified, so let's use the number of
  1054. * detected MIC devices. */
  1055. reqmicdevices = nhwmicdevices;
  1056. if (reqmicdevices != -1)
  1057. {
  1058. if ((unsigned) reqmicdevices > nhwmicdevices)
  1059. {
  1060. /* The user requires more MIC devices than are available */
  1061. _STARPU_MSG("# Warning: %d MIC devices requested. Only %u available.\n", reqmicdevices, nhwmicdevices);
  1062. reqmicdevices = nhwmicdevices;
  1063. }
  1064. }
  1065. topology->nmicdevices = 0;
  1066. unsigned i;
  1067. for (i = 0; i < (unsigned) reqmicdevices; i++)
  1068. if (0 == _starpu_init_mic_node (config, i, &mic_handles[i], &_starpu_mic_process[i]))
  1069. topology->nmicdevices++;
  1070. for (i = 0; i < topology->nmicdevices; i++)
  1071. _starpu_init_mic_config (config, user_conf, i);
  1072. }
  1073. #endif
  1074. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1075. {
  1076. struct _starpu_machine_topology *topology = &config->topology;
  1077. /* Discover and initialize the number of MPI nodes through the mp
  1078. * infrastructure. */
  1079. unsigned nhwmpidevices = _starpu_mpi_src_get_device_count();
  1080. int reqmpidevices = starpu_get_env_number("STARPU_NMPI_MS");
  1081. if (reqmpidevices == -1 && user_conf)
  1082. reqmpidevices = user_conf->nmpi_ms;
  1083. if (reqmpidevices == -1)
  1084. /* Nothing was specified, so let's use the number of
  1085. * detected MPI devices. */
  1086. reqmpidevices = nhwmpidevices;
  1087. if (reqmpidevices != -1)
  1088. {
  1089. if ((unsigned) reqmpidevices > nhwmpidevices)
  1090. {
  1091. /* The user requires more MPI devices than are available */
  1092. _STARPU_MSG("# Warning: %d MPI Master-Slave devices requested. Only %u available.\n",
  1093. reqmpidevices, nhwmpidevices);
  1094. reqmpidevices = nhwmpidevices;
  1095. }
  1096. }
  1097. topology->nmpidevices = reqmpidevices;
  1098. /* if the user does not want to use MPI slaves, we close the slave processes */
  1099. if (no_mp_config && topology->nmpidevices == 0)
  1100. {
  1101. _starpu_mpi_common_mp_deinit();
  1102. exit(0);
  1103. }
  1104. if (!no_mp_config)
  1105. {
  1106. unsigned i;
  1107. for (i = 0; i < topology->nmpidevices; i++)
  1108. _starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
  1109. for (i = 0; i < topology->nmpidevices; i++)
  1110. _starpu_init_mpi_config (config, user_conf, i);
  1111. }
  1112. }
  1113. #endif
  1114. }
  1115. #endif
  1116. #ifdef STARPU_USE_MIC
  1117. static void
  1118. _starpu_deinit_mic_node (unsigned mic_idx)
  1119. {
  1120. _starpu_mp_common_send_command(_starpu_mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
  1121. COIProcessDestroy(_starpu_mic_process[mic_idx], -1, 0, NULL, NULL);
  1122. _starpu_mp_common_node_destroy(_starpu_mic_nodes[mic_idx]);
  1123. }
  1124. #endif
  1125. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1126. static void _starpu_deinit_mpi_node(int devid)
  1127. {
  1128. _starpu_mp_common_send_command(_starpu_mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);
  1129. _starpu_mp_common_node_destroy(_starpu_mpi_ms_nodes[devid]);
  1130. }
  1131. #endif
  1132. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1133. static void
  1134. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  1135. {
  1136. struct _starpu_machine_topology *topology = &config->topology;
  1137. unsigned i;
  1138. #ifdef STARPU_USE_MIC
  1139. for (i = 0; i < topology->nmicdevices; i++)
  1140. _starpu_deinit_mic_node (i);
  1141. _starpu_mic_clear_kernels();
  1142. #endif
  1143. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1144. for (i = 0; i < topology->nmpidevices; i++)
  1145. _starpu_deinit_mpi_node (i);
  1146. #endif
  1147. }
  1148. #endif
  1149. #ifdef STARPU_HAVE_HWLOC
  1150. static unsigned
  1151. _starpu_topology_count_ngpus(hwloc_obj_t obj)
  1152. {
  1153. struct _starpu_hwloc_userdata *data = obj->userdata;
  1154. unsigned n = data->ngpus;
  1155. unsigned i;
  1156. for (i = 0; i < obj->arity; i++)
  1157. n += _starpu_topology_count_ngpus(obj->children[i]);
  1158. data->ngpus = n;
  1159. //#ifdef STARPU_VERBOSE
  1160. // {
  1161. // char name[64];
  1162. // hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
  1163. // _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
  1164. // }
  1165. //#endif
  1166. return n;
  1167. }
  1168. #endif
  1169. static int
  1170. _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1171. {
  1172. int i;
  1173. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  1174. {
  1175. config->workers[i].workerid = i;
  1176. config->workers[i].set = NULL;
  1177. }
  1178. struct _starpu_machine_topology *topology = &config->topology;
  1179. topology->nworkers = 0;
  1180. topology->ncombinedworkers = 0;
  1181. topology->nsched_ctxs = 0;
  1182. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1183. _starpu_opencl_init();
  1184. #endif
  1185. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1186. _starpu_init_cuda();
  1187. #endif
  1188. _starpu_init_topology(config);
  1189. _starpu_initialize_workers_bindid(config);
  1190. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1191. for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++)
  1192. cuda_worker_set[i].workers = NULL;
  1193. #endif
  1194. #ifdef STARPU_USE_MIC
  1195. for (i = 0; i < (int) (sizeof(mic_worker_set)/sizeof(mic_worker_set[0])); i++)
  1196. mic_worker_set[i].workers = NULL;
  1197. #endif
  1198. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1199. for (i = 0; i < (int) (sizeof(mpi_worker_set)/sizeof(mpi_worker_set[0])); i++)
  1200. mpi_worker_set[i].workers = NULL;
  1201. #endif
  1202. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1203. int ncuda = config->conf.ncuda;
  1204. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  1205. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  1206. STARPU_ASSERT_MSG(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS);
  1207. #ifndef STARPU_NON_BLOCKING_DRIVERS
  1208. if (nworker_per_cuda > 1)
  1209. {
  1210. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  1211. nworker_per_cuda = 1;
  1212. }
  1213. #endif
  1214. if (ncuda != 0)
  1215. {
  1216. /* The user did not disable CUDA. We need to initialize CUDA
  1217. * early to count the number of devices */
  1218. _starpu_init_cuda();
  1219. int nb_devices = _starpu_get_cuda_device_count();
  1220. if (ncuda == -1)
  1221. {
  1222. /* Nothing was specified, so let's choose ! */
  1223. ncuda = nb_devices;
  1224. }
  1225. else
  1226. {
  1227. if (ncuda > nb_devices)
  1228. {
1229. /* The user requested more CUDA devices than
1230. * are available */
  1231. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  1232. ncuda = nb_devices;
  1233. }
  1234. }
  1235. }
  1236. /* Now we know how many CUDA devices will be used */
  1237. topology->ncudagpus = ncuda;
  1238. topology->nworkerpercuda = nworker_per_cuda;
  1239. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  1240. _starpu_initialize_workers_cuda_gpuid(config);
  1241. /* allow having one worker per stream */
  1242. topology->cuda_th_per_stream = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1);
  1243. topology->cuda_th_per_dev = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_DEV", -1);
  1244. /* per device by default */
  1245. if (topology->cuda_th_per_dev == -1)
  1246. {
  1247. if (topology->cuda_th_per_stream == 1)
  1248. topology->cuda_th_per_dev = 0;
  1249. else
  1250. topology->cuda_th_per_dev = 1;
  1251. }
  1252. /* Not per stream by default */
  1253. if (topology->cuda_th_per_stream == -1)
  1254. {
  1255. topology->cuda_th_per_stream = 0;
  1256. }
  1257. STARPU_ASSERT_MSG(topology->cuda_th_per_dev != 1 || topology->cuda_th_per_stream != 1, "It does not make sense to set both STARPU_CUDA_THREAD_PER_WORKER and STARPU_CUDA_THREAD_PER_DEV to 1, please choose either per worker or per device or none");
  1258. if (!topology->cuda_th_per_dev)
  1259. {
  1260. cuda_worker_set[0].workers = &config->workers[topology->nworkers];
  1261. cuda_worker_set[0].nworkers = topology->ncudagpus * nworker_per_cuda;
  1262. }
  1263. unsigned cudagpu;
  1264. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  1265. {
  1266. int devid = _starpu_get_next_cuda_gpuid(config);
  1267. int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
  1268. struct _starpu_worker_set *worker_set;
  1269. if (topology->cuda_th_per_dev)
  1270. {
  1271. worker_set = &cuda_worker_set[devid];
  1272. worker_set->workers = &config->workers[worker_idx0];
  1273. worker_set->nworkers = nworker_per_cuda;
  1274. }
  1275. else
  1276. {
  1277. /* Same worker set for all devices */
  1278. worker_set = &cuda_worker_set[0];
  1279. }
  1280. for (i = 0; i < nworker_per_cuda; i++)
  1281. {
  1282. int worker_idx = worker_idx0 + i;
  1283. if(topology->cuda_th_per_stream)
  1284. {
  1285. /* Just one worker in the set */
  1286. _STARPU_CALLOC(config->workers[worker_idx].set, 1, sizeof(struct _starpu_worker_set));
  1287. config->workers[worker_idx].set->workers = &config->workers[worker_idx];
  1288. config->workers[worker_idx].set->nworkers = 1;
  1289. }
  1290. else
  1291. config->workers[worker_idx].set = worker_set;
  1292. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  1293. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1294. config->workers[worker_idx].perf_arch.ndevices = 1;
  1295. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  1296. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1297. // TODO: fix perfmodels etc.
  1298. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  1299. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1300. config->workers[worker_idx].devid = devid;
  1301. config->workers[worker_idx].subworkerid = i;
  1302. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  1303. config->worker_mask |= STARPU_CUDA;
  1304. struct handle_entry *entry;
  1305. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  1306. if (!entry)
  1307. {
  1308. _STARPU_MALLOC(entry, sizeof(*entry));
  1309. entry->gpuid = devid;
  1310. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  1311. }
  1312. }
  1313. #ifndef STARPU_SIMGRID
  1314. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  1315. {
  1316. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, devid);
  1317. if (obj)
  1318. {
  1319. struct _starpu_hwloc_userdata *data = obj->userdata;
  1320. data->ngpus++;
  1321. }
  1322. else
  1323. {
  1324. _STARPU_DISP("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid);
  1325. }
  1326. }
  1327. #endif
  1328. #endif
  1329. }
  1330. topology->nworkers += topology->ncudagpus * nworker_per_cuda;
  1331. #endif
  1332. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1333. int nopencl = config->conf.nopencl;
  1334. if (nopencl != 0)
  1335. {
  1336. /* The user did not disable OPENCL. We need to initialize
  1337. * OpenCL early to count the number of devices */
  1338. _starpu_opencl_init();
  1339. int nb_devices;
  1340. nb_devices = _starpu_opencl_get_device_count();
  1341. if (nopencl == -1)
  1342. {
  1343. /* Nothing was specified, so let's choose ! */
  1344. nopencl = nb_devices;
  1345. if (nopencl > STARPU_MAXOPENCLDEVS)
  1346. {
  1347. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  1348. nopencl = STARPU_MAXOPENCLDEVS;
  1349. }
  1350. }
  1351. else
  1352. {
  1353. /* Let's make sure this value is OK. */
  1354. if (nopencl > nb_devices)
  1355. {
1356. /* The user requested more OpenCL devices than
1357. * are available */
  1358. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  1359. nopencl = nb_devices;
  1360. }
  1361. /* Let's make sure this value is OK. */
  1362. if (nopencl > STARPU_MAXOPENCLDEVS)
  1363. {
  1364. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  1365. nopencl = STARPU_MAXOPENCLDEVS;
  1366. }
  1367. }
  1368. }
  1369. topology->nopenclgpus = nopencl;
  1370. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  1371. _starpu_initialize_workers_opencl_gpuid(config);
  1372. unsigned openclgpu;
  1373. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  1374. {
  1375. int worker_idx = topology->nworkers + openclgpu;
  1376. int devid = _starpu_get_next_opencl_gpuid(config);
  1377. if (devid == -1)
  1378. {
1379. // There are no more devices left
  1380. topology->nopenclgpus = openclgpu;
  1381. break;
  1382. }
  1383. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  1384. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1385. config->workers[worker_idx].perf_arch.ndevices = 1;
  1386. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  1387. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1388. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1389. config->workers[worker_idx].subworkerid = 0;
  1390. config->workers[worker_idx].devid = devid;
  1391. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  1392. config->worker_mask |= STARPU_OPENCL;
  1393. }
  1394. topology->nworkers += topology->nopenclgpus;
  1395. #endif
  1396. #ifdef STARPU_USE_SCC
  1397. int nscc = config->conf.nscc;
  1398. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  1399. if (nscc != 0)
  1400. {
  1401. /* The user did not disable SCC. We need to count
  1402. * the number of devices */
  1403. int nb_devices = nb_scc_nodes;
  1404. if (nscc == -1)
  1405. {
  1406. /* Nothing was specified, so let's choose ! */
  1407. nscc = nb_devices;
  1408. if (nscc > STARPU_MAXSCCDEVS)
  1409. {
  1410. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  1411. nscc = STARPU_MAXSCCDEVS;
  1412. }
  1413. }
  1414. else
  1415. {
  1416. /* Let's make sure this value is OK. */
  1417. if (nscc > nb_devices)
  1418. {
1419. /* The user requested more SCC devices than are available */
  1420. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  1421. nscc = nb_devices;
  1422. }
  1423. /* Let's make sure this value is OK. */
  1424. if (nscc > STARPU_MAXSCCDEVS)
  1425. {
  1426. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  1427. nscc = STARPU_MAXSCCDEVS;
  1428. }
  1429. }
  1430. }
  1431. /* Now we know how many SCC devices will be used */
  1432. topology->nsccdevices = nscc;
  1433. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  1434. _starpu_initialize_workers_scc_deviceid(config);
  1435. unsigned sccdev;
  1436. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  1437. {
  1438. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  1439. int devid = _starpu_get_next_scc_deviceid(config);
  1440. _STARPU_MALLOC(config->workers[topology->nworkers + sccdev].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1441. config->workers[topology->nworkers + sccdev].perf_arch.ndevices = 1;
  1442. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].type = STARPU_SCC_WORKER;
  1443. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].devid = sccdev;
  1444. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].ncores = 1;
  1445. config->workers[topology->nworkers + sccdev].subworkerid = 0;
  1446. config->workers[topology->nworkers + sccdev].devid = devid;
  1447. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  1448. config->worker_mask |= STARPU_SCC;
  1449. }
  1450. for (; sccdev < nb_scc_nodes; ++sccdev)
  1451. _starpu_scc_exit_useless_node(sccdev);
  1452. topology->nworkers += topology->nsccdevices;
  1453. #endif /* STARPU_USE_SCC */
  1454. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1455. _starpu_init_mp_config (config, &config->conf, no_mp_config);
  1456. #endif
1457. /* we put the CPU section after the accelerators: whenever an
1458. * accelerator was found, we devote one CPU to driving it */
  1459. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  1460. int ncpu = config->conf.ncpus;
  1461. if (ncpu != 0)
  1462. {
  1463. if (ncpu == -1)
  1464. {
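/* Compute how many PUs remain for CPU workers once each accelerator driver thread has been accounted for */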
  1465. unsigned mic_busy_cpus = 0;
  1466. int j = 0;
  1467. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  1468. mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
  1469. unsigned mpi_ms_busy_cpus = 0;
  1470. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1471. #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
  1472. for (j = 0; j < STARPU_MAXMPIDEVS; j++)
  1473. mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
  1474. #else
  1475. mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
  1476. #endif
  1477. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  1478. unsigned cuda_busy_cpus = 0;
  1479. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
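/* PUs consumed by CUDA workers: a single shared driver thread when neither per-device nor per-stream threads are used, one per CUDA worker with per-stream threads, otherwise one per device */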
  1480. cuda_busy_cpus =
  1481. topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ?
  1482. (topology->ncudagpus ? 1 : 0) :
  1483. topology->cuda_th_per_stream ?
  1484. (nworker_per_cuda * topology->ncudagpus) :
  1485. topology->ncudagpus;
  1486. #endif
  1487. unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
  1488. + cuda_busy_cpus
  1489. + topology->nopenclgpus + topology->nsccdevices;
  1490. long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
  1491. if (avail_cpus < 0)
  1492. avail_cpus = 0;
  1493. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  1494. avail_cpus *= nth_per_core;
  1495. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  1496. }
  1497. else
  1498. {
  1499. if (ncpu > STARPU_MAXCPUS)
  1500. {
  1501. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  1502. ncpu = STARPU_MAXCPUS;
  1503. }
  1504. }
  1505. }
  1506. topology->ncpus = ncpu;
  1507. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  1508. unsigned cpu;
  1509. unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
  1510. for (cpu = 0; cpu < topology->ncpus; cpu++)
  1511. {
  1512. int worker_idx = topology->nworkers + cpu;
  1513. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  1514. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1515. config->workers[worker_idx].perf_arch.ndevices = 1;
  1516. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
  1517. config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : cpu;
  1518. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1519. config->workers[worker_idx].subworkerid = 0;
  1520. config->workers[worker_idx].devid = cpu;
  1521. config->workers[worker_idx].worker_mask = STARPU_CPU;
  1522. config->worker_mask |= STARPU_CPU;
  1523. }
  1524. topology->nworkers += topology->ncpus;
  1525. #endif
  1526. if (topology->nworkers == 0)
  1527. {
  1528. _STARPU_DEBUG("No worker found, aborting ...\n");
  1529. return -ENODEV;
  1530. }
  1531. return 0;
  1532. }
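/* Free the per-worker and per-bindid data allocated by _starpu_init_machine_config, along with the hwloc topology and the table of CUDA devices in use */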
  1533. void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
  1534. {
  1535. _starpu_close_debug_logfile();
  1536. unsigned worker;
  1537. for (worker = 0; worker < config->topology.nworkers; worker++)
  1538. {
  1539. struct _starpu_worker *workerarg = &config->workers[worker];
  1540. int bindid = workerarg->bindid;
  1541. free(workerarg->perf_arch.devices);
  1542. #ifdef STARPU_HAVE_HWLOC
  1543. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  1544. if (bindid != -1)
  1545. {
  1546. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1547. config->pu_depth,
  1548. bindid);
  1549. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  1550. if (data->worker_list)
  1551. {
  1552. _starpu_worker_list_delete(data->worker_list);
  1553. data->worker_list = NULL;
  1554. }
  1555. }
  1556. #endif
  1557. if (bindid != -1)
  1558. {
  1559. free(config->bindid_workers[bindid].workerids);
  1560. config->bindid_workers[bindid].workerids = NULL;
  1561. }
  1562. }
  1563. free(config->bindid_workers);
  1564. config->bindid_workers = NULL;
  1565. config->nbindid = 0;
  1566. unsigned combined_worker_id;
  1567. for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++)
  1568. {
  1569. struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
  1570. #ifdef STARPU_HAVE_HWLOC
  1571. hwloc_bitmap_free(combined_worker->hwloc_cpu_set);
  1572. #endif
  1573. free(combined_worker->perf_arch.devices);
  1574. }
  1575. #ifdef STARPU_HAVE_HWLOC
  1576. _starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology));
  1577. hwloc_topology_destroy(config->topology.hwtopology);
  1578. #endif
  1579. topology_is_initialized = 0;
  1580. #ifdef STARPU_USE_CUDA
  1581. struct handle_entry *entry, *tmp;
  1582. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  1583. {
  1584. HASH_DEL(devices_using_cuda, entry);
  1585. free(entry);
  1586. }
  1587. devices_using_cuda = NULL;
  1588. #endif
  1589. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  1590. int i;
  1591. for (i=0; i<STARPU_NARCH; i++)
  1592. may_bind_automatically[i] = 0;
  1593. #endif
  1594. }
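/* Bind the calling thread to the given logical PU (through hwloc, pthread affinity or the Windows API, depending on what is available), and remember which worker owns that PU so that conflicting bindings can be reported */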
  1595. void
  1596. _starpu_bind_thread_on_cpu (
  1597. int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
  1598. {
  1599. #ifdef STARPU_SIMGRID
  1600. return;
  1601. #else
  1602. if (nobind > 0)
  1603. return;
  1604. if (cpuid < 0)
  1605. return;
  1606. #ifdef STARPU_HAVE_HWLOC
  1607. const struct hwloc_topology_support *support;
  1608. #ifdef STARPU_USE_OPENCL
  1609. _starpu_opencl_init();
  1610. #endif
  1611. #ifdef STARPU_USE_CUDA
  1612. _starpu_init_cuda();
  1613. #endif
  1614. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1615. _starpu_init_topology(config);
  1616. if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
  1617. {
  1618. int previous = cpu_worker[cpuid];
  1619. if (previous != STARPU_NOWORKERID && previous != workerid)
  1620. _STARPU_DISP("Warning: both workers %d and %d are bound to the same PU %d, this will strongly degrade performance. Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", previous, workerid, cpuid, config->topology.nhwcpus, config->topology.nhwpus);
  1621. else
  1622. cpu_worker[cpuid] = workerid;
  1623. }
  1624. support = hwloc_topology_get_support (config->topology.hwtopology);
  1625. if (support->cpubind->set_thisthread_cpubind)
  1626. {
  1627. hwloc_obj_t obj =
  1628. hwloc_get_obj_by_depth (config->topology.hwtopology,
  1629. config->pu_depth, cpuid);
  1630. hwloc_bitmap_t set = obj->cpuset;
  1631. int ret;
  1632. hwloc_bitmap_singlify(set);
  1633. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1634. HWLOC_CPUBIND_THREAD);
  1635. if (ret)
  1636. {
  1637. perror("hwloc_set_cpubind");
  1638. STARPU_ABORT();
  1639. }
  1640. }
  1641. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  1642. int ret;
1643. /* pin the thread to the requested CPU */
  1644. cpu_set_t aff_mask;
  1645. CPU_ZERO(&aff_mask);
  1646. CPU_SET(cpuid, &aff_mask);
  1647. starpu_pthread_t self = starpu_pthread_self();
  1648. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  1649. if (ret)
  1650. {
  1651. const char *msg = strerror(ret);
  1652. _STARPU_MSG("pthread_setaffinity_np: %s\n", msg);
  1653. STARPU_ABORT();
  1654. }
  1655. #elif defined(_WIN32)
  1656. DWORD mask = 1 << cpuid;
  1657. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  1658. {
  1659. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  1660. }
  1661. #else
  1662. #warning no CPU binding support
  1663. #endif
  1664. #endif
  1665. }
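/* Bind the calling thread to the whole set of PUs associated with a combined (parallel) worker */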
  1666. void
  1667. _starpu_bind_thread_on_cpus (
  1668. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  1669. {
  1670. #ifdef STARPU_SIMGRID
  1671. return;
  1672. #endif
  1673. #ifdef STARPU_HAVE_HWLOC
  1674. const struct hwloc_topology_support *support;
1675. #ifdef STARPU_USE_OPENCL
  1676. _starpu_opencl_init();
  1677. #endif
  1678. #ifdef STARPU_USE_CUDA
  1679. _starpu_init_cuda();
  1680. #endif
  1681. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1682. _starpu_init_topology(config);
  1683. support = hwloc_topology_get_support(config->topology.hwtopology);
  1684. if (support->cpubind->set_thisthread_cpubind)
  1685. {
  1686. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  1687. int ret;
  1688. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1689. HWLOC_CPUBIND_THREAD);
  1690. if (ret)
  1691. {
  1692. perror("binding thread");
  1693. STARPU_ABORT();
  1694. }
  1695. }
  1696. #else
  1697. #ifdef __GLIBC__
  1698. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  1699. #else
  1700. # warning no parallel worker CPU binding support
  1701. #endif
  1702. #endif
  1703. }
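/* Dedicate one binding slot (PU) to each CPU worker; accelerator workers get their binding later, in _starpu_init_workers_binding_and_memory() */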
  1704. static void _starpu_init_binding_cpu(struct _starpu_machine_config *config)
  1705. {
  1706. unsigned worker;
  1707. for (worker = 0; worker < config->topology.nworkers; worker++)
  1708. {
  1709. struct _starpu_worker *workerarg = &config->workers[worker];
  1710. switch (workerarg->arch)
  1711. {
  1712. case STARPU_CPU_WORKER:
  1713. {
  1714. /* Dedicate a cpu core to that worker */
  1715. workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
  1716. break;
  1717. }
  1718. default:
  1719. /* Do nothing */
  1720. break;
  1721. }
  1722. }
  1723. }
  1724. //TODO : Check SIMGRID
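/* Register the NUMA memory nodes StarPU will use. When STARPU_USE_NUMA is enabled,
 * first take the nodes holding CPU workers, then the nodes attached to GPU devices;
 * otherwise (or as a fallback) register every NUMA node reported by hwloc, or a
 * single main RAM node */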
  1725. static void _starpu_init_numa_node(struct _starpu_machine_config *config)
  1726. {
  1727. nb_numa_nodes = 0;
  1728. unsigned i;
  1729. for (i = 0; i < STARPU_MAXNUMANODES; i++)
  1730. {
  1731. numa_memory_nodes_to_hwloclogid[i] = STARPU_NUMA_UNINITIALIZED;
  1732. numa_memory_nodes_to_physicalid[i] = STARPU_NUMA_UNINITIALIZED;
  1733. }
  1734. #ifdef STARPU_SIMGRID
  1735. char name[16];
  1736. msg_host_t host;
  1737. #endif
  1738. int numa_enabled = starpu_get_env_number_default("STARPU_USE_NUMA", 0);
  1739. /* NUMA mode activated */
  1740. if (numa_enabled)
  1741. {
  1742. /* Take all NUMA nodes used by CPU workers */
  1743. unsigned worker;
  1744. for (worker = 0; worker < config->topology.nworkers; worker++)
  1745. {
  1746. struct _starpu_worker *workerarg = &config->workers[worker];
  1747. if (workerarg->arch == STARPU_CPU_WORKER)
  1748. {
  1749. int numa_logical_id = _starpu_get_logical_numa_node_worker(worker);
  1750. /* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
  1751. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id);
  1752. /* This shouldn't happen */
  1753. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  1754. {
  1755. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  1756. STARPU_ABORT();
  1757. }
  1758. if (numa_starpu_id == -1)
  1759. {
  1760. int devid = numa_logical_id == STARPU_NUMA_MAIN_RAM ? 0 : numa_logical_id;
  1761. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, devid);
  1762. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  1763. numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id;
  1764. int numa_physical_id = _starpu_get_physical_numa_node_worker(worker);
  1765. numa_memory_nodes_to_physicalid[memnode] = numa_physical_id;
  1766. nb_numa_nodes++;
  1767. #ifdef STARPU_SIMGRID
  1768. snprintf(name, sizeof(name), "RAM%d", memnode);
  1769. host = _starpu_simgrid_get_host_by_name(name);
  1770. STARPU_ASSERT(host);
  1771. _starpu_simgrid_memory_node_set_host(memnode, host);
  1772. #endif
  1773. }
  1774. }
  1775. }
  1776. /* If we found NUMA nodes from CPU workers, it's good */
  1777. if (nb_numa_nodes != 0)
  1778. return;
  1779. _STARPU_DISP("No NUMA nodes found when checking CPU workers...\n");
  1780. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
  1781. _STARPU_DISP("Take NUMA nodes attached to CUDA and OpenCL devices...\n");
  1782. #endif
  1783. #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
  1784. for (i = 0; i < config->topology.ncudagpus; i++)
  1785. {
  1786. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
  1787. if (obj)
  1788. obj = numa_get_obj(obj);
  1789. /* Hwloc cannot recognize some devices */
  1790. if (!obj)
  1791. continue;
  1792. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index);
  1793. /* This shouldn't happen */
  1794. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  1795. {
  1796. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  1797. STARPU_ABORT();
  1798. }
  1799. if (numa_starpu_id == -1)
  1800. {
  1801. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
  1802. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  1803. numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index;
  1804. numa_memory_nodes_to_physicalid[memnode] = obj->os_index;
  1805. nb_numa_nodes++;
  1806. #ifdef STARPU_SIMGRID
  1807. snprintf(name, sizeof(name), "RAM%d", memnode);
  1808. host = _starpu_simgrid_get_host_by_name(name);
  1809. STARPU_ASSERT(host);
  1810. _starpu_simgrid_memory_node_set_host(memnode, host);
  1811. #endif
  1812. }
  1813. }
  1814. #endif
  1815. #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
  1816. if (config->topology.nopenclgpus > 0)
  1817. {
  1818. cl_int err;
  1819. cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
  1820. cl_uint nb_platforms;
  1821. unsigned platform;
  1822. unsigned nb_opencl_devices = 0, num = 0;
  1823. err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
  1824. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  1825. nb_platforms=0;
  1826. cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
  1827. if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
  1828. device_type |= CL_DEVICE_TYPE_CPU;
  1829. if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
  1830. device_type = CL_DEVICE_TYPE_CPU;
  1831. for (platform = 0; platform < nb_platforms ; platform++)
  1832. {
  1833. err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
  1834. if (err != CL_SUCCESS)
  1835. num = 0;
  1836. nb_opencl_devices += num;
  1837. for (i = 0; i < num; i++)
  1838. {
  1839. hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
  1840. if (obj)
  1841. obj = numa_get_obj(obj);
  1842. /* Hwloc cannot recognize some devices */
  1843. if (!obj)
  1844. continue;
  1845. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index);
  1846. /* This shouldn't happen */
  1847. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  1848. {
  1849. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  1850. STARPU_ABORT();
  1851. }
  1852. if (numa_starpu_id == -1)
  1853. {
  1854. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
  1855. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  1856. numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index;
  1857. numa_memory_nodes_to_physicalid[memnode] = obj->os_index;
  1858. nb_numa_nodes++;
  1859. #ifdef STARPU_SIMGRID
  1860. snprintf(name, sizeof(name), "RAM%d", memnode);
  1861. host = _starpu_simgrid_get_host_by_name(name);
  1862. STARPU_ASSERT(host);
  1863. _starpu_simgrid_memory_node_set_host(memnode, host);
  1864. #endif
  1865. }
  1866. }
  1867. }
  1868. }
  1869. #endif
  1870. }
  1871. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
1872. /* If we found NUMA nodes attached to the GPU devices, we are done */
  1873. if (nb_numa_nodes != 0)
  1874. return;
1875. /* If we did not find any NUMA node attached to the GPUs either, fall back to taking all of them */
1876. if (numa_enabled)
1877. _STARPU_DISP("No NUMA nodes found when checking GPU devices...\n");
  1878. #endif
  1879. if (numa_enabled)
  1880. _STARPU_DISP("Finally, take all NUMA nodes available... \n");
  1881. unsigned nnuma = _starpu_topology_get_nnumanodes(config);
  1882. if (nnuma > STARPU_MAXNUMANODES)
  1883. {
  1884. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  1885. nnuma = STARPU_MAXNUMANODES;
  1886. }
  1887. unsigned numa;
  1888. for (numa = 0; numa < nnuma; numa++)
  1889. {
  1890. #if defined(STARPU_HAVE_HWLOC)
  1891. if (nnuma > 1)
  1892. {
  1893. hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
  1894. unsigned numa_logical_id = obj->logical_index;
  1895. unsigned numa_physical_id = obj->os_index;
  1896. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
  1897. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
  1898. numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id;
  1899. numa_memory_nodes_to_physicalid[memnode] = numa_physical_id;
  1900. nb_numa_nodes++;
  1901. #ifdef STARPU_SIMGRID
  1902. snprintf(name, sizeof(name), "RAM%d", memnode);
  1903. host = _starpu_simgrid_get_host_by_name(name);
  1904. STARPU_ASSERT(host);
  1905. _starpu_simgrid_memory_node_set_host(memnode, host);
  1906. #endif
  1907. }
  1908. else
  1909. #endif /* defined(STARPU_HAVE_HWLOC) */
  1910. {
1911. /* In this case there is only one NUMA node */
  1912. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
  1913. STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
  1914. numa_memory_nodes_to_hwloclogid[memnode] = STARPU_NUMA_MAIN_RAM;
  1915. numa_memory_nodes_to_physicalid[memnode] = STARPU_NUMA_MAIN_RAM;
  1916. nb_numa_nodes++;
  1917. #ifdef STARPU_SIMGRID
  1918. host = _starpu_simgrid_get_host_by_name("RAM");
  1919. STARPU_ASSERT(host);
  1920. _starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
  1921. #endif
  1922. }
  1923. }
1924. STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node!\n");
  1925. }
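/* Register a bus between every ordered pair of distinct NUMA nodes */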
  1926. static void _starpu_init_numa_bus()
  1927. {
  1928. unsigned i, j;
  1929. for (i = 0; i < nb_numa_nodes; i++)
  1930. for (j = 0; j < nb_numa_nodes; j++)
  1931. if (i != j)
  1932. numa_bus_id[i*nb_numa_nodes+j] = _starpu_register_bus(i, j);
  1933. }
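/* Bind each worker to a PU and attach it to the memory node holding its data
 * (a NUMA node for CPU workers, the device memory for accelerators), registering
 * the buses between these nodes along the way */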
  1934. static void
  1935. _starpu_init_workers_binding_and_memory (struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1936. {
1937. /* We will store the bus ids of all the different (src, dst)
1938. * combinations in a matrix, which we initialize here. */
  1939. _starpu_initialize_busid_matrix();
  1940. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1941. unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
  1942. unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
  1943. unsigned cuda_bindid[STARPU_MAXCUDADEVS];
  1944. int cuda_globalbindid = -1;
  1945. #endif
  1946. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1947. unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
  1948. unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
  1949. unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
  1950. #endif
  1951. #ifdef STARPU_USE_MIC
  1952. unsigned mic_init[STARPU_MAXMICDEVS] = { };
  1953. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  1954. unsigned mic_bindid[STARPU_MAXMICDEVS];
  1955. #endif
  1956. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1957. unsigned mpi_init[STARPU_MAXMPIDEVS] = { };
  1958. unsigned mpi_memory_nodes[STARPU_MAXMPIDEVS];
  1959. unsigned mpi_bindid[STARPU_MAXMPIDEVS];
  1960. #endif
  1961. unsigned bindid;
  1962. for (bindid = 0; bindid < config->nbindid; bindid++)
  1963. {
  1964. free(config->bindid_workers[bindid].workerids);
  1965. config->bindid_workers[bindid].workerids = NULL;
  1966. config->bindid_workers[bindid].nworkers = 0;
  1967. }
  1968. /* Init CPU binding before NUMA nodes, because we use it to discover NUMA nodes */
  1969. _starpu_init_binding_cpu(config);
  1970. /* Initialize NUMA nodes */
  1971. _starpu_init_numa_node(config);
  1972. _starpu_init_numa_bus();
  1973. unsigned worker;
  1974. for (worker = 0; worker < config->topology.nworkers; worker++)
  1975. {
  1976. unsigned memory_node = -1;
  1977. struct _starpu_worker *workerarg = &config->workers[worker];
  1978. unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid;
  1979. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1980. /* Perhaps the worker has some "favourite" bindings */
  1981. int *preferred_binding = NULL;
  1982. int npreferred = 0;
  1983. #endif
  1984. /* select the memory node that contains worker's memory */
  1985. switch (workerarg->arch)
  1986. {
  1987. case STARPU_CPU_WORKER:
  1988. {
  1989. int numa_logical_id = _starpu_get_logical_numa_node_worker(worker);
  1990. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id);
  1991. if (numa_starpu_id < 0 || numa_starpu_id >= STARPU_MAXNUMANODES)
  1992. numa_starpu_id = STARPU_MAIN_RAM;
  1993. workerarg->numa_memory_node = memory_node = numa_starpu_id;
  1994. _starpu_memory_node_add_nworkers(memory_node);
  1995. _starpu_worker_drives_memory_node(workerarg, numa_starpu_id);
  1996. break;
  1997. }
  1998. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1999. case STARPU_CUDA_WORKER:
  2000. {
  2001. unsigned numa;
  2002. #ifndef STARPU_SIMGRID
  2003. if (may_bind_automatically[STARPU_CUDA_WORKER])
  2004. {
  2005. /* StarPU is allowed to bind threads automatically */
  2006. preferred_binding = _starpu_get_cuda_affinity_vector(devid);
  2007. npreferred = config->topology.nhwpus;
  2008. }
  2009. #endif /* SIMGRID */
  2010. if (cuda_init[devid])
  2011. {
  2012. memory_node = cuda_memory_nodes[devid];
  2013. if (config->topology.cuda_th_per_stream == 0)
  2014. workerarg->bindid = cuda_bindid[devid];
  2015. else
  2016. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2017. }
  2018. else
  2019. {
  2020. cuda_init[devid] = 1;
  2021. if (config->topology.cuda_th_per_dev == 0 && config->topology.cuda_th_per_stream == 0)
  2022. {
  2023. if (cuda_globalbindid == -1)
  2024. cuda_globalbindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2025. workerarg->bindid = cuda_bindid[devid] = cuda_globalbindid;
  2026. }
  2027. else
  2028. workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2029. memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid);
  2030. for (numa = 0; numa < nb_numa_nodes; numa++)
  2031. {
  2032. _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node);
  2033. _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa);
  2034. }
  2035. #ifdef STARPU_SIMGRID
  2036. const char* cuda_memcpy_peer;
  2037. char name[16];
  2038. snprintf(name, sizeof(name), "CUDA%u", devid);
  2039. msg_host_t host = _starpu_simgrid_get_host_by_name(name);
  2040. STARPU_ASSERT(host);
  2041. _starpu_simgrid_memory_node_set_host(memory_node, host);
  2042. cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
  2043. #endif /* SIMGRID */
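/* When GPU-GPU transfers are possible (CUDA memcpy peer), also register a direct bus between this device and every CUDA device seen so far */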
  2044. if (
  2045. #ifdef STARPU_SIMGRID
  2046. cuda_memcpy_peer && atoll(cuda_memcpy_peer)
  2047. #elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
  2048. 1
  2049. #else /* MEMCPY_PEER */
  2050. 0
  2051. #endif /* MEMCPY_PEER */
  2052. )
  2053. {
  2054. unsigned worker2;
  2055. for (worker2 = 0; worker2 < worker; worker2++)
  2056. {
  2057. struct _starpu_worker *workerarg2 = &config->workers[worker2];
  2058. int devid2 = workerarg2->devid;
  2059. if (workerarg2->arch == STARPU_CUDA_WORKER)
  2060. {
  2061. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  2062. _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node);
  2063. _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2);
  2064. #ifndef STARPU_SIMGRID
  2065. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  2066. {
  2067. hwloc_obj_t obj, obj2, ancestor;
  2068. obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid);
  2069. obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2);
  2070. ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2);
  2071. if (ancestor)
  2072. {
  2073. struct _starpu_hwloc_userdata *data = ancestor->userdata;
  2074. #ifdef STARPU_VERBOSE
  2075. {
  2076. char name[64];
  2077. hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0);
  2078. _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus);
  2079. }
  2080. #endif
  2081. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus);
  2082. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus);
  2083. }
  2084. }
  2085. #endif
  2086. #endif
  2087. }
  2088. }
  2089. }
  2090. }
  2091. _starpu_memory_node_add_nworkers(memory_node);
  2092. //This worker can manage transfers on NUMA nodes
  2093. for (numa = 0; numa < nb_numa_nodes; numa++)
  2094. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2095. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2096. break;
  2097. }
  2098. #endif
  2099. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  2100. case STARPU_OPENCL_WORKER:
  2101. {
  2102. unsigned numa;
  2103. #ifndef STARPU_SIMGRID
  2104. if (may_bind_automatically[STARPU_OPENCL_WORKER])
  2105. {
  2106. /* StarPU is allowed to bind threads automatically */
  2107. preferred_binding = _starpu_get_opencl_affinity_vector(devid);
  2108. npreferred = config->topology.nhwpus;
  2109. }
  2110. #endif /* SIMGRID */
  2111. if (opencl_init[devid])
  2112. {
  2113. memory_node = opencl_memory_nodes[devid];
  2114. #ifndef STARPU_SIMGRID
  2115. workerarg->bindid = opencl_bindid[devid];
  2116. #endif /* SIMGRID */
  2117. }
  2118. else
  2119. {
  2120. opencl_init[devid] = 1;
  2121. workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2122. memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid);
  2123. for (numa = 0; numa < nb_numa_nodes; numa++)
  2124. {
  2125. _starpu_register_bus(numa, memory_node);
  2126. _starpu_register_bus(memory_node, numa);
  2127. }
  2128. #ifdef STARPU_SIMGRID
  2129. char name[16];
  2130. snprintf(name, sizeof(name), "OpenCL%u", devid);
  2131. msg_host_t host = _starpu_simgrid_get_host_by_name(name);
  2132. STARPU_ASSERT(host);
  2133. _starpu_simgrid_memory_node_set_host(memory_node, host);
  2134. #endif /* SIMGRID */
  2135. }
  2136. _starpu_memory_node_add_nworkers(memory_node);
  2137. //This worker can manage transfers on NUMA nodes
  2138. for (numa = 0; numa < nb_numa_nodes; numa++)
  2139. _starpu_worker_drives_memory_node(workerarg, numa);
  2140. _starpu_worker_drives_memory_node(workerarg, memory_node);
  2141. break;
  2142. }
  2143. #endif
  2144. #ifdef STARPU_USE_MIC
  2145. case STARPU_MIC_WORKER:
  2146. {
  2147. unsigned numa;
  2148. if (mic_init[devid])
  2149. {
  2150. memory_node = mic_memory_nodes[devid];
  2151. }
  2152. else
  2153. {
  2154. mic_init[devid] = 1;
  2155. /* TODO */
  2156. //if (may_bind_automatically)
  2157. //{
  2158. // /* StarPU is allowed to bind threads automatically */
  2159. // preferred_binding = _starpu_get_mic_affinity_vector(devid);
  2160. // npreferred = config->topology.nhwpus;
  2161. //}
  2162. mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2163. memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid);
  2164. for (numa = 0; numa < nb_numa_nodes; numa++)
  2165. {
  2166. _starpu_register_bus(numa, memory_node);
  2167. _starpu_register_bus(memory_node, numa);
  2168. }
  2169. }
  2170. workerarg->bindid = mic_bindid[devid];
  2171. _starpu_memory_node_add_nworkers(memory_node);
  2172. //This worker can manage transfers on NUMA nodes
  2173. for (numa = 0; numa < nb_numa_nodes; numa++)
  2174. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2175. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2176. break;
  2177. }
  2178. #endif /* STARPU_USE_MIC */
  2179. #ifdef STARPU_USE_SCC
  2180. case STARPU_SCC_WORKER:
  2181. {
  2182. unsigned numa;
  2183. /* Node 0 represents the SCC shared memory when we're on SCC. */
  2184. struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
  2185. descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
  2186. memory_node = ram_memory_node;
  2187. _starpu_memory_node_add_nworkers(memory_node);
  2188. //This worker can manage transfers on NUMA nodes
  2189. for (numa = 0; numa < nb_numa_nodes; numa++)
  2190. _starpu_worker_drives_memory_node(workerarg, numa);
  2191. _starpu_worker_drives_memory_node(workerarg, memory_node);
  2192. }
  2193. break;
  2194. #endif /* STARPU_USE_SCC */
  2195. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  2196. case STARPU_MPI_MS_WORKER:
  2197. {
  2198. unsigned numa;
  2199. if (mpi_init[devid])
  2200. {
  2201. memory_node = mpi_memory_nodes[devid];
  2202. }
  2203. else
  2204. {
  2205. mpi_init[devid] = 1;
  2206. mpi_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  2207. memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid);
  2208. for (numa = 0; numa < nb_numa_nodes; numa++)
  2209. {
  2210. _starpu_register_bus(numa, memory_node);
  2211. _starpu_register_bus(memory_node, numa);
  2212. }
  2213. }
  2214. //This worker can manage transfers on NUMA nodes
  2215. for (numa = 0; numa < nb_numa_nodes; numa++)
  2216. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2217. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2218. #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
2219. /* The single MPI driver thread manages all slave memories when MPI multiple-thread support is disabled */
  2220. unsigned findworker;
  2221. for (findworker = 0; findworker < worker; findworker++)
  2222. {
  2223. struct _starpu_worker *findworkerarg = &config->workers[findworker];
  2224. if (findworkerarg->arch == STARPU_MPI_MS_WORKER)
  2225. {
  2226. _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node);
  2227. _starpu_worker_drives_memory_node(findworkerarg, memory_node);
  2228. }
  2229. }
  2230. #endif
  2231. workerarg->bindid = mpi_bindid[devid];
  2232. _starpu_memory_node_add_nworkers(memory_node);
  2233. break;
  2234. }
  2235. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  2236. default:
  2237. STARPU_ABORT();
  2238. }
  2239. workerarg->memory_node = memory_node;
  2240. _STARPU_DEBUG("worker %u type %d devid %u bound to cpu %d, STARPU memory node %u\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);
  2241. #ifdef __GLIBC__
  2242. if (workerarg->bindid != -1)
  2243. {
  2244. /* Save the initial cpuset */
  2245. CPU_ZERO(&workerarg->cpu_set);
  2246. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  2247. }
  2248. #endif /* __GLIBC__ */
  2249. #ifdef STARPU_HAVE_HWLOC
  2250. if (workerarg->bindid == -1)
  2251. {
  2252. workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
  2253. }
  2254. else
  2255. {
  2256. /* Put the worker descriptor in the userdata field of the
  2257. * hwloc object describing the CPU */
  2258. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  2259. config->pu_depth,
  2260. workerarg->bindid);
  2261. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  2262. if (data->worker_list == NULL)
  2263. data->worker_list = _starpu_worker_list_new();
  2264. _starpu_worker_list_push_front(data->worker_list, workerarg);
2265. /* Duplicate the cpuset of the PU this worker is bound to */
  2266. workerarg->hwloc_cpu_set = hwloc_bitmap_dup (worker_obj->cpuset);
  2267. }
  2268. #endif
  2269. if (workerarg->bindid != -1)
  2270. {
  2271. bindid = workerarg->bindid;
  2272. unsigned old_nbindid = config->nbindid;
  2273. if (bindid >= old_nbindid)
  2274. {
  2275. /* More room needed */
  2276. if (!old_nbindid)
  2277. config->nbindid = STARPU_NMAXWORKERS;
  2278. else
  2279. config->nbindid = 2 * old_nbindid;
  2280. if (bindid > config->nbindid)
  2281. {
  2282. config->nbindid = bindid+1;
  2283. }
  2284. _STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0]));
  2285. memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0]));
  2286. }
  2287. /* Add slot for this worker */
  2288. /* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */
  2289. config->bindid_workers[bindid].nworkers++;
  2290. _STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0]));
  2291. config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker;
  2292. }
  2293. }
  2294. #ifdef STARPU_SIMGRID
  2295. _starpu_simgrid_count_ngpus();
  2296. #else
  2297. #ifdef STARPU_HAVE_HWLOC
  2298. _starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology));
  2299. #endif
  2300. #endif
  2301. }
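/* Build the whole machine topology: detect devices and create workers, initialize
 * the memory nodes, then bind the workers and attach them to their memory nodes.
 * Also record, per architecture, whether all workers of that kind share a single
 * memory node: -1 means no such worker, -2 means they span several nodes */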
  2302. int
  2303. _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
  2304. {
  2305. int ret;
  2306. unsigned i;
  2307. ret = _starpu_init_machine_config(config, no_mp_config);
  2308. if (ret)
  2309. return ret;
  2310. /* for the data management library */
  2311. _starpu_memory_nodes_init();
  2312. _starpu_datastats_init();
  2313. _starpu_init_workers_binding_and_memory(config, no_mp_config);
  2314. config->cpus_nodeid = -1;
  2315. config->cuda_nodeid = -1;
  2316. config->opencl_nodeid = -1;
  2317. config->mic_nodeid = -1;
  2318. config->scc_nodeid = -1;
  2319. config->mpi_nodeid = -1;
  2320. for (i = 0; i < starpu_worker_get_count(); i++)
  2321. {
  2322. switch (starpu_worker_get_type(i))
  2323. {
  2324. case STARPU_CPU_WORKER:
  2325. if (config->cpus_nodeid == -1)
  2326. config->cpus_nodeid = starpu_worker_get_memory_node(i);
  2327. else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
  2328. config->cpus_nodeid = -2;
  2329. break;
  2330. case STARPU_CUDA_WORKER:
  2331. if (config->cuda_nodeid == -1)
  2332. config->cuda_nodeid = starpu_worker_get_memory_node(i);
  2333. else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
  2334. config->cuda_nodeid = -2;
  2335. break;
  2336. case STARPU_OPENCL_WORKER:
  2337. if (config->opencl_nodeid == -1)
  2338. config->opencl_nodeid = starpu_worker_get_memory_node(i);
  2339. else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
  2340. config->opencl_nodeid = -2;
  2341. break;
  2342. case STARPU_MIC_WORKER:
  2343. if (config->mic_nodeid == -1)
  2344. config->mic_nodeid = starpu_worker_get_memory_node(i);
  2345. else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
  2346. config->mic_nodeid = -2;
  2347. break;
  2348. case STARPU_SCC_WORKER:
  2349. if (config->scc_nodeid == -1)
  2350. config->scc_nodeid = starpu_worker_get_memory_node(i);
  2351. else if (config->scc_nodeid != (int) starpu_worker_get_memory_node(i))
  2352. config->scc_nodeid = -2;
  2353. break;
  2354. case STARPU_MPI_MS_WORKER:
  2355. if (config->mpi_nodeid == -1)
  2356. config->mpi_nodeid = starpu_worker_get_memory_node(i);
  2357. else if (config->mpi_nodeid != (int) starpu_worker_get_memory_node(i))
  2358. config->mpi_nodeid = -2;
  2359. break;
  2360. case STARPU_ANY_WORKER:
  2361. STARPU_ASSERT(0);
  2362. }
  2363. }
  2364. return 0;
  2365. }
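/* Undo _starpu_build_topology: stop the master-slave nodes, then release the memory nodes and the machine configuration */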
  2366. void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  2367. {
  2368. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  2369. _starpu_deinit_mp_config(config);
  2370. #endif
  2371. /* cleanup StarPU internal data structures */
  2372. _starpu_memory_nodes_deinit();
  2373. _starpu_destroy_machine_config(config);
  2374. }
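/* Print, for each hardware PU, its NUMA node, package and core, together with the
 * StarPU workers (including combined workers) bound to it. Typically called with
 * stdout as output, e.g. starpu_topology_print(stdout), to inspect the bindings. */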
  2375. void
  2376. starpu_topology_print (FILE *output)
  2377. {
  2378. struct _starpu_machine_config *config = _starpu_get_machine_config();
  2379. struct _starpu_machine_topology *topology = &config->topology;
  2380. unsigned pu;
  2381. unsigned worker;
  2382. unsigned nworkers = starpu_worker_get_count();
  2383. unsigned ncombinedworkers = topology->ncombinedworkers;
  2384. unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
  2385. #ifdef STARPU_HAVE_HWLOC
  2386. hwloc_topology_t topo = topology->hwtopology;
  2387. hwloc_obj_t pu_obj;
  2388. hwloc_obj_t last_numa_obj = NULL, numa_obj;
  2389. hwloc_obj_t last_package_obj = NULL, package_obj;
  2390. #endif
  2391. for (pu = 0; pu < topology->nhwpus; pu++)
  2392. {
  2393. #ifdef STARPU_HAVE_HWLOC
  2394. pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu);
  2395. numa_obj = numa_get_obj(pu_obj);
  2396. if (numa_obj != last_numa_obj)
  2397. {
  2398. fprintf(output, "numa %u", numa_obj->logical_index);
  2399. last_numa_obj = numa_obj;
  2400. }
  2401. fprintf(output, "\t");
  2402. package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj);
  2403. if (package_obj != last_package_obj)
  2404. {
  2405. fprintf(output, "pack %u", package_obj->logical_index);
  2406. last_package_obj = package_obj;
  2407. }
  2408. fprintf(output, "\t");
  2409. #endif
  2410. if ((pu % nthreads_per_core) == 0)
  2411. fprintf(output, "core %u", pu / nthreads_per_core);
  2412. fprintf(output, "\tPU %u\t", pu);
  2413. for (worker = 0;
  2414. worker < nworkers + ncombinedworkers;
  2415. worker++)
  2416. {
  2417. if (worker < nworkers)
  2418. {
  2419. struct _starpu_worker *workerarg = &config->workers[worker];
  2420. if (workerarg->bindid == (int) pu)
  2421. {
  2422. char name[256];
  2423. starpu_worker_get_name (worker, name,
  2424. sizeof(name));
  2425. fprintf(output, "%s\t", name);
  2426. }
  2427. }
  2428. else
  2429. {
  2430. int worker_size, i;
  2431. int *combined_workerid;
  2432. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  2433. for (i = 0; i < worker_size; i++)
  2434. {
  2435. if (topology->workers_bindid[combined_workerid[i]] == pu)
  2436. fprintf(output, "comb %u\t", worker-nworkers);
  2437. }
  2438. }
  2439. }
  2440. fprintf(output, "\n");
  2441. }
  2442. }