topology.c 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2013 Thibaut Lambert
  5. * Copyright (C) 2016 Uppsala University
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <common/config.h>
  21. #ifdef HAVE_UNISTD_H
  22. #include <unistd.h>
  23. #endif
  24. #include <core/workers.h>
  25. #include <core/debug.h>
  26. #include <core/topology.h>
  27. #include <drivers/cuda/driver_cuda.h>
  28. #include <drivers/cpu/driver_cpu.h>
  29. #include <drivers/mic/driver_mic_source.h>
  30. #include <drivers/mpi/driver_mpi_source.h>
  31. #include <drivers/mpi/driver_mpi_common.h>
  32. #include <drivers/mp_common/source_common.h>
  33. #include <drivers/opencl/driver_opencl.h>
  34. #include <drivers/opencl/driver_opencl_utils.h>
  35. #include <profiling/profiling.h>
  36. #include <datawizard/datastats.h>
  37. #include <datawizard/memory_nodes.h>
  38. #include <datawizard/memory_manager.h>
  39. #include <common/uthash.h>
  40. #ifdef STARPU_HAVE_HWLOC
  41. #include <hwloc.h>
  42. #ifndef HWLOC_API_VERSION
  43. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  44. #endif
  45. #if HWLOC_API_VERSION < 0x00010b00
  46. #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
  47. #endif
  48. #endif
  49. #ifdef STARPU_HAVE_WINDOWS
  50. #include <windows.h>
  51. #endif
  52. #ifdef STARPU_SIMGRID
  53. #include <core/simgrid.h>
  54. #endif
  55. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  56. #include <hwloc/cuda.h>
  57. #endif
  58. #if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_OPENCL)
  59. #include <hwloc/opencl.h>
  60. #endif
  61. static unsigned topology_is_initialized = 0;
  62. static int nobind;
  63. static int numa_enabled = -1;
  64. /* For checking whether two workers share the same PU, indexed by PU number */
  65. static int cpu_worker[STARPU_MAXCPUS];
  66. static char * cpu_name[STARPU_MAXCPUS];
  67. static unsigned nb_numa_nodes = 0;
  68. static int numa_memory_nodes_to_hwloclogid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */
  69. static int numa_memory_nodes_to_physicalid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in physical id */
  70. static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES];
  71. static int _starpu_get_logical_numa_node_worker(unsigned workerid);
  72. #define STARPU_NUMA_UNINITIALIZED (-2)
  73. #define STARPU_NUMA_MAIN_RAM (-1)
  #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  /* uthash element whose key is the `gpuid' field. */
  struct handle_entry
  {
      UT_hash_handle hh;
      unsigned gpuid;
  };

  /* One flag per worker architecture; set to 1 by
   * _starpu_initialize_workers_deviceid() when neither an environment variable
   * nor explicit user values selected the device ids. */
  static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };

  # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /* Head of the `devices_using_cuda' hash table. */
  static struct handle_entry *devices_using_cuda;
  # endif
  #endif /* STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_SIMGRID || STARPU_USE_MPI_MASTER_SLAVE */

  #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
  #endif

  #ifdef STARPU_USE_MIC
  static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  #endif

  #ifdef STARPU_USE_MPI_MASTER_SLAVE
  /* Not static: has external linkage. */
  struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS];
  #endif
  95. int starpu_memory_nodes_get_numa_count(void)
  96. {
  97. return nb_numa_nodes;
  98. }
  99. #if defined(STARPU_HAVE_HWLOC)
  100. static hwloc_obj_t numa_get_obj(hwloc_obj_t obj)
  101. {
  102. #if HWLOC_API_VERSION >= 0x00020000
  103. while (obj->memory_first_child == NULL)
  104. {
  105. obj = obj->parent;
  106. if (!obj)
  107. return NULL;
  108. }
  109. return obj->memory_first_child;
  110. #else
  111. while (obj->type != HWLOC_OBJ_NUMANODE)
  112. {
  113. obj = obj->parent;
  114. /* If we don't find a "node" obj before the root, this means
  115. * hwloc does not know whether there are numa nodes or not, so
  116. * we should not use a per-node sampling in that case. */
  117. if (!obj)
  118. return NULL;
  119. }
  120. return obj;
  121. #endif
  122. }
  123. static int numa_get_logical_id(hwloc_obj_t obj)
  124. {
  125. STARPU_ASSERT(obj);
  126. obj = numa_get_obj(obj);
  127. if (!obj)
  128. return 0;
  129. return obj->logical_index;
  130. }
  131. static int numa_get_physical_id(hwloc_obj_t obj)
  132. {
  133. STARPU_ASSERT(obj);
  134. obj = numa_get_obj(obj);
  135. if (!obj)
  136. return 0;
  137. return obj->os_index;
  138. }
  139. #endif
  140. /* This returns the exact NUMA node next to a worker */
  141. static int _starpu_get_logical_numa_node_worker(unsigned workerid)
  142. {
  143. #if defined(STARPU_HAVE_HWLOC)
  144. STARPU_ASSERT(numa_enabled != -1);
  145. if (numa_enabled)
  146. {
  147. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  148. struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
  149. struct _starpu_machine_topology *topology = &config->topology ;
  150. hwloc_obj_t obj;
  151. switch(worker->arch)
  152. {
  153. case STARPU_CPU_WORKER:
  154. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  155. break;
  156. default:
  157. STARPU_ABORT();
  158. }
  159. return numa_get_logical_id(obj);
  160. }
  161. else
  162. #endif
  163. {
  164. (void) workerid; /* unused */
  165. return STARPU_NUMA_MAIN_RAM;
  166. }
  167. }
  168. /* This returns the exact NUMA node next to a worker */
  169. static int _starpu_get_physical_numa_node_worker(unsigned workerid)
  170. {
  171. #if defined(STARPU_HAVE_HWLOC)
  172. STARPU_ASSERT(numa_enabled != -1);
  173. if (numa_enabled)
  174. {
  175. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  176. struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
  177. struct _starpu_machine_topology *topology = &config->topology ;
  178. hwloc_obj_t obj;
  179. switch(worker->arch)
  180. {
  181. case STARPU_CPU_WORKER:
  182. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  183. break;
  184. default:
  185. STARPU_ABORT();
  186. }
  187. return numa_get_physical_id(obj);
  188. }
  189. else
  190. #endif
  191. {
  192. (void) workerid; /* unused */
  193. return STARPU_NUMA_MAIN_RAM;
  194. }
  195. }
  196. /* This returns the CPU NUMA memory close to a worker */
  197. static int _starpu_get_logical_close_numa_node_worker(unsigned workerid)
  198. {
  199. #if defined(STARPU_HAVE_HWLOC)
  200. STARPU_ASSERT(numa_enabled != -1);
  201. if (numa_enabled)
  202. {
  203. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  204. struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
  205. struct _starpu_machine_topology *topology = &config->topology ;
  206. hwloc_obj_t obj;
  207. switch(worker->arch)
  208. {
  209. default:
  210. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  211. break;
  212. #ifndef STARPU_SIMGRID
  213. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  214. case STARPU_CUDA_WORKER:
  215. obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, worker->devid);
  216. if (!obj)
  217. obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ;
  218. break;
  219. #endif
  220. #endif
  221. }
  222. return numa_get_logical_id(obj);
  223. }
  224. else
  225. #endif
  226. {
  227. (void) workerid; /* unused */
  228. return STARPU_NUMA_MAIN_RAM;
  229. }
  230. }
  231. //TODO change this in an array
  232. int starpu_memory_nodes_numa_hwloclogid_to_id(int logid)
  233. {
  234. unsigned n;
  235. for (n = 0; n < nb_numa_nodes; n++)
  236. if (numa_memory_nodes_to_hwloclogid[n] == logid)
  237. return n;
  238. return -1;
  239. }
  240. int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id)
  241. {
  242. STARPU_ASSERT(id < STARPU_MAXNUMANODES);
  243. return numa_memory_nodes_to_hwloclogid[id];
  244. }
  245. int starpu_memory_nodes_numa_devid_to_id(unsigned id)
  246. {
  247. STARPU_ASSERT(id < STARPU_MAXNUMANODES);
  248. return numa_memory_nodes_to_physicalid[id];
  249. }
  250. //TODO change this in an array
  251. int starpu_memory_nodes_numa_id_to_devid(int osid)
  252. {
  253. unsigned n;
  254. for (n = 0; n < nb_numa_nodes; n++)
  255. if (numa_memory_nodes_to_physicalid[n] == osid)
  256. return n;
  257. return -1;
  258. }
  259. // TODO: cache the values instead of looking in hwloc each time
  260. /* Avoid using this one, prefer _starpu_task_data_get_node_on_worker */
  261. int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned local_node)
  262. {
  263. int node = STARPU_SPECIFIC_NODE_LOCAL;
  264. if (task->cl->specific_nodes)
  265. node = STARPU_CODELET_GET_NODE(task->cl, index);
  266. switch (node)
  267. {
  268. case STARPU_SPECIFIC_NODE_LOCAL:
  269. // TODO: rather find MCDRAM
  270. node = local_node;
  271. break;
  272. case STARPU_SPECIFIC_NODE_CPU:
  273. switch (starpu_node_get_kind(local_node))
  274. {
  275. case STARPU_CPU_RAM:
  276. node = local_node;
  277. break;
  278. default:
  279. // TODO: rather take close NUMA node
  280. node = STARPU_MAIN_RAM;
  281. break;
  282. }
  283. break;
  284. case STARPU_SPECIFIC_NODE_SLOW:
  285. // TODO: rather leave in DDR
  286. node = local_node;
  287. break;
  288. case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU:
  289. {
  290. enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
  291. if (mode & STARPU_R)
  292. {
  293. if (mode & STARPU_R && task->handles[index]->per_node[local_node].state != STARPU_INVALID)
  294. {
  295. /* It is here already, rather access it from here */
  296. node = local_node;
  297. }
  298. else
  299. {
  300. /* It is not here already, do not bother moving it */
  301. node = STARPU_MAIN_RAM;
  302. }
  303. }
  304. else
  305. {
  306. /* Nothing to read, consider where to write */
  307. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
  308. if (handle->wt_mask & (1 << STARPU_MAIN_RAM))
  309. /* Write through, better simply write to the main memory */
  310. node = STARPU_MAIN_RAM;
  311. else
  312. /* Better keep temporary data on the accelerator to save PCI bandwidth */
  313. node = local_node;
  314. }
  315. break;
  316. }
  317. }
  318. return node;
  319. }
  320. int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker)
  321. {
  322. unsigned local_node = starpu_worker_get_memory_node(worker);
  323. int node = STARPU_SPECIFIC_NODE_LOCAL;
  324. if (task->cl->specific_nodes)
  325. node = STARPU_CODELET_GET_NODE(task->cl, index);
  326. switch (node)
  327. {
  328. case STARPU_SPECIFIC_NODE_LOCAL:
  329. // TODO: rather find MCDRAM
  330. node = local_node;
  331. break;
  332. case STARPU_SPECIFIC_NODE_CPU:
  333. node = starpu_memory_nodes_numa_hwloclogid_to_id(_starpu_get_logical_close_numa_node_worker(worker));
  334. if (node == -1)
  335. node = STARPU_MAIN_RAM;
  336. break;
  337. case STARPU_SPECIFIC_NODE_SLOW:
  338. // TODO: rather leave in DDR
  339. node = local_node;
  340. break;
  341. case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU:
  342. {
  343. enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
  344. if (mode & STARPU_R)
  345. {
  346. if (task->handles[index]->per_node[local_node].state != STARPU_INVALID)
  347. {
  348. /* It is here already, rather access it from here */
  349. node = local_node;
  350. }
  351. else
  352. {
  353. /* It is not here already, do not bother moving it */
  354. node = STARPU_MAIN_RAM;
  355. }
  356. }
  357. else
  358. {
  359. /* Nothing to read, consider where to write */
  360. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
  361. if (handle->wt_mask & (1 << STARPU_MAIN_RAM))
  362. /* Write through, better simply write to the main memory */
  363. node = STARPU_MAIN_RAM;
  364. else
  365. /* Better keep temporary data on the accelerator to save PCI bandwidth */
  366. node = local_node;
  367. }
  368. break;
  369. }
  370. }
  371. return node;
  372. }
  373. struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
  374. {
  375. unsigned nworkers = starpu_worker_get_count();
  376. unsigned workerid;
  377. for (workerid = 0; workerid < nworkers; workerid++)
  378. {
  379. if (starpu_worker_get_type(workerid) == d->type)
  380. {
  381. struct _starpu_worker *worker;
  382. worker = _starpu_get_worker_struct(workerid);
  383. switch (d->type)
  384. {
  385. #ifdef STARPU_USE_CPU
  386. case STARPU_CPU_WORKER:
  387. if (worker->devid == d->id.cpu_id)
  388. return worker;
  389. break;
  390. #endif
  391. #ifdef STARPU_USE_OPENCL
  392. case STARPU_OPENCL_WORKER:
  393. {
  394. cl_device_id device;
  395. starpu_opencl_get_device(worker->devid, &device);
  396. if (device == d->id.opencl_id)
  397. return worker;
  398. break;
  399. }
  400. #endif
  401. #ifdef STARPU_USE_CUDA
  402. case STARPU_CUDA_WORKER:
  403. {
  404. if (worker->devid == d->id.cuda_id)
  405. return worker;
  406. break;
  407. }
  408. #endif
  409. default:
  410. (void) worker;
  411. _STARPU_DEBUG("Invalid device type\n");
  412. return NULL;
  413. }
  414. }
  415. }
  416. return NULL;
  417. }
  418. /*
  419. * Discover the topology of the machine
  420. */
  421. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  422. static void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid,
  423. int *current, int *workers_gpuid,
  424. const char *varname, unsigned nhwgpus,
  425. enum starpu_worker_archtype type)
  426. {
  427. char *strval;
  428. unsigned i;
  429. *current = 0;
  430. /* conf->workers_gpuid indicates the successive GPU identifier that
  431. * should be used to bind the workers. It should be either filled
  432. * according to the user's explicit parameters (from starpu_conf) or
  433. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  434. * round-robin policy is used to distributed the workers over the
  435. * cores. */
  436. /* what do we use, explicit value, env. variable, or round-robin ? */
  437. strval = starpu_getenv(varname);
  438. if (strval)
  439. {
  440. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  441. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  442. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  443. * 2". */
  444. unsigned wrap = 0;
  445. unsigned number_of_entries = 0;
  446. char *endptr;
  447. /* we use the content of the STARPU_WORKERS_CUDAID
  448. * env. variable */
  449. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  450. {
  451. if (!wrap)
  452. {
  453. long int val;
  454. val = strtol(strval, &endptr, 10);
  455. if (endptr != strval)
  456. {
  457. workers_gpuid[i] = (unsigned)val;
  458. strval = endptr;
  459. }
  460. else
  461. {
  462. /* there must be at least one entry */
  463. STARPU_ASSERT(i != 0);
  464. number_of_entries = i;
  465. /* there is no more values in the
  466. * string */
  467. wrap = 1;
  468. workers_gpuid[i] = workers_gpuid[0];
  469. }
  470. }
  471. else
  472. {
  473. workers_gpuid[i] =
  474. workers_gpuid[i % number_of_entries];
  475. }
  476. }
  477. }
  478. else if (explicit_workers_gpuid)
  479. {
  480. /* we use the explicit value from the user */
  481. memcpy(workers_gpuid,
  482. explicit_workers_gpuid,
  483. STARPU_NMAXWORKERS*sizeof(unsigned));
  484. }
  485. else
  486. {
  487. /* by default, we take a round robin policy */
  488. if (nhwgpus > 0)
  489. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  490. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  491. /* StarPU can use sampling techniques to bind threads
  492. * correctly */
  493. may_bind_automatically[type] = 1;
  494. }
  495. }
  496. #endif
  497. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  498. static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config)
  499. {
  500. struct _starpu_machine_topology *topology = &config->topology;
  501. struct starpu_conf *uconf = &config->conf;
  502. _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0
  503. ? NULL
  504. : (int *)uconf->workers_cuda_gpuid,
  505. &(config->current_cuda_gpuid),
  506. (int *)topology->workers_cuda_gpuid,
  507. "STARPU_WORKERS_CUDAID",
  508. topology->nhwdevices[STARPU_CUDA_WORKER],
  509. STARPU_CUDA_WORKER);
  510. }
  511. static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
  512. {
  513. unsigned i = ((config->current_cuda_gpuid++) % config->topology.ndevices[STARPU_CUDA_WORKER]);
  514. return (int)config->topology.workers_cuda_gpuid[i];
  515. }
  516. #endif
  517. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  518. static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config)
  519. {
  520. struct _starpu_machine_topology *topology = &config->topology;
  521. struct starpu_conf *uconf = &config->conf;
  522. _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_opencl_gpuid == 0
  523. ? NULL
  524. : (int *)uconf->workers_opencl_gpuid,
  525. &(config->current_opencl_gpuid),
  526. (int *)topology->workers_opencl_gpuid,
  527. "STARPU_WORKERS_OPENCLID",
  528. topology->nhwdevices[STARPU_OPENCL_WORKER],
  529. STARPU_OPENCL_WORKER);
  530. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  531. // Detect devices which are already used with CUDA
  532. {
  533. unsigned tmp[STARPU_NMAXWORKERS];
  534. unsigned nb=0;
  535. int i;
  536. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  537. {
  538. struct handle_entry *entry;
  539. int devid = config->topology.workers_opencl_gpuid[i];
  540. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  541. if (entry == NULL)
  542. {
  543. tmp[nb] = topology->workers_opencl_gpuid[i];
  544. nb++;
  545. }
  546. }
  547. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  548. tmp[i] = -1;
  549. memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
  550. }
  551. #endif /* STARPU_USE_CUDA */
  552. {
  553. // Detect identical devices
  554. struct handle_entry *devices_already_used = NULL;
  555. unsigned tmp[STARPU_NMAXWORKERS];
  556. unsigned nb=0;
  557. int i;
  558. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  559. {
  560. int devid = topology->workers_opencl_gpuid[i];
  561. struct handle_entry *entry;
  562. HASH_FIND_INT(devices_already_used, &devid, entry);
  563. if (entry == NULL)
  564. {
  565. struct handle_entry *entry2;
  566. _STARPU_MALLOC(entry2, sizeof(*entry2));
  567. entry2->gpuid = devid;
  568. HASH_ADD_INT(devices_already_used, gpuid,
  569. entry2);
  570. tmp[nb] = devid;
  571. nb ++;
  572. }
  573. }
  574. struct handle_entry *entry=NULL, *tempo=NULL;
  575. HASH_ITER(hh, devices_already_used, entry, tempo)
  576. {
  577. HASH_DEL(devices_already_used, entry);
  578. free(entry);
  579. }
  580. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  581. tmp[i] = -1;
  582. memcpy(topology->workers_opencl_gpuid, tmp, sizeof(unsigned)*STARPU_NMAXWORKERS);
  583. }
  584. }
  585. static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
  586. {
  587. unsigned i = ((config->current_opencl_gpuid++) % config->topology.ndevices[STARPU_OPENCL_WORKER]);
  588. return (int)config->topology.workers_opencl_gpuid[i];
  589. }
  590. #endif
  /* The two MIC helpers below are compiled out by `#if 0'.  They follow the
   * same pattern as the CUDA and OpenCL helpers: one fills the table of MIC
   * device identifiers through _starpu_initialize_workers_deviceid(), the
   * other hands the identifiers out in a round-robin fashion.
   * NOTE(review): the first helper tests `uconf->...' (i.e. config->conf) but
   * takes the explicit values from `config->user_conf->...' -- to be checked
   * before re-enabling this code. */
  591. #if 0
  592. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  593. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  594. {
  595. struct _starpu_machine_topology *topology = &config->topology;
  596. struct starpu_conf *uconf = &config->conf;
  597. _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_mic_deviceid == 0
  598. ? NULL
  599. : (int *)config->user_conf->workers_mic_deviceid,
  600. &(config->current_mic_deviceid),
  601. (int *)topology->workers_mic_deviceid,
  602. "STARPU_WORKERS_MICID",
  603. topology->nhwdevices[STARPU_MIC_WORKER],
  604. STARPU_MIC_WORKER);
  605. }
  606. #endif
  607. #endif
  /* Compiled out as well: returns the entry of workers_mic_deviceid[] at
   * position current_mic_deviceid modulo ndevices[STARPU_MIC_WORKER] and
   * post-increments current_mic_deviceid. */
  608. #if 0
  609. #ifdef STARPU_USE_MIC
  610. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  611. {
  612. unsigned i = ((config->current_mic_deviceid++) % config->topology.ndevices[STARPU_MIC_WORKER]);
  613. return (int)config->topology.workers_mic_deviceid[i];
  614. }
  615. #endif
  616. #endif
  617. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  618. static inline int _starpu_get_next_mpi_deviceid(struct _starpu_machine_config *config)
  619. {
  620. unsigned i = ((config->current_mpi_deviceid++) % config->topology.ndevices[STARPU_MPI_MS_WORKER]);
  621. return (int)config->topology.workers_mpi_ms_deviceid[i];
  622. }
  623. static void _starpu_init_mpi_topology(struct _starpu_machine_config *config, long mpi_idx)
  624. {
  625. /* Discover the topology of the mpi node identifier by MPI_IDX. That
  626. * means, make this StarPU instance aware of the number of cores available
  627. * on this MPI device. Update the `nhwworker[STARPU_MPI_MS_WORKER]' topology field
  628. * accordingly. */
  629. struct _starpu_machine_topology *topology = &config->topology;
  630. int nbcores;
  631. _starpu_src_common_sink_nbcores(_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
  632. topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nbcores;
  633. }
  634. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  635. #ifdef STARPU_USE_MIC
  636. static void _starpu_init_mic_topology(struct _starpu_machine_config *config, long mic_idx)
  637. {
  638. /* Discover the topology of the mic node identifier by MIC_IDX. That
  639. * means, make this StarPU instance aware of the number of cores available
  640. * on this MIC device. Update the `nhwworker[STARPU_MIC_WORKER]' topology field
  641. * accordingly. */
  642. struct _starpu_machine_topology *topology = &config->topology;
  643. int nbcores;
  644. _starpu_src_common_sink_nbcores(_starpu_mic_nodes[mic_idx], &nbcores);
  645. topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = nbcores;
  646. }
  647. static int _starpu_init_mic_node(struct _starpu_machine_config *config, int mic_idx,
  648. COIENGINE *coi_handle, COIPROCESS *coi_process)
  649. {
  650. /* Initialize the MIC node of index MIC_IDX. */
  651. struct starpu_conf *user_conf = &config->conf;
  652. char ***argv = _starpu_get_argv();
  653. const char *suffixes[] = {"-mic", "_mic", NULL};
  654. /* Environment variables to send to the Sink, it informs it what kind
  655. * of node it is (architecture and type) as there is no way to discover
  656. * it itself */
  657. char mic_idx_env[32];
  658. snprintf(mic_idx_env, sizeof(mic_idx_env), "_STARPU_MIC_DEVID=%d", mic_idx);
  659. /* XXX: this is currently necessary so that the remote process does not
  660. * segfault. */
  661. char nb_mic_env[32];
  662. snprintf(nb_mic_env, sizeof(nb_mic_env), "_STARPU_MIC_NB=%d", 2);
  663. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  664. char mic_sink_program_path[1024];
  665. /* Let's get the helper program to run on the MIC device */
  666. int mic_file_found = _starpu_src_common_locate_file(mic_sink_program_path,
  667. sizeof(mic_sink_program_path),
  668. starpu_getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  669. starpu_getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  670. user_conf->mic_sink_program_path,
  671. (argv ? (*argv)[0] : NULL),
  672. suffixes);
  673. if (0 != mic_file_found)
  674. {
  675. _STARPU_MSG("No MIC program specified, use the environment\n"
  676. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  677. "or the field 'starpu_conf.mic_sink_program_path'\n"
  678. "to define it.\n");
  679. return -1;
  680. }
  681. COIRESULT res;
  682. /* Let's get the handle which let us manage the remote MIC device */
  683. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  684. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  685. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  686. /* We launch the helper on the MIC device, which will wait for us
  687. * to give it work to do.
  688. * As we will communicate further with the device throught scif we
  689. * don't need to keep the process pointer */
  690. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  691. mic_sink_env, 1, NULL, 0, NULL,
  692. coi_process);
  693. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  694. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  695. /* Let's create the node structure, we'll communicate with the peer
  696. * through scif thanks to it */
  697. _starpu_mic_nodes[mic_idx] =
  698. _starpu_mp_common_node_create(STARPU_NODE_MIC_SOURCE, mic_idx);
  699. return 0;
  700. }
  701. #endif
  702. #ifndef STARPU_SIMGRID
  703. #ifdef STARPU_HAVE_HWLOC
  704. static void _starpu_allocate_topology_userdata(hwloc_obj_t obj)
  705. {
  706. unsigned i;
  707. _STARPU_CALLOC(obj->userdata, 1, sizeof(struct _starpu_hwloc_userdata));
  708. for (i = 0; i < obj->arity; i++)
  709. _starpu_allocate_topology_userdata(obj->children[i]);
  710. #if HWLOC_API_VERSION >= 0x00020000
  711. hwloc_obj_t child;
  712. for (child = obj->io_first_child; child; child = child->next_sibling)
  713. _starpu_allocate_topology_userdata(child);
  714. #endif
  715. }
  716. static void _starpu_deallocate_topology_userdata(hwloc_obj_t obj)
  717. {
  718. unsigned i;
  719. struct _starpu_hwloc_userdata *data = obj->userdata;
  720. STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1);
  721. free(data);
  722. for (i = 0; i < obj->arity; i++)
  723. _starpu_deallocate_topology_userdata(obj->children[i]);
  724. #if HWLOC_API_VERSION >= 0x00020000
  725. hwloc_obj_t child;
  726. for (child = obj->io_first_child; child; child = child->next_sibling)
  727. _starpu_deallocate_topology_userdata(child);
  728. #endif
  729. }
  730. #endif
  731. #endif
  732. static void _starpu_init_topology(struct _starpu_machine_config *config)
  733. {
  734. /* Discover the topology, meaning finding all the available PUs for
  735. the compiled drivers. These drivers MUST have been initialized
  736. before calling this function. The discovered topology is filled in
  737. CONFIG. */
  738. struct _starpu_machine_topology *topology = &config->topology;
  739. if (topology_is_initialized)
  740. return;
  741. nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
  742. topology->nhwdevices[STARPU_CPU_WORKER] = 1;
  743. topology->nhwworker[STARPU_CPU_WORKER][0] = 0;
  744. topology->nhwpus = 0;
  745. #ifndef STARPU_SIMGRID
  746. #ifdef STARPU_HAVE_HWLOC
  747. hwloc_topology_init(&topology->hwtopology);
  748. char *hwloc_input = starpu_getenv("STARPU_HWLOC_INPUT");
  749. if (hwloc_input && hwloc_input[0])
  750. {
  751. int err = hwloc_topology_set_xml(topology->hwtopology, hwloc_input);
  752. if (err < 0) _STARPU_DISP("Could not load hwloc input %s\n", hwloc_input);
  753. }
  754. _starpu_topology_filter(topology->hwtopology);
  755. hwloc_topology_load(topology->hwtopology);
  756. #ifdef HAVE_HWLOC_CPUKINDS_GET_NR
  757. int nr_kinds = hwloc_cpukinds_get_nr(topology->hwtopology, 0);
  758. if (nr_kinds > 1)
  759. _STARPU_DISP("Warning: there are several kinds of CPU on this system. For now StarPU assumes all CPU are equal\n");
  760. #endif
  761. if (starpu_get_env_number_default("STARPU_WORKERS_GETBIND", 0))
  762. {
  763. /* Respect the existing binding */
  764. hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  765. int ret = hwloc_get_cpubind(topology->hwtopology, cpuset, HWLOC_CPUBIND_THREAD);
  766. if (ret)
  767. _STARPU_DISP("Warning: could not get current CPU binding: %s\n", strerror(errno));
  768. else
  769. {
  770. ret = hwloc_topology_restrict(topology->hwtopology, cpuset, 0);
  771. if (ret)
  772. _STARPU_DISP("Warning: could not restrict hwloc to cpuset: %s\n", strerror(errno));
  773. }
  774. hwloc_bitmap_free(cpuset);
  775. }
  776. _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
  777. #endif
  778. #endif
  779. #ifdef STARPU_SIMGRID
  780. config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  781. #elif defined(STARPU_HAVE_HWLOC)
  782. /* Discover the CPUs relying on the hwloc interface and fills CONFIG
  783. * accordingly. */
  784. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_CORE);
  785. config->pu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU);
  786. /* Would be very odd */
  787. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  788. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  789. {
  790. /* unknown, using logical procesors as fallback */
  791. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  792. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  793. HWLOC_OBJ_PU);
  794. }
  795. topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth);
  796. topology->nhwpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth);
  797. #elif defined(HAVE_SYSCONF)
  798. /* Discover the CPUs relying on the sysconf(3) function and fills
  799. * CONFIG accordingly. */
  800. config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  801. #elif defined(_WIN32)
  802. /* Discover the CPUs on Cygwin and MinGW systems. */
  803. SYSTEM_INFO sysinfo;
  804. GetSystemInfo(&sysinfo);
  805. config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  806. #else
  807. #warning no way to know number of cores, assuming 1
  808. config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = 1;
  809. #endif
  810. if (config->conf.ncuda != 0)
  811. _starpu_cuda_discover_devices(config);
  812. if (config->conf.nopencl != 0)
  813. _starpu_opencl_discover_devices(config);
  814. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  815. config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count();
  816. #endif
  817. topology_is_initialized = 1;
  818. }
  819. /*
  820. * Bind workers on the different processors
  821. */
  822. static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config)
  823. {
  824. char *strval;
  825. unsigned i;
  826. struct _starpu_machine_topology *topology = &config->topology;
  827. int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
  828. unsigned bind_on_core = 0;
  829. int scale = 1;
  830. config->current_bindid = 0;
  831. if (starpu_getenv("STARPU_WORKERS_CPUID") && starpu_getenv("STARPU_WORKERS_COREID"))
  832. {
  833. _STARPU_DISP("Warning: STARPU_WORKERS_CPUID and STARPU_WORKERS_COREID cannot be set at the same time. STARPU_WORKERS_CPUID will be used.\n");
  834. }
  835. /* conf->workers_bindid indicates the successive logical PU identifier that
  836. * should be used to bind the workers. It should be either filled
  837. * according to the user's explicit parameters (from starpu_conf) or
  838. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  839. * round-robin policy is used to distributed the workers over the
  840. * cores. */
  841. /* what do we use, explicit value, env. variable, or round-robin ? */
  842. strval = starpu_getenv("STARPU_WORKERS_CPUID");
  843. if (strval == NULL)
  844. {
  845. strval = starpu_getenv("STARPU_WORKERS_COREID");
  846. if (strval)
  847. {
  848. bind_on_core = 1;
  849. scale = nhyperthreads;
  850. }
  851. }
  852. if (strval)
  853. {
  854. /* STARPU_WORKERS_CPUID certainly contains less entries than
  855. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  856. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  857. * 2". */
  858. unsigned wrap = 0;
  859. unsigned number_of_entries = 0;
  860. char *endptr;
  861. /* we use the content of the STARPU_WORKERS_CPUID
  862. * env. variable */
  863. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  864. {
  865. if (!wrap)
  866. {
  867. long int val;
  868. val = strtol(strval, &endptr, 10);
  869. if (endptr != strval)
  870. {
  871. topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus);
  872. strval = endptr;
  873. if (*strval == '-')
  874. {
  875. /* range of values */
  876. long int endval;
  877. strval++;
  878. if (*strval && *strval != ' ' && *strval != ',')
  879. {
  880. endval = strtol(strval, &endptr, 10);
  881. strval = endptr;
  882. }
  883. else
  884. {
  885. endval = (bind_on_core ? topology->nhwworker[STARPU_CPU_WORKER][0] : topology->nhwpus) - 1;
  886. if (*strval)
  887. strval++;
  888. }
  889. for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
  890. {
  891. i++;
  892. topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus);
  893. }
  894. }
  895. if (*strval == ',')
  896. strval++;
  897. }
  898. else
  899. {
  900. /* there must be at least one entry */
  901. STARPU_ASSERT(i != 0);
  902. number_of_entries = i;
  903. /* there is no more values in the
  904. * string */
  905. wrap = 1;
  906. topology->workers_bindid[i] =
  907. topology->workers_bindid[0];
  908. }
  909. }
  910. else
  911. {
  912. topology->workers_bindid[i] =
  913. topology->workers_bindid[i % number_of_entries];
  914. }
  915. }
  916. }
  917. else if (config->conf.use_explicit_workers_bindid)
  918. {
  919. /* we use the explicit value from the user */
  920. memcpy(topology->workers_bindid,
  921. config->conf.workers_bindid,
  922. STARPU_NMAXWORKERS*sizeof(unsigned));
  923. }
  924. else
  925. {
  926. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  927. int k;
  928. int nbindids=0;
  929. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  930. i = 0; /* PU number currently assigned */
  931. k = 0; /* Number of threads already put on the current core */
  932. while(nbindids < STARPU_NMAXWORKERS)
  933. {
  934. if (k >= nth_per_core)
  935. {
  936. /* We have already put enough workers on this
  937. * core, skip remaining PUs from this core, and
  938. * proceed with next core */
  939. i += nhyperthreads-nth_per_core;
  940. k = 0;
  941. continue;
  942. }
  943. /* Add a worker to this core, by using this logical PU */
  944. topology->workers_bindid[nbindids++] = (unsigned)(i % topology->nhwpus);
  945. k++;
  946. i++;
  947. }
  948. }
  949. for (i = 0; i < STARPU_MAXCPUS;i++)
  950. cpu_worker[i] = STARPU_NOWORKERID;
  951. /* no binding yet */
  952. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  953. memset(&config->currently_shared, 0, sizeof(config->currently_shared));
  954. }
  955. static void _starpu_deinitialize_workers_bindid(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  956. {
  957. unsigned i;
  958. for (i = 0; i < STARPU_MAXCPUS;i++)
  959. {
  960. if (cpu_name[i])
  961. {
  962. free(cpu_name[i]);
  963. cpu_name[i] = NULL;
  964. }
  965. }
  966. }
  967. /* This function gets the identifier of the next core on which to bind a
  968. * worker. In case a list of preferred cores was specified (logical indexes),
  969. * we look for a an available core among the list if possible, otherwise a
  970. * round-robin policy is used. */
  971. static inline unsigned _starpu_get_next_bindid(struct _starpu_machine_config *config, unsigned flags,
  972. unsigned *preferred_binding, unsigned npreferred)
  973. {
  974. struct _starpu_machine_topology *topology = &config->topology;
  975. STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?");
  976. unsigned current_preferred;
  977. unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
  978. unsigned ncores = topology->nhwpus / nhyperthreads;
  979. unsigned i;
  980. if (npreferred)
  981. {
  982. STARPU_ASSERT_MSG(preferred_binding, "Passing NULL pointer for parameter preferred_binding with a non-0 value of parameter npreferred");
  983. }
  984. /* loop over the preference list */
  985. for (current_preferred = 0;
  986. current_preferred < npreferred;
  987. current_preferred++)
  988. {
  989. /* can we bind the worker on the preferred core ? */
  990. unsigned requested_core = preferred_binding[current_preferred];
  991. unsigned requested_bindid = requested_core * nhyperthreads;
  992. /* Look at the remaining cores to be bound to */
  993. for (i = 0; i < ncores; i++)
  994. {
  995. if (topology->workers_bindid[i] == requested_bindid &&
  996. (!config->currently_bound[i] ||
  997. (config->currently_shared[i] && !(flags & STARPU_THREAD_ACTIVE)))
  998. )
  999. {
  1000. /* the cpu is available, or shareable with us, we use it ! */
  1001. config->currently_bound[i] = 1;
  1002. if (!(flags & STARPU_THREAD_ACTIVE))
  1003. config->currently_shared[i] = 1;
  1004. return requested_bindid;
  1005. }
  1006. }
  1007. }
  1008. if (!(flags & STARPU_THREAD_ACTIVE))
  1009. {
  1010. /* Try to find a shareable PU */
  1011. for (i = 0; i < ncores; i++)
  1012. if (config->currently_shared[i])
  1013. return topology->workers_bindid[i];
  1014. }
  1015. /* Try to find an available PU from last used PU */
  1016. for (i = config->current_bindid; i < ncores; i++)
  1017. if (!config->currently_bound[i])
  1018. /* Found a cpu ready for use, use it! */
  1019. break;
  1020. if (i == ncores)
  1021. {
  1022. /* Finished binding on all cpus, restart from start in
  1023. * case the user really wants overloading */
  1024. memset(&config->currently_bound, 0, sizeof(config->currently_bound));
  1025. i = 0;
  1026. }
  1027. STARPU_ASSERT(i < ncores);
  1028. unsigned bindid = topology->workers_bindid[i];
  1029. config->currently_bound[i] = 1;
  1030. if (!(flags & STARPU_THREAD_ACTIVE))
  1031. config->currently_shared[i] = 1;
  1032. config->current_bindid = i;
  1033. return bindid;
  1034. }
  1035. unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred)
  1036. {
  1037. return _starpu_get_next_bindid(_starpu_get_machine_config(), flags, preferred, npreferred);
  1038. }
  1039. unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
  1040. {
  1041. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1042. if (config->conf.nopencl != 0)
  1043. _starpu_opencl_init();
  1044. #endif
  1045. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1046. if (config->conf.ncuda != 0)
  1047. _starpu_init_cuda();
  1048. #endif
  1049. _starpu_init_topology(config);
  1050. return config->topology.nhwworker[STARPU_CPU_WORKER][0];
  1051. }
  1052. unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config)
  1053. {
  1054. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1055. if (config->conf.nopencl != 0)
  1056. _starpu_opencl_init();
  1057. #endif
  1058. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1059. if (config->conf.ncuda != 0)
  1060. _starpu_init_cuda();
  1061. #endif
  1062. _starpu_init_topology(config);
  1063. return config->topology.nhwpus;
  1064. }
  1065. unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  1066. {
  1067. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1068. if (config->conf.nopencl != 0)
  1069. _starpu_opencl_init();
  1070. #endif
  1071. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1072. if (config->conf.ncuda != 0)
  1073. _starpu_init_cuda();
  1074. #endif
  1075. _starpu_init_topology(config);
  1076. int res;
  1077. #if defined(STARPU_HAVE_HWLOC)
  1078. if (numa_enabled == -1)
  1079. numa_enabled = starpu_get_env_number_default("STARPU_USE_NUMA", 0);
  1080. if (numa_enabled)
  1081. {
  1082. struct _starpu_machine_topology *topology = &config->topology ;
  1083. int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NUMANODE) ;
  1084. res = nnumanodes > 0 ? nnumanodes : 1 ;
  1085. }
  1086. else
  1087. #endif
  1088. {
  1089. res = 1;
  1090. }
  1091. STARPU_ASSERT_MSG(res <= STARPU_MAXNUMANODES, "Number of NUMA nodes discovered %d is higher than maximum accepted %d ! Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n", res, STARPU_MAXNUMANODES);
  1092. return res;
  1093. }
  1094. #ifdef STARPU_HAVE_HWLOC
  1095. void _starpu_topology_filter(hwloc_topology_t topology)
  1096. {
  1097. #if HWLOC_API_VERSION >= 0x20000
  1098. hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  1099. hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
  1100. #else
  1101. hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
  1102. #endif
  1103. #ifdef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS
  1104. # ifndef STARPU_USE_CUDA
  1105. hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "cuda");
  1106. hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "nvml");
  1107. # endif
  1108. # ifndef STARPU_USE_OPENCL
  1109. hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl");
  1110. # endif
  1111. #endif
  1112. }
  1113. #endif
  1114. #ifdef STARPU_USE_MIC
  1115. static void _starpu_init_mic_config(struct _starpu_machine_config *config,
  1116. struct starpu_conf *user_conf,
  1117. unsigned mic_idx)
  1118. {
  1119. // Configure the MIC device of index MIC_IDX.
  1120. struct _starpu_machine_topology *topology = &config->topology;
  1121. topology->nhwworker[STARPU_MIC_WORKER][mic_idx] = 0;
  1122. _starpu_init_mic_topology(config, mic_idx);
  1123. int nmiccores;
  1124. nmiccores = starpu_get_env_number("STARPU_NMICTHREADS");
  1125. STARPU_ASSERT_MSG(nmiccores >= -1, "nmiccores can not be negative and different from -1 (is is %d)", nmiccores);
  1126. if (nmiccores == -1)
  1127. {
  1128. /* Nothing was specified, so let's use the number of
  1129. * detected mic cores. ! */
  1130. nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
  1131. }
  1132. else
  1133. {
  1134. if ((unsigned) nmiccores > topology->nhwworker[STARPU_MIC_WORKER][mic_idx])
  1135. {
  1136. /* The user requires more MIC cores than there is available */
  1137. _STARPU_MSG("# Warning: %d MIC cores requested. Only %u available.\n", nmiccores, topology->nhwworker[STARPU_MIC_WORKER][mic_idx]);
  1138. nmiccores = topology->nhwworker[STARPU_MIC_WORKER][mic_idx];
  1139. }
  1140. }
  1141. topology->nworker[STARPU_MIC_WORKER][mic_idx] = nmiccores;
  1142. STARPU_ASSERT_MSG(topology->nworker[STARPU_MIC_WORKER][mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  1143. "topology->nworker[STARPU_MIC_WORKER][mic_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
  1144. mic_idx, topology->nworker[STARPU_MIC_WORKER][mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  1145. /* _starpu_initialize_workers_mic_deviceid (config); */
  1146. mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
  1147. mic_worker_set[mic_idx].nworkers = topology->nworker[STARPU_MIC_WORKER][mic_idx];
  1148. unsigned miccore_id;
  1149. for (miccore_id = 0; miccore_id < topology->nworker[STARPU_MIC_WORKER][mic_idx]; miccore_id++)
  1150. {
  1151. int worker_idx = topology->nworkers + miccore_id;
  1152. config->workers[worker_idx].set = &mic_worker_set[mic_idx];
  1153. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  1154. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1155. config->workers[worker_idx].perf_arch.ndevices = 1;
  1156. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  1157. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  1158. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1159. config->workers[worker_idx].devid = mic_idx;
  1160. config->workers[worker_idx].subworkerid = miccore_id;
  1161. config->workers[worker_idx].worker_mask = STARPU_MIC;
  1162. config->worker_mask |= STARPU_MIC;
  1163. }
  1164. _starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
  1165. topology->nworkers += topology->nworker[STARPU_MIC_WORKER][mic_idx];
  1166. }
  1167. static COIENGINE mic_handles[STARPU_MAXMICDEVS];
  1168. COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
  1169. #endif
  1170. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1171. static void _starpu_init_mpi_config(struct _starpu_machine_config *config,
  1172. struct starpu_conf *user_conf,
  1173. unsigned mpi_idx)
  1174. {
  1175. struct _starpu_machine_topology *topology = &config->topology;
  1176. topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = 0;
  1177. _starpu_init_mpi_topology(config, mpi_idx);
  1178. int nmpicores;
  1179. nmpicores = starpu_get_env_number("STARPU_NMPIMSTHREADS");
  1180. if (nmpicores == -1)
  1181. {
  1182. /* Nothing was specified, so let's use the number of
  1183. * detected mpi cores. ! */
  1184. nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
  1185. }
  1186. else
  1187. {
  1188. if ((unsigned) nmpicores > topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx])
  1189. {
  1190. /* The user requires more MPI cores than there is available */
  1191. _STARPU_MSG("# Warning: %d MPI cores requested. Only %u available.\n",
  1192. nmpicores, topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx]);
  1193. nmpicores = topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx];
  1194. }
  1195. }
  1196. topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] = nmpicores;
  1197. STARPU_ASSERT_MSG(topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  1198. "topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx(%u)] (%u) + topology->nworkers (%u) <= STARPU_NMAXWORKERS (%d)",
  1199. mpi_idx, topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
  1200. mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
  1201. mpi_worker_set[mpi_idx].nworkers = topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
  1202. unsigned mpicore_id;
  1203. for (mpicore_id = 0; mpicore_id < topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx]; mpicore_id++)
  1204. {
  1205. int worker_idx = topology->nworkers + mpicore_id;
  1206. config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
  1207. config->workers[worker_idx].arch = STARPU_MPI_MS_WORKER;
  1208. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1209. config->workers[worker_idx].perf_arch.ndevices = 1;
  1210. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MPI_MS_WORKER;
  1211. config->workers[worker_idx].perf_arch.devices[0].devid = mpi_idx;
  1212. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1213. config->workers[worker_idx].devid = mpi_idx;
  1214. config->workers[worker_idx].subworkerid = mpicore_id;
  1215. config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
  1216. config->worker_mask |= STARPU_MPI_MS;
  1217. }
  1218. _starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
  1219. topology->nworkers += topology->nworker[STARPU_MPI_MS_WORKER][mpi_idx];
  1220. }
  1221. #endif
  1222. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1223. static void _starpu_init_mp_config(struct _starpu_machine_config *config,
  1224. struct starpu_conf *user_conf, int no_mp_config)
  1225. {
  1226. /* Discover and configure the mp topology. That means:
  1227. * - discover the number of mp nodes;
  1228. * - initialize each discovered node;
  1229. * - discover the local topology (number of PUs/devices) of each node;
  1230. * - configure the workers accordingly.
  1231. */
  1232. #ifdef STARPU_USE_MIC
  1233. if (!no_mp_config)
  1234. {
  1235. struct _starpu_machine_topology *topology = &config->topology;
  1236. /* Discover and initialize the number of MIC nodes through the mp
  1237. * infrastructure. */
  1238. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  1239. int reqmicdevices = starpu_get_env_number("STARPU_NMIC");
  1240. if (reqmicdevices == -1 && user_conf)
  1241. reqmicdevices = user_conf->nmic;
  1242. if (reqmicdevices == -1)
  1243. /* Nothing was specified, so let's use the number of
  1244. * detected mic devices. ! */
  1245. reqmicdevices = nhwmicdevices;
  1246. STARPU_ASSERT_MSG(reqmicdevices >= -1, "nmic can not be negative and different from -1 (is is %d)", reqmicdevices);
  1247. if (reqmicdevices != -1)
  1248. {
  1249. if ((unsigned) reqmicdevices > nhwmicdevices)
  1250. {
  1251. /* The user requires more MIC devices than there is available */
  1252. _STARPU_MSG("# Warning: %d MIC devices requested. Only %u available.\n", reqmicdevices, nhwmicdevices);
  1253. reqmicdevices = nhwmicdevices;
  1254. }
  1255. }
  1256. topology->ndevices[STARPU_MIC_WORKER] = 0;
  1257. unsigned i;
  1258. for (i = 0; i < (unsigned) reqmicdevices; i++)
  1259. if (0 == _starpu_init_mic_node(config, i, &mic_handles[i], &_starpu_mic_process[i]))
  1260. topology->ndevices[STARPU_MIC_WORKER]++;
  1261. for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
  1262. _starpu_init_mic_config(config, user_conf, i);
  1263. }
  1264. #endif
  1265. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1266. {
  1267. struct _starpu_machine_topology *topology = &config->topology;
  1268. /* Discover and initialize the number of MPI nodes through the mp
  1269. * infrastructure. */
  1270. unsigned nhwmpidevices = _starpu_mpi_src_get_device_count();
  1271. int reqmpidevices = starpu_get_env_number("STARPU_NMPI_MS");
  1272. if (reqmpidevices == -1 && user_conf)
  1273. reqmpidevices = user_conf->nmpi_ms;
  1274. if (reqmpidevices == -1)
  1275. /* Nothing was specified, so let's use the number of
  1276. * detected mpi devices. ! */
  1277. reqmpidevices = nhwmpidevices;
  1278. if (reqmpidevices != -1)
  1279. {
  1280. if ((unsigned) reqmpidevices > nhwmpidevices)
  1281. {
  1282. /* The user requires more MPI devices than there is available */
  1283. _STARPU_MSG("# Warning: %d MPI Master-Slave devices requested. Only %u available.\n",
  1284. reqmpidevices, nhwmpidevices);
  1285. reqmpidevices = nhwmpidevices;
  1286. }
  1287. }
  1288. topology->ndevices[STARPU_MPI_MS_WORKER] = reqmpidevices;
  1289. /* if user don't want to use MPI slaves, we close the slave processes */
  1290. if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0)
  1291. {
  1292. _starpu_mpi_common_mp_deinit();
  1293. exit(0);
  1294. }
  1295. if (!no_mp_config)
  1296. {
  1297. unsigned i;
  1298. for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
  1299. _starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
  1300. for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
  1301. _starpu_init_mpi_config(config, user_conf, i);
  1302. }
  1303. }
  1304. #endif
  1305. }
  1306. #endif
  1307. #ifdef STARPU_USE_MIC
  1308. static void _starpu_deinit_mic_node(unsigned mic_idx)
  1309. {
  1310. _starpu_mp_common_send_command(_starpu_mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
  1311. COIProcessDestroy(_starpu_mic_process[mic_idx], -1, 0, NULL, NULL);
  1312. _starpu_mp_common_node_destroy(_starpu_mic_nodes[mic_idx]);
  1313. }
  1314. #endif
  1315. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1316. static void _starpu_deinit_mpi_node(int devid)
  1317. {
  1318. _starpu_mp_common_send_command(_starpu_mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);
  1319. _starpu_mp_common_node_destroy(_starpu_mpi_ms_nodes[devid]);
  1320. }
  1321. #endif
  1322. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1323. static void _starpu_deinit_mp_config(struct _starpu_machine_config *config)
  1324. {
  1325. struct _starpu_machine_topology *topology = &config->topology;
  1326. unsigned i;
  1327. #ifdef STARPU_USE_MIC
  1328. for (i = 0; i < topology->ndevices[STARPU_MIC_WORKER]; i++)
  1329. _starpu_deinit_mic_node(i);
  1330. _starpu_mic_clear_kernels();
  1331. #endif
  1332. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1333. for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++)
  1334. _starpu_deinit_mpi_node(i);
  1335. #endif
  1336. }
  1337. #endif
  1338. #ifdef STARPU_HAVE_HWLOC
  1339. static unsigned _starpu_topology_count_ngpus(hwloc_obj_t obj)
  1340. {
  1341. struct _starpu_hwloc_userdata *data = obj->userdata;
  1342. unsigned n = data->ngpus;
  1343. unsigned i;
  1344. for (i = 0; i < obj->arity; i++)
  1345. n += _starpu_topology_count_ngpus(obj->children[i]);
  1346. data->ngpus = n;
  1347. //#ifdef STARPU_VERBOSE
  1348. // {
  1349. // char name[64];
  1350. // hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
  1351. // _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
  1352. // }
  1353. //#endif
  1354. return n;
  1355. }
  1356. #endif
  1357. static int _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1358. {
  1359. int i;
  1360. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  1361. {
  1362. config->workers[i].workerid = i;
  1363. config->workers[i].set = NULL;
  1364. }
  1365. struct _starpu_machine_topology *topology = &config->topology;
  1366. topology->nworkers = 0;
  1367. topology->ncombinedworkers = 0;
  1368. topology->nsched_ctxs = 0;
  1369. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1370. if (config->conf.nopencl != 0)
  1371. _starpu_opencl_init();
  1372. #endif
  1373. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1374. if (config->conf.ncuda != 0)
  1375. _starpu_init_cuda();
  1376. #endif
  1377. _starpu_init_topology(config);
  1378. _starpu_initialize_workers_bindid(config);
  1379. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1380. for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++)
  1381. cuda_worker_set[i].workers = NULL;
  1382. #endif
  1383. #ifdef STARPU_USE_MIC
  1384. for (i = 0; i < (int) (sizeof(mic_worker_set)/sizeof(mic_worker_set[0])); i++)
  1385. mic_worker_set[i].workers = NULL;
  1386. #endif
  1387. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1388. for (i = 0; i < (int) (sizeof(mpi_worker_set)/sizeof(mpi_worker_set[0])); i++)
  1389. mpi_worker_set[i].workers = NULL;
  1390. #endif
  1391. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1392. int ncuda = config->conf.ncuda;
  1393. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  1394. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  1395. STARPU_ASSERT_MSG(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS);
  1396. #ifndef STARPU_NON_BLOCKING_DRIVERS
  1397. if (nworker_per_cuda > 1)
  1398. {
  1399. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  1400. nworker_per_cuda = 1;
  1401. }
  1402. #endif
  1403. if (ncuda != 0)
  1404. {
  1405. /* The user did not disable CUDA. We need to initialize CUDA
  1406. * early to count the number of devices */
  1407. _starpu_init_cuda();
  1408. int nb_devices = _starpu_get_cuda_device_count();
  1409. STARPU_ASSERT_MSG(ncuda >= -1, "ncuda can not be negative and different from -1 (is is %d)", ncuda);
  1410. if (ncuda == -1)
  1411. {
  1412. /* Nothing was specified, so let's choose ! */
  1413. ncuda = nb_devices;
  1414. }
  1415. else
  1416. {
  1417. if (ncuda > nb_devices)
  1418. {
  1419. /* The user requires more CUDA devices than
  1420. * there is available */
  1421. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  1422. ncuda = nb_devices;
  1423. }
  1424. }
  1425. }
  1426. /* Now we know how many CUDA devices will be used */
  1427. topology->ndevices[STARPU_CUDA_WORKER] = ncuda;
  1428. for (i = 0; i < ncuda; i++)
  1429. topology->nworker[STARPU_CUDA_WORKER][i] = nworker_per_cuda;
  1430. STARPU_ASSERT(topology->ndevices[STARPU_CUDA_WORKER] <= STARPU_MAXCUDADEVS);
  1431. _starpu_initialize_workers_cuda_gpuid(config);
  1432. /* allow having one worker per stream */
  1433. topology->cuda_th_per_stream = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1);
  1434. topology->cuda_th_per_dev = starpu_get_env_number_default("STARPU_CUDA_THREAD_PER_DEV", -1);
  1435. STARPU_ASSERT_MSG(!(topology->cuda_th_per_stream == 1 && topology->cuda_th_per_dev != -1), "It does not make sense to set both STARPU_CUDA_THREAD_PER_WORKER to 1 and to set STARPU_CUDA_THREAD_PER_DEV, please choose either per worker or per device or none");
  1436. /* per device by default */
  1437. if (topology->cuda_th_per_dev == -1)
  1438. {
  1439. if (topology->cuda_th_per_stream == 1)
  1440. topology->cuda_th_per_dev = 0;
  1441. else
  1442. topology->cuda_th_per_dev = 1;
  1443. }
  1444. /* Not per stream by default */
  1445. if (topology->cuda_th_per_stream == -1)
  1446. {
  1447. topology->cuda_th_per_stream = 0;
  1448. }
  1449. if (!topology->cuda_th_per_dev)
  1450. {
  1451. cuda_worker_set[0].workers = &config->workers[topology->nworkers];
  1452. cuda_worker_set[0].nworkers = topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
  1453. }
  1454. unsigned cudagpu;
  1455. for (cudagpu = 0; cudagpu < topology->ndevices[STARPU_CUDA_WORKER]; cudagpu++)
  1456. {
  1457. int devid = _starpu_get_next_cuda_gpuid(config);
  1458. int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
  1459. struct _starpu_worker_set *worker_set;
  1460. if (topology->cuda_th_per_dev)
  1461. {
  1462. worker_set = &cuda_worker_set[devid];
  1463. worker_set->workers = &config->workers[worker_idx0];
  1464. worker_set->nworkers = nworker_per_cuda;
  1465. }
  1466. else
  1467. {
  1468. /* Same worker set for all devices */
  1469. worker_set = &cuda_worker_set[0];
  1470. }
  1471. for (i = 0; i < nworker_per_cuda; i++)
  1472. {
  1473. int worker_idx = worker_idx0 + i;
  1474. if(topology->cuda_th_per_stream)
  1475. {
  1476. /* Just one worker in the set */
  1477. _STARPU_CALLOC(config->workers[worker_idx].set, 1, sizeof(struct _starpu_worker_set));
  1478. config->workers[worker_idx].set->workers = &config->workers[worker_idx];
  1479. config->workers[worker_idx].set->nworkers = 1;
  1480. }
  1481. else
  1482. config->workers[worker_idx].set = worker_set;
  1483. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  1484. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1485. config->workers[worker_idx].perf_arch.ndevices = 1;
  1486. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  1487. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1488. // TODO: fix perfmodels etc.
  1489. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  1490. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1491. config->workers[worker_idx].devid = devid;
  1492. config->workers[worker_idx].subworkerid = i;
  1493. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  1494. config->worker_mask |= STARPU_CUDA;
  1495. struct handle_entry *entry;
  1496. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  1497. if (!entry)
  1498. {
  1499. _STARPU_MALLOC(entry, sizeof(*entry));
  1500. entry->gpuid = devid;
  1501. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  1502. }
  1503. }
  1504. #ifndef STARPU_SIMGRID
  1505. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  1506. {
  1507. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, devid);
  1508. if (obj)
  1509. {
  1510. struct _starpu_hwloc_userdata *data = obj->userdata;
  1511. data->ngpus++;
  1512. }
  1513. else
  1514. {
  1515. _STARPU_DEBUG("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid);
  1516. }
  1517. }
  1518. #endif
  1519. #endif
  1520. }
  1521. topology->nworkers += topology->ndevices[STARPU_CUDA_WORKER] * nworker_per_cuda;
  1522. #endif
  1523. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1524. int nopencl = config->conf.nopencl;
  1525. if (nopencl != 0)
  1526. {
  1527. /* The user did not disable OPENCL. We need to initialize
  1528. * OpenCL early to count the number of devices */
  1529. _starpu_opencl_init();
  1530. int nb_devices;
  1531. nb_devices = _starpu_opencl_get_device_count();
  1532. STARPU_ASSERT_MSG(nopencl >= -1, "nopencl can not be negative and different from -1 (is is %d)", nopencl);
  1533. if (nopencl == -1)
  1534. {
  1535. /* Nothing was specified, so let's choose ! */
  1536. nopencl = nb_devices;
  1537. if (nopencl > STARPU_MAXOPENCLDEVS)
  1538. {
  1539. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  1540. nopencl = STARPU_MAXOPENCLDEVS;
  1541. }
  1542. }
  1543. else
  1544. {
  1545. /* Let's make sure this value is OK. */
  1546. if (nopencl > nb_devices)
  1547. {
  1548. /* The user requires more OpenCL devices than
  1549. * there is available */
  1550. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  1551. nopencl = nb_devices;
  1552. }
  1553. /* Let's make sure this value is OK. */
  1554. if (nopencl > STARPU_MAXOPENCLDEVS)
  1555. {
  1556. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  1557. nopencl = STARPU_MAXOPENCLDEVS;
  1558. }
  1559. }
  1560. }
  1561. topology->ndevices[STARPU_OPENCL_WORKER] = nopencl;
  1562. for (i = 0; i < nopencl; i++)
  1563. topology->nworker[STARPU_OPENCL_WORKER][i] = 1;
  1564. STARPU_ASSERT(topology->ndevices[STARPU_OPENCL_WORKER] + topology->nworkers <= STARPU_NMAXWORKERS);
  1565. _starpu_initialize_workers_opencl_gpuid(config);
  1566. unsigned openclgpu;
  1567. for (openclgpu = 0; openclgpu < topology->ndevices[STARPU_OPENCL_WORKER]; openclgpu++)
  1568. {
  1569. int worker_idx = topology->nworkers + openclgpu;
  1570. int devid = _starpu_get_next_opencl_gpuid(config);
  1571. if (devid == -1)
  1572. {
  1573. // There is no more devices left
  1574. topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu;
  1575. break;
  1576. }
  1577. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  1578. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1579. config->workers[worker_idx].perf_arch.ndevices = 1;
  1580. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  1581. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1582. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1583. config->workers[worker_idx].subworkerid = 0;
  1584. config->workers[worker_idx].devid = devid;
  1585. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  1586. config->worker_mask |= STARPU_OPENCL;
  1587. }
  1588. topology->nworkers += topology->ndevices[STARPU_OPENCL_WORKER];
  1589. #endif
  1590. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  1591. _starpu_init_mp_config(config, &config->conf, no_mp_config);
  1592. #endif
  1593. /* we put the CPU section after the accelerator : in case there was an
  1594. * accelerator found, we devote one cpu */
  1595. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  1596. int ncpu = config->conf.ncpus;
  1597. if (ncpu != 0)
  1598. {
  1599. STARPU_ASSERT_MSG(ncpu >= -1, "ncpus can not be negative and different from -1 (is is %d)", ncpu);
  1600. if (ncpu == -1)
  1601. {
  1602. unsigned mic_busy_cpus = 0;
  1603. int j = 0;
  1604. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  1605. mic_busy_cpus += (topology->nworker[STARPU_MIC_WORKER][j] ? 1 : 0);
  1606. unsigned mpi_ms_busy_cpus = 0;
  1607. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1608. #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
  1609. for (j = 0; j < STARPU_MAXMPIDEVS; j++)
  1610. mpi_ms_busy_cpus += (topology->nworker[STARPU_MPI_MS_WORKER][j] ? 1 : 0);
  1611. #else
  1612. mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
  1613. #endif
  1614. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  1615. unsigned cuda_busy_cpus = 0;
  1616. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1617. cuda_busy_cpus =
  1618. topology->cuda_th_per_dev == 0 && topology->cuda_th_per_stream == 0 ? (topology->ndevices[STARPU_CUDA_WORKER] ? 1 : 0) :
  1619. topology->cuda_th_per_stream ? (nworker_per_cuda * topology->ndevices[STARPU_CUDA_WORKER]) : topology->ndevices[STARPU_CUDA_WORKER];
  1620. #endif
  1621. unsigned already_busy_cpus = mpi_ms_busy_cpus + mic_busy_cpus
  1622. + cuda_busy_cpus
  1623. + topology->ndevices[STARPU_OPENCL_WORKER];
  1624. long avail_cpus = (long) topology->nhwworker[STARPU_CPU_WORKER][0] - (long) already_busy_cpus;
  1625. if (avail_cpus < 0)
  1626. avail_cpus = 0;
  1627. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  1628. avail_cpus *= nth_per_core;
  1629. ncpu = avail_cpus;
  1630. }
  1631. if (ncpu > STARPU_MAXCPUS)
  1632. {
  1633. _STARPU_DISP("Warning: %d CPU cores requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  1634. ncpu = STARPU_MAXCPUS;
  1635. }
  1636. if (config->conf.reserve_ncpus > 0)
  1637. {
  1638. if (ncpu < config->conf.reserve_ncpus)
  1639. {
  1640. _STARPU_DISP("Warning: %d CPU cores were requested to be reserved, but only %d were available,\n", config->conf.reserve_ncpus, ncpu);
  1641. ncpu = 0;
  1642. }
  1643. else
  1644. {
  1645. ncpu -= config->conf.reserve_ncpus;
  1646. }
  1647. }
  1648. }
  1649. topology->ndevices[STARPU_CPU_WORKER] = 1;
  1650. topology->nworker[STARPU_CPU_WORKER][0] = ncpu;
  1651. STARPU_ASSERT(topology->nworker[STARPU_CPU_WORKER][0] + topology->nworkers <= STARPU_NMAXWORKERS);
  1652. unsigned cpu;
  1653. unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
  1654. for (cpu = 0; cpu < topology->nworker[STARPU_CPU_WORKER][0]; cpu++)
  1655. {
  1656. int worker_idx = topology->nworkers + cpu;
  1657. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  1658. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1659. config->workers[worker_idx].perf_arch.ndevices = 1;
  1660. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
  1661. config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : cpu;
  1662. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1663. config->workers[worker_idx].subworkerid = 0;
  1664. config->workers[worker_idx].devid = cpu;
  1665. config->workers[worker_idx].worker_mask = STARPU_CPU;
  1666. config->worker_mask |= STARPU_CPU;
  1667. }
  1668. topology->nworkers += topology->nworker[STARPU_CPU_WORKER][0];
  1669. #endif
  1670. if (topology->nworkers == 0)
  1671. {
  1672. _STARPU_DEBUG("No worker found, aborting ...\n");
  1673. return -ENODEV;
  1674. }
  1675. return 0;
  1676. }
  1677. void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
  1678. {
  1679. _starpu_close_debug_logfile();
  1680. unsigned worker;
  1681. for (worker = 0; worker < config->topology.nworkers; worker++)
  1682. {
  1683. struct _starpu_worker *workerarg = &config->workers[worker];
  1684. int bindid = workerarg->bindid;
  1685. free(workerarg->perf_arch.devices);
  1686. #ifdef STARPU_HAVE_HWLOC
  1687. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  1688. if (bindid != -1)
  1689. {
  1690. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1691. config->pu_depth,
  1692. bindid);
  1693. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  1694. if (data->worker_list)
  1695. {
  1696. _starpu_worker_list_delete(data->worker_list);
  1697. data->worker_list = NULL;
  1698. }
  1699. }
  1700. #endif
  1701. if (bindid != -1)
  1702. {
  1703. free(config->bindid_workers[bindid].workerids);
  1704. config->bindid_workers[bindid].workerids = NULL;
  1705. }
  1706. }
  1707. free(config->bindid_workers);
  1708. config->bindid_workers = NULL;
  1709. config->nbindid = 0;
  1710. unsigned combined_worker_id;
  1711. for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++)
  1712. {
  1713. struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
  1714. #ifdef STARPU_HAVE_HWLOC
  1715. hwloc_bitmap_free(combined_worker->hwloc_cpu_set);
  1716. #endif
  1717. free(combined_worker->perf_arch.devices);
  1718. }
  1719. #ifdef STARPU_HAVE_HWLOC
  1720. _starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology));
  1721. hwloc_topology_destroy(config->topology.hwtopology);
  1722. #endif
  1723. topology_is_initialized = 0;
  1724. #ifdef STARPU_USE_CUDA
  1725. struct handle_entry *entry=NULL, *tmp=NULL;
  1726. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  1727. {
  1728. HASH_DEL(devices_using_cuda, entry);
  1729. free(entry);
  1730. }
  1731. devices_using_cuda = NULL;
  1732. #endif
  1733. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  1734. int i;
  1735. for (i=0; i<STARPU_NARCH; i++)
  1736. may_bind_automatically[i] = 0;
  1737. #endif
  1738. }
  1739. int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED)
  1740. {
  1741. int ret = 0;
  1742. #ifdef STARPU_SIMGRID
  1743. return ret;
  1744. #else
  1745. if (nobind > 0)
  1746. return ret;
  1747. if (cpuid < 0)
  1748. return ret;
  1749. #ifdef STARPU_HAVE_HWLOC
  1750. const struct hwloc_topology_support *support;
  1751. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1752. #ifdef STARPU_USE_OPENCL
  1753. if (config->conf.nopencl != 0)
  1754. _starpu_opencl_init();
  1755. #endif
  1756. #ifdef STARPU_USE_CUDA
  1757. if (config->conf.ncuda != 0)
  1758. _starpu_init_cuda();
  1759. #endif
  1760. _starpu_init_topology(config);
  1761. if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
  1762. {
  1763. /* TODO: mutex... */
  1764. int previous = cpu_worker[cpuid];
  1765. /* We would like the PU to be available, or we are perhaps fine to share it */
  1766. if ( !( previous == STARPU_NOWORKERID ||
  1767. (previous == STARPU_NONACTIVETHREAD && workerid == STARPU_NONACTIVETHREAD) ||
  1768. (previous >= 0 && previous == workerid) ||
  1769. (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])) ) )
  1770. {
  1771. char hostname[65];
  1772. gethostname(hostname, sizeof(hostname));
  1773. if (previous == STARPU_ACTIVETHREAD)
  1774. _STARPU_DISP("[%s] Warning: active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
  1775. else if (previous == STARPU_NONACTIVETHREAD)
  1776. _STARPU_DISP("[%s] Warning: non-active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid);
  1777. else
  1778. _STARPU_DISP("[%s] Warning: worker %d was already bound to PU %d\n", hostname, previous, cpuid);
  1779. if (workerid == STARPU_ACTIVETHREAD)
  1780. _STARPU_DISP("and we were told to also bind active thread %s to it.\n", name);
  1781. else if (previous == STARPU_NONACTIVETHREAD)
  1782. _STARPU_DISP("and we were told to also bind non-active thread %s to it.\n", name);
  1783. else
  1784. _STARPU_DISP("and we were told to also bind worker %d to it.\n", workerid);
  1785. _STARPU_DISP("This will strongly degrade performance.\n");
  1786. if (workerid >= 0)
  1787. /* This shouldn't happen for workers */
  1788. _STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported %d cores and %d threads, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler?\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus);
  1789. ret = -1;
  1790. }
  1791. else
  1792. {
  1793. cpu_worker[cpuid] = workerid;
  1794. if (name)
  1795. {
  1796. if (cpu_name[cpuid])
  1797. free(cpu_name[cpuid]);
  1798. cpu_name[cpuid] = strdup(name);
  1799. }
  1800. }
  1801. }
  1802. support = hwloc_topology_get_support(config->topology.hwtopology);
  1803. if (support->cpubind->set_thisthread_cpubind)
  1804. {
  1805. hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->pu_depth, cpuid);
  1806. hwloc_bitmap_t set = obj->cpuset;
  1807. int res;
  1808. hwloc_bitmap_singlify(set);
  1809. res = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
  1810. if (res)
  1811. {
  1812. perror("hwloc_set_cpubind");
  1813. STARPU_ABORT();
  1814. }
  1815. }
  1816. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  1817. int res;
  1818. /* fix the thread on the correct cpu */
  1819. cpu_set_t aff_mask;
  1820. CPU_ZERO(&aff_mask);
  1821. CPU_SET(cpuid, &aff_mask);
  1822. starpu_pthread_t self = starpu_pthread_self();
  1823. res = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  1824. if (res)
  1825. {
  1826. const char *msg = strerror(res);
  1827. _STARPU_MSG("pthread_setaffinity_np: %s\n", msg);
  1828. STARPU_ABORT();
  1829. }
  1830. #elif defined(_WIN32)
  1831. DWORD mask = 1 << cpuid;
  1832. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  1833. {
  1834. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  1835. }
  1836. #else
  1837. #warning no CPU binding support
  1838. #endif
  1839. #endif
  1840. return ret;
  1841. }
  1842. int
  1843. starpu_bind_thread_on(int cpuid, unsigned flags, const char *name)
  1844. {
  1845. int workerid;
  1846. STARPU_ASSERT_MSG(name, "starpu_bind_thread_on must be provided with a name");
  1847. starpu_pthread_setname(name);
  1848. if (flags & STARPU_THREAD_ACTIVE)
  1849. workerid = STARPU_ACTIVETHREAD;
  1850. else
  1851. workerid = STARPU_NONACTIVETHREAD;
  1852. return _starpu_bind_thread_on_cpu(cpuid, workerid, name);
  1853. }
  1854. void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  1855. {
  1856. #ifdef STARPU_SIMGRID
  1857. return;
  1858. #endif
  1859. #ifdef STARPU_HAVE_HWLOC
  1860. const struct hwloc_topology_support *support;
  1861. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1862. #ifdef STARPU_USE_OPENC
  1863. if (config->conf.nopencl != 0)
  1864. _starpu_opencl_init();
  1865. #endif
  1866. #ifdef STARPU_USE_CUDA
  1867. if (config->conf.ncuda != 0)
  1868. _starpu_init_cuda();
  1869. #endif
  1870. _starpu_init_topology(config);
  1871. support = hwloc_topology_get_support(config->topology.hwtopology);
  1872. if (support->cpubind->set_thisthread_cpubind)
  1873. {
  1874. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  1875. int ret;
  1876. ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
  1877. if (ret)
  1878. {
  1879. perror("binding thread");
  1880. STARPU_ABORT();
  1881. }
  1882. }
  1883. #else
  1884. #ifdef __GLIBC__
  1885. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  1886. #else
  1887. # warning no parallel worker CPU binding support
  1888. #endif
  1889. #endif
  1890. }
  1891. static void _starpu_init_binding_cpu(struct _starpu_machine_config *config)
  1892. {
  1893. unsigned worker;
  1894. for (worker = 0; worker < config->topology.nworkers; worker++)
  1895. {
  1896. struct _starpu_worker *workerarg = &config->workers[worker];
  1897. switch (workerarg->arch)
  1898. {
  1899. case STARPU_CPU_WORKER:
  1900. {
  1901. /* Dedicate a cpu core to that worker */
  1902. workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, NULL, 0);
  1903. break;
  1904. }
  1905. default:
  1906. /* Do nothing */
  1907. break;
  1908. }
  1909. }
  1910. }
  1911. static size_t _starpu_cpu_get_global_mem_size(int nodeid, struct _starpu_machine_config *config)
  1912. {
  1913. size_t global_mem;
  1914. starpu_ssize_t limit = -1;
  1915. #if defined(STARPU_HAVE_HWLOC)
  1916. struct _starpu_machine_topology *topology = &config->topology;
  1917. STARPU_ASSERT(numa_enabled != -1);
  1918. if (numa_enabled)
  1919. {
  1920. int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NUMANODE);
  1921. if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN)
  1922. {
  1923. #if HWLOC_API_VERSION >= 0x00020000
  1924. global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory;
  1925. #else
  1926. global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
  1927. #endif
  1928. }
  1929. else
  1930. {
  1931. char name[32];
  1932. hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid);
  1933. #if HWLOC_API_VERSION >= 0x00020000
  1934. global_mem = obj->attr->numanode.local_memory;
  1935. #else
  1936. global_mem = obj->memory.local_memory;
  1937. #endif
  1938. snprintf(name, sizeof(name), "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index);
  1939. limit = starpu_get_env_number(name);
  1940. }
  1941. }
  1942. else
  1943. {
  1944. /* Do not limit ourself to a single NUMA node */
  1945. #if HWLOC_API_VERSION >= 0x00020000
  1946. global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory;
  1947. #else
  1948. global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory;
  1949. #endif
  1950. }
  1951. #else /* STARPU_HAVE_HWLOC */
  1952. #ifdef STARPU_DEVEL
  1953. # warning TODO: use sysinfo when available to get global size
  1954. #endif
  1955. global_mem = 0;
  1956. #endif
  1957. if (limit == -1)
  1958. limit = starpu_get_env_number("STARPU_LIMIT_CPU_NUMA_MEM");
  1959. if (limit == -1)
  1960. {
  1961. limit = starpu_get_env_number("STARPU_LIMIT_CPU_MEM");
  1962. if (limit != -1 && numa_enabled)
  1963. {
  1964. _STARPU_DISP("NUMA is enabled and STARPU_LIMIT_CPU_MEM is set to %luMB. Assuming that it should be distributed over the %d NUMA node(s). You probably want to use STARPU_LIMIT_CPU_NUMA_MEM instead.\n", (long) limit, _starpu_topology_get_nnumanodes(config));
  1965. limit /= _starpu_topology_get_nnumanodes(config);
  1966. }
  1967. }
  1968. if (limit < 0)
  1969. // No limit is defined, we return the global memory size
  1970. return global_mem;
  1971. else if (global_mem && (size_t)limit * 1024*1024 > global_mem)
  1972. {
  1973. if (numa_enabled)
  1974. _STARPU_DISP("The requested limit %ldMB for NUMA node %d is higher that available memory %luMB, using the latter\n", (unsigned long) limit, nodeid, (unsigned long) global_mem / (1024*1024));
  1975. else
  1976. _STARPU_DISP("The requested limit %ldMB is higher that available memory %luMB, using the latter\n", (long) limit, (unsigned long) global_mem / (1024*1024));
  1977. return global_mem;
  1978. }
  1979. else
  1980. // We limit the memory
  1981. return limit*1024*1024;
  1982. }
  1983. //TODO : Check SIMGRID
  1984. static void _starpu_init_numa_node(struct _starpu_machine_config *config)
  1985. {
  1986. nb_numa_nodes = 0;
  1987. unsigned i;
  1988. for (i = 0; i < STARPU_MAXNUMANODES; i++)
  1989. {
  1990. numa_memory_nodes_to_hwloclogid[i] = STARPU_NUMA_UNINITIALIZED;
  1991. numa_memory_nodes_to_physicalid[i] = STARPU_NUMA_UNINITIALIZED;
  1992. }
  1993. #ifdef STARPU_SIMGRID
  1994. char name[16];
  1995. starpu_sg_host_t host;
  1996. #endif
  1997. numa_enabled = starpu_get_env_number_default("STARPU_USE_NUMA", 0);
  1998. /* NUMA mode activated */
  1999. if (numa_enabled)
  2000. {
  2001. /* Take all NUMA nodes used by CPU workers */
  2002. unsigned worker;
  2003. for (worker = 0; worker < config->topology.nworkers; worker++)
  2004. {
  2005. struct _starpu_worker *workerarg = &config->workers[worker];
  2006. if (workerarg->arch == STARPU_CPU_WORKER)
  2007. {
  2008. int numa_logical_id = _starpu_get_logical_numa_node_worker(worker);
  2009. /* Convert logical id to StarPU id to check if this NUMA node is already saved or not */
  2010. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id);
  2011. /* This shouldn't happen */
  2012. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  2013. {
  2014. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  2015. STARPU_ABORT();
  2016. }
  2017. if (numa_starpu_id == -1)
  2018. {
  2019. int devid = numa_logical_id == STARPU_NUMA_MAIN_RAM ? 0 : numa_logical_id;
  2020. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, devid, &_starpu_driver_cpu_node_ops);
  2021. _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(devid, config));
  2022. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  2023. numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id;
  2024. int numa_physical_id = _starpu_get_physical_numa_node_worker(worker);
  2025. numa_memory_nodes_to_physicalid[memnode] = numa_physical_id;
  2026. nb_numa_nodes++;
  2027. #ifdef STARPU_SIMGRID
  2028. snprintf(name, sizeof(name), "RAM%d", memnode);
  2029. host = _starpu_simgrid_get_host_by_name(name);
  2030. STARPU_ASSERT(host);
  2031. _starpu_simgrid_memory_node_set_host(memnode, host);
  2032. #endif
  2033. }
  2034. }
  2035. }
  2036. /* If we found NUMA nodes from CPU workers, it's good */
  2037. if (nb_numa_nodes != 0)
  2038. return;
  2039. _STARPU_DISP("No NUMA nodes found when checking CPU workers...\n");
  2040. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
  2041. _STARPU_DISP("Take NUMA nodes attached to CUDA and OpenCL devices...\n");
  2042. #endif
  2043. #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
  2044. for (i = 0; i < config->topology.ndevices[STARPU_CUDA_WORKER]; i++)
  2045. {
  2046. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
  2047. if (obj)
  2048. obj = numa_get_obj(obj);
  2049. /* Hwloc cannot recognize some devices */
  2050. if (!obj)
  2051. continue;
  2052. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index);
  2053. /* This shouldn't happen */
  2054. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  2055. {
  2056. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  2057. STARPU_ABORT();
  2058. }
  2059. if (numa_starpu_id == -1)
  2060. {
  2061. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index, &_starpu_driver_cpu_node_ops);
  2062. _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(obj->logical_index, config));
  2063. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  2064. numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index;
  2065. numa_memory_nodes_to_physicalid[memnode] = obj->os_index;
  2066. nb_numa_nodes++;
  2067. #ifdef STARPU_SIMGRID
  2068. snprintf(name, sizeof(name), "RAM%d", memnode);
  2069. host = _starpu_simgrid_get_host_by_name(name);
  2070. STARPU_ASSERT(host);
  2071. _starpu_simgrid_memory_node_set_host(memnode, host);
  2072. #endif
  2073. }
  2074. }
  2075. #endif
  2076. #if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
  2077. if (config->topology.ndevices[STARPU_OPENCL_WORKER] > 0)
  2078. {
  2079. cl_int err;
  2080. cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
  2081. cl_uint nb_platforms;
  2082. unsigned platform;
  2083. unsigned nb_opencl_devices = 0, num = 0;
  2084. err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
  2085. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  2086. nb_platforms=0;
  2087. cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
  2088. if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
  2089. device_type |= CL_DEVICE_TYPE_CPU;
  2090. if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
  2091. device_type = CL_DEVICE_TYPE_CPU;
  2092. for (platform = 0; platform < nb_platforms ; platform++)
  2093. {
  2094. err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
  2095. if (err != CL_SUCCESS)
  2096. num = 0;
  2097. nb_opencl_devices += num;
  2098. for (i = 0; i < num; i++)
  2099. {
  2100. hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
  2101. if (obj)
  2102. obj = numa_get_obj(obj);
  2103. /* Hwloc cannot recognize some devices */
  2104. if (!obj)
  2105. continue;
  2106. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index);
  2107. /* This shouldn't happen */
  2108. if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
  2109. {
  2110. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  2111. STARPU_ABORT();
  2112. }
  2113. if (numa_starpu_id == -1)
  2114. {
  2115. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index, &_starpu_driver_cpu_node_ops);
  2116. _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(obj->logical_index, config));
  2117. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
  2118. numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index;
  2119. numa_memory_nodes_to_physicalid[memnode] = obj->os_index;
  2120. nb_numa_nodes++;
  2121. #ifdef STARPU_SIMGRID
  2122. snprintf(name, sizeof(name), "RAM%d", memnode);
  2123. host = _starpu_simgrid_get_host_by_name(name);
  2124. STARPU_ASSERT(host);
  2125. _starpu_simgrid_memory_node_set_host(memnode, host);
  2126. #endif
  2127. }
  2128. }
  2129. }
  2130. }
  2131. #endif
  2132. }
  2133. #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
  2134. //Found NUMA nodes from CUDA nodes
  2135. if (nb_numa_nodes != 0)
  2136. return;
  2137. /* In case, we do not find any NUMA nodes when checking NUMA nodes attached to GPUs, we take all of them */
  2138. if (numa_enabled)
  2139. _STARPU_DISP("No NUMA nodes found when checking GPUs devices...\n");
  2140. #endif
  2141. if (numa_enabled)
  2142. _STARPU_DISP("Finally, take all NUMA nodes available... \n");
  2143. unsigned nnuma = _starpu_topology_get_nnumanodes(config);
  2144. if (nnuma > STARPU_MAXNUMANODES)
  2145. {
  2146. _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
  2147. nnuma = STARPU_MAXNUMANODES;
  2148. }
  2149. unsigned numa;
  2150. for (numa = 0; numa < nnuma; numa++)
  2151. {
  2152. unsigned numa_logical_id;
  2153. unsigned numa_physical_id;
  2154. #if defined(STARPU_HAVE_HWLOC)
  2155. hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa);
  2156. if (obj)
  2157. {
  2158. numa_logical_id = obj->logical_index;
  2159. numa_physical_id = obj->os_index;
  2160. }
  2161. else
  2162. #endif
  2163. {
  2164. numa_logical_id = 0;
  2165. numa_physical_id = 0;
  2166. }
  2167. int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, numa_logical_id, &_starpu_driver_cpu_node_ops);
  2168. _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(numa_logical_id, config));
  2169. numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id;
  2170. numa_memory_nodes_to_physicalid[memnode] = numa_physical_id;
  2171. nb_numa_nodes++;
  2172. if (numa == 0)
  2173. STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM);
  2174. STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES);
  2175. #ifdef STARPU_SIMGRID
  2176. if (nnuma > 1)
  2177. {
  2178. snprintf(name, sizeof(name), "RAM%d", memnode);
  2179. host = _starpu_simgrid_get_host_by_name(name);
  2180. }
  2181. else
  2182. {
  2183. /* In this case, nnuma has only one node */
  2184. host = _starpu_simgrid_get_host_by_name("RAM");
  2185. }
  2186. STARPU_ASSERT(host);
  2187. _starpu_simgrid_memory_node_set_host(memnode, host);
  2188. #endif
  2189. }
  2190. STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n");
  2191. }
  2192. static void _starpu_init_numa_bus()
  2193. {
  2194. unsigned i, j;
  2195. for (i = 0; i < nb_numa_nodes; i++)
  2196. for (j = 0; j < nb_numa_nodes; j++)
  2197. if (i != j)
  2198. numa_bus_id[i*nb_numa_nodes+j] = _starpu_register_bus(i, j);
  2199. }
  2200. #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID)
  2201. static int _starpu_find_pu_driving_numa_from(hwloc_obj_t root, unsigned node)
  2202. {
  2203. unsigned i;
  2204. int found = 0;
  2205. if (!root->arity)
  2206. {
  2207. if (root->type == HWLOC_OBJ_PU)
  2208. {
  2209. struct _starpu_hwloc_userdata *userdata = root->userdata;
  2210. if (userdata->pu_worker)
  2211. {
  2212. /* Cool, found a worker! */
  2213. _STARPU_DEBUG("found PU %d to drive memory node %d\n", userdata->pu_worker->bindid, node);
  2214. _starpu_worker_drives_memory_node(userdata->pu_worker, node);
  2215. found = 1;
  2216. }
  2217. }
  2218. }
  2219. for (i = 0; i < root->arity; i++)
  2220. {
  2221. if (_starpu_find_pu_driving_numa_from(root->children[i], node))
  2222. found = 1;
  2223. }
  2224. return found;
  2225. }
  2226. /* Look upward to find a level containing the given NUMA node and workers to drive it */
  2227. static int _starpu_find_pu_driving_numa_up(hwloc_obj_t root, unsigned node)
  2228. {
  2229. if (_starpu_find_pu_driving_numa_from(root, node))
  2230. /* Ok, we already managed to find drivers */
  2231. return 1;
  2232. if (!root->parent)
  2233. /* And no parent!? nobody can drive this... */
  2234. return 0;
  2235. /* Try from parent */
  2236. return _starpu_find_pu_driving_numa_up(root->parent, node);
  2237. }
  2238. #endif
  2239. static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  2240. {
  2241. /* We will store all the busid of the different (src, dst)
  2242. * combinations in a matrix which we initialize here. */
  2243. _starpu_initialize_busid_matrix();
  2244. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  2245. unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
  2246. unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
  2247. unsigned cuda_bindid[STARPU_MAXCUDADEVS];
  2248. int cuda_globalbindid = -1;
  2249. #endif
  2250. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  2251. unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
  2252. unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
  2253. unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
  2254. #endif
  2255. #ifdef STARPU_USE_MIC
  2256. unsigned mic_init[STARPU_MAXMICDEVS] = { };
  2257. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  2258. unsigned mic_bindid[STARPU_MAXMICDEVS];
  2259. #endif
  2260. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  2261. unsigned mpi_init[STARPU_MAXMPIDEVS] = { };
  2262. unsigned mpi_memory_nodes[STARPU_MAXMPIDEVS];
  2263. unsigned mpi_bindid[STARPU_MAXMPIDEVS];
  2264. #endif
  2265. unsigned bindid;
  2266. for (bindid = 0; bindid < config->nbindid; bindid++)
  2267. {
  2268. free(config->bindid_workers[bindid].workerids);
  2269. config->bindid_workers[bindid].workerids = NULL;
  2270. config->bindid_workers[bindid].nworkers = 0;
  2271. }
  2272. /* Init CPU binding before NUMA nodes, because we use it to discover NUMA nodes */
  2273. _starpu_init_binding_cpu(config);
  2274. /* Initialize NUMA nodes */
  2275. _starpu_init_numa_node(config);
  2276. _starpu_init_numa_bus();
  2277. unsigned worker;
  2278. for (worker = 0; worker < config->topology.nworkers; worker++)
  2279. {
  2280. unsigned memory_node = -1;
  2281. struct _starpu_worker *workerarg = &config->workers[worker];
  2282. unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid;
  2283. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  2284. /* Perhaps the worker has some "favourite" bindings */
  2285. unsigned *preferred_binding = NULL;
  2286. unsigned npreferred = 0;
  2287. #endif
  2288. /* select the memory node that contains worker's memory */
  2289. switch (workerarg->arch)
  2290. {
  2291. case STARPU_CPU_WORKER:
  2292. {
  2293. int numa_logical_id = _starpu_get_logical_numa_node_worker(worker);
  2294. int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id);
  2295. if (numa_starpu_id < 0 || numa_starpu_id >= STARPU_MAXNUMANODES)
  2296. numa_starpu_id = STARPU_MAIN_RAM;
  2297. #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID)
  2298. hwloc_obj_t pu_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_PU, workerarg->bindid);
  2299. struct _starpu_hwloc_userdata *userdata = pu_obj->userdata;
  2300. userdata->pu_worker = workerarg;
  2301. #endif
  2302. workerarg->numa_memory_node = memory_node = numa_starpu_id;
  2303. _starpu_memory_node_add_nworkers(memory_node);
  2304. _starpu_worker_drives_memory_node(workerarg, numa_starpu_id);
  2305. break;
  2306. }
  2307. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  2308. case STARPU_CUDA_WORKER:
  2309. {
  2310. unsigned numa;
  2311. #ifndef STARPU_SIMGRID
  2312. if (may_bind_automatically[STARPU_CUDA_WORKER])
  2313. {
  2314. /* StarPU is allowed to bind threads automatically */
  2315. preferred_binding = _starpu_get_cuda_affinity_vector(devid);
  2316. npreferred = config->topology.nhwpus;
  2317. }
  2318. #endif /* SIMGRID */
  2319. if (cuda_init[devid])
  2320. {
  2321. memory_node = cuda_memory_nodes[devid];
  2322. if (config->topology.cuda_th_per_stream == 0)
  2323. workerarg->bindid = cuda_bindid[devid];
  2324. else
  2325. workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2326. }
  2327. else
  2328. {
  2329. cuda_init[devid] = 1;
  2330. if (config->topology.cuda_th_per_dev == 0 && config->topology.cuda_th_per_stream == 0)
  2331. {
  2332. if (cuda_globalbindid == -1)
  2333. cuda_globalbindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2334. workerarg->bindid = cuda_bindid[devid] = cuda_globalbindid;
  2335. }
  2336. else
  2337. workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2338. memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid, &_starpu_driver_cuda_node_ops);
  2339. for (numa = 0; numa < nb_numa_nodes; numa++)
  2340. {
  2341. _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node);
  2342. _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa);
  2343. }
  2344. #ifdef STARPU_SIMGRID
  2345. const char* cuda_memcpy_peer;
  2346. char name[16];
  2347. snprintf(name, sizeof(name), "CUDA%u", devid);
  2348. starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name);
  2349. STARPU_ASSERT(host);
  2350. _starpu_simgrid_memory_node_set_host(memory_node, host);
  2351. # ifdef STARPU_HAVE_SIMGRID_ACTOR_H
  2352. cuda_memcpy_peer = sg_host_get_property_value(host, "memcpy_peer");
  2353. # else
  2354. cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
  2355. # endif
  2356. #endif /* SIMGRID */
  2357. if (
  2358. #ifdef STARPU_SIMGRID
  2359. cuda_memcpy_peer && atoll(cuda_memcpy_peer)
  2360. #elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
  2361. 1
  2362. #else /* MEMCPY_PEER */
  2363. 0
  2364. #endif /* MEMCPY_PEER */
  2365. )
  2366. {
  2367. unsigned worker2;
  2368. for (worker2 = 0; worker2 < worker; worker2++)
  2369. {
  2370. struct _starpu_worker *workerarg2 = &config->workers[worker2];
  2371. int devid2 = workerarg2->devid;
  2372. if (workerarg2->arch == STARPU_CUDA_WORKER)
  2373. {
  2374. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  2375. _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node);
  2376. _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2);
  2377. #ifndef STARPU_SIMGRID
  2378. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  2379. {
  2380. hwloc_obj_t obj, obj2, ancestor;
  2381. obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid);
  2382. obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2);
  2383. ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2);
  2384. if (ancestor)
  2385. {
  2386. struct _starpu_hwloc_userdata *data = ancestor->userdata;
  2387. #ifdef STARPU_VERBOSE
  2388. {
  2389. char name[64];
  2390. hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0);
  2391. _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus);
  2392. }
  2393. #endif
  2394. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus);
  2395. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus);
  2396. }
  2397. }
  2398. #endif
  2399. #endif
  2400. }
  2401. }
  2402. }
  2403. }
  2404. _starpu_memory_node_add_nworkers(memory_node);
  2405. //This worker can manage transfers on NUMA nodes
  2406. for (numa = 0; numa < nb_numa_nodes; numa++)
  2407. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2408. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2409. break;
  2410. }
  2411. #endif
  2412. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  2413. case STARPU_OPENCL_WORKER:
  2414. {
  2415. unsigned numa;
  2416. #ifndef STARPU_SIMGRID
  2417. if (may_bind_automatically[STARPU_OPENCL_WORKER])
  2418. {
  2419. /* StarPU is allowed to bind threads automatically */
  2420. preferred_binding = _starpu_get_opencl_affinity_vector(devid);
  2421. npreferred = config->topology.nhwpus;
  2422. }
  2423. #endif /* SIMGRID */
  2424. if (opencl_init[devid])
  2425. {
  2426. memory_node = opencl_memory_nodes[devid];
  2427. #ifndef STARPU_SIMGRID
  2428. workerarg->bindid = opencl_bindid[devid];
  2429. #endif /* SIMGRID */
  2430. }
  2431. else
  2432. {
  2433. opencl_init[devid] = 1;
  2434. workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2435. memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid, &_starpu_driver_opencl_node_ops);
  2436. for (numa = 0; numa < nb_numa_nodes; numa++)
  2437. {
  2438. _starpu_register_bus(numa, memory_node);
  2439. _starpu_register_bus(memory_node, numa);
  2440. }
  2441. #ifdef STARPU_SIMGRID
  2442. char name[16];
  2443. snprintf(name, sizeof(name), "OpenCL%u", devid);
  2444. starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name);
  2445. STARPU_ASSERT(host);
  2446. _starpu_simgrid_memory_node_set_host(memory_node, host);
  2447. #endif /* SIMGRID */
  2448. }
  2449. _starpu_memory_node_add_nworkers(memory_node);
  2450. //This worker can manage transfers on NUMA nodes
  2451. for (numa = 0; numa < nb_numa_nodes; numa++)
  2452. _starpu_worker_drives_memory_node(workerarg, numa);
  2453. _starpu_worker_drives_memory_node(workerarg, memory_node);
  2454. break;
  2455. }
  2456. #endif
  2457. #ifdef STARPU_USE_MIC
  2458. case STARPU_MIC_WORKER:
  2459. {
  2460. unsigned numa;
  2461. if (mic_init[devid])
  2462. {
  2463. memory_node = mic_memory_nodes[devid];
  2464. }
  2465. else
  2466. {
  2467. mic_init[devid] = 1;
  2468. /* TODO */
  2469. //if (may_bind_automatically)
  2470. //{
  2471. // /* StarPU is allowed to bind threads automatically */
  2472. // preferred_binding = _starpu_get_mic_affinity_vector(devid);
  2473. // npreferred = config->topology.nhwpus;
  2474. //}
  2475. mic_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2476. memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid, &_starpu_driver_mic_node_ops);
  2477. for (numa = 0; numa < nb_numa_nodes; numa++)
  2478. {
  2479. _starpu_register_bus(numa, memory_node);
  2480. _starpu_register_bus(memory_node, numa);
  2481. }
  2482. }
  2483. workerarg->bindid = mic_bindid[devid];
  2484. _starpu_memory_node_add_nworkers(memory_node);
  2485. //This worker can manage transfers on NUMA nodes
  2486. for (numa = 0; numa < nb_numa_nodes; numa++)
  2487. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2488. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2489. break;
  2490. }
  2491. #endif /* STARPU_USE_MIC */
  2492. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  2493. case STARPU_MPI_MS_WORKER:
  2494. {
  2495. unsigned numa;
  2496. if (mpi_init[devid])
  2497. {
  2498. memory_node = mpi_memory_nodes[devid];
  2499. }
  2500. else
  2501. {
  2502. mpi_init[devid] = 1;
  2503. mpi_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred);
  2504. memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid, &_starpu_driver_mpi_node_ops);
  2505. for (numa = 0; numa < nb_numa_nodes; numa++)
  2506. {
  2507. _starpu_register_bus(numa, memory_node);
  2508. _starpu_register_bus(memory_node, numa);
  2509. }
  2510. }
  2511. //This worker can manage transfers on NUMA nodes
  2512. for (numa = 0; numa < nb_numa_nodes; numa++)
  2513. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa);
  2514. _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node);
  2515. #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
  2516. /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */
  2517. unsigned findworker;
  2518. for (findworker = 0; findworker < worker; findworker++)
  2519. {
  2520. struct _starpu_worker *findworkerarg = &config->workers[findworker];
  2521. if (findworkerarg->arch == STARPU_MPI_MS_WORKER)
  2522. {
  2523. _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node);
  2524. _starpu_worker_drives_memory_node(findworkerarg, memory_node);
  2525. }
  2526. }
  2527. #endif
  2528. workerarg->bindid = mpi_bindid[devid];
  2529. _starpu_memory_node_add_nworkers(memory_node);
  2530. break;
  2531. }
  2532. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  2533. default:
  2534. STARPU_ABORT();
  2535. }
  2536. workerarg->memory_node = memory_node;
  2537. _STARPU_DEBUG("worker %u type %d devid %u bound to cpu %d, STARPU memory node %u\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);
  2538. #ifdef __GLIBC__
  2539. if (workerarg->bindid != -1)
  2540. {
  2541. /* Save the initial cpuset */
  2542. CPU_ZERO(&workerarg->cpu_set);
  2543. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  2544. }
  2545. #endif /* __GLIBC__ */
  2546. #ifdef STARPU_HAVE_HWLOC
  2547. if (workerarg->bindid == -1)
  2548. {
  2549. workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
  2550. workerarg->hwloc_obj = NULL;
  2551. }
  2552. else
  2553. {
  2554. /* Put the worker descriptor in the userdata field of the
  2555. * hwloc object describing the CPU */
  2556. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  2557. config->pu_depth,
  2558. workerarg->bindid);
  2559. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  2560. if (data->worker_list == NULL)
  2561. data->worker_list = _starpu_worker_list_new();
  2562. _starpu_worker_list_push_front(data->worker_list, workerarg);
  2563. /* Clear the cpu set and set the cpu */
  2564. workerarg->hwloc_cpu_set = hwloc_bitmap_dup(worker_obj->cpuset);
  2565. workerarg->hwloc_obj = worker_obj;
  2566. }
  2567. #endif
  2568. if (workerarg->bindid != -1)
  2569. {
  2570. bindid = workerarg->bindid;
  2571. unsigned old_nbindid = config->nbindid;
  2572. if (bindid >= old_nbindid)
  2573. {
  2574. /* More room needed */
  2575. if (!old_nbindid)
  2576. config->nbindid = STARPU_NMAXWORKERS;
  2577. else
  2578. config->nbindid = 2 * old_nbindid;
  2579. if (bindid >= config->nbindid)
  2580. {
  2581. config->nbindid = bindid+1;
  2582. }
  2583. _STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0]));
  2584. memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0]));
  2585. }
  2586. /* Add slot for this worker */
  2587. /* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */
  2588. config->bindid_workers[bindid].nworkers++;
  2589. _STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0]));
  2590. config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker;
  2591. }
  2592. }
  2593. #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID)
  2594. /* If some NUMA nodes don't have drivers, attribute some */
  2595. unsigned node, nnodes = starpu_memory_nodes_get_count();;
  2596. for (node = 0; node < nnodes; node++)
  2597. {
  2598. if (starpu_node_get_kind(node) != STARPU_CPU_RAM)
  2599. /* Only RAM nodes can be processed by any CPU */
  2600. continue;
  2601. for (worker = 0; worker < config->topology.nworkers; worker++)
  2602. {
  2603. if (_starpu_worker_drives_memory[worker][node])
  2604. break;
  2605. }
  2606. if (worker < config->topology.nworkers)
  2607. /* Already somebody driving it */
  2608. continue;
  2609. /* Nobody driving this node! Attribute some */
  2610. _STARPU_DEBUG("nobody drives memory node %d\n", node);
  2611. hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(node));
  2612. int ret = _starpu_find_pu_driving_numa_up(numa_node_obj, node);
  2613. STARPU_ASSERT_MSG(ret, "oops, didn't find any worker to drive memory node %d!?", node);
  2614. }
  2615. #endif
  2616. #ifdef STARPU_SIMGRID
  2617. _starpu_simgrid_count_ngpus();
  2618. #else
  2619. #ifdef STARPU_HAVE_HWLOC
  2620. _starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology));
  2621. #endif
  2622. #endif
  2623. }
  2624. int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config)
  2625. {
  2626. int ret;
  2627. unsigned i;
  2628. enum starpu_worker_archtype type;
  2629. ret = _starpu_init_machine_config(config, no_mp_config);
  2630. if (ret)
  2631. return ret;
  2632. /* for the data management library */
  2633. _starpu_memory_nodes_init();
  2634. _starpu_datastats_init();
  2635. _starpu_init_workers_binding_and_memory(config, no_mp_config);
  2636. _starpu_mem_chunk_init_last();
  2637. for (type = 0; type < STARPU_NARCH; type++)
  2638. config->arch_nodeid[type] = -1;
  2639. for (i = 0; i < starpu_worker_get_count(); i++)
  2640. {
  2641. type = starpu_worker_get_type(i);
  2642. if (config->arch_nodeid[type] == -1)
  2643. config->arch_nodeid[type] = starpu_worker_get_memory_node(i);
  2644. else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i))
  2645. config->arch_nodeid[type] = -2;
  2646. }
  2647. return 0;
  2648. }
  2649. void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  2650. {
  2651. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
  2652. _starpu_deinit_mp_config(config);
  2653. #endif
  2654. /* cleanup StarPU internal data structures */
  2655. _starpu_memory_nodes_deinit();
  2656. _starpu_destroy_machine_config(config);
  2657. _starpu_deinitialize_workers_bindid(config);
  2658. }
  2659. void starpu_topology_print(FILE *output)
  2660. {
  2661. struct _starpu_machine_config *config = _starpu_get_machine_config();
  2662. struct _starpu_machine_topology *topology = &config->topology;
  2663. unsigned pu;
  2664. unsigned worker;
  2665. unsigned nworkers = starpu_worker_get_count();
  2666. unsigned ncombinedworkers = topology->ncombinedworkers;
  2667. unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0];
  2668. #ifdef STARPU_HAVE_HWLOC
  2669. hwloc_topology_t topo = topology->hwtopology;
  2670. hwloc_obj_t pu_obj;
  2671. hwloc_obj_t last_numa_obj = NULL, numa_obj;
  2672. hwloc_obj_t last_package_obj = NULL, package_obj;
  2673. #endif
  2674. for (pu = 0; pu < topology->nhwpus; pu++)
  2675. {
  2676. #ifdef STARPU_HAVE_HWLOC
  2677. pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu);
  2678. numa_obj = numa_get_obj(pu_obj);
  2679. if (numa_obj != last_numa_obj)
  2680. {
  2681. fprintf(output, "numa %u", numa_obj->logical_index);
  2682. last_numa_obj = numa_obj;
  2683. }
  2684. fprintf(output, "\t");
  2685. package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj);
  2686. if (package_obj != last_package_obj)
  2687. {
  2688. fprintf(output, "pack %u", package_obj->logical_index);
  2689. last_package_obj = package_obj;
  2690. }
  2691. fprintf(output, "\t");
  2692. #endif
  2693. if ((pu % nthreads_per_core) == 0)
  2694. fprintf(output, "core %u", pu / nthreads_per_core);
  2695. fprintf(output, "\tPU %u\t", pu);
  2696. for (worker = 0;
  2697. worker < nworkers + ncombinedworkers;
  2698. worker++)
  2699. {
  2700. if (worker < nworkers)
  2701. {
  2702. struct _starpu_worker *workerarg = &config->workers[worker];
  2703. if (workerarg->bindid == (int) pu)
  2704. {
  2705. char name[256];
  2706. starpu_worker_get_name(worker, name, sizeof(name));
  2707. fprintf(output, "%s\t", name);
  2708. }
  2709. }
  2710. else
  2711. {
  2712. int worker_size, i;
  2713. int *combined_workerid;
  2714. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  2715. for (i = 0; i < worker_size; i++)
  2716. {
  2717. if (topology->workers_bindid[combined_workerid[i]] == pu)
  2718. fprintf(output, "comb %u\t", worker-nworkers);
  2719. }
  2720. }
  2721. }
  2722. fprintf(output, "\n");
  2723. }
  2724. }
  2725. int starpu_get_pu_os_index(unsigned logical_index)
  2726. {
  2727. #ifdef STARPU_HAVE_HWLOC
  2728. struct _starpu_machine_config *config = _starpu_get_machine_config();
  2729. struct _starpu_machine_topology *topology = &config->topology;
  2730. hwloc_topology_t topo = topology->hwtopology;
  2731. return hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, logical_index)->os_index;
  2732. #else
  2733. return logical_index;
  2734. #endif
  2735. }
  2736. #ifdef STARPU_HAVE_HWLOC
  2737. hwloc_topology_t starpu_get_hwloc_topology(void)
  2738. {
  2739. struct _starpu_machine_config *config = _starpu_get_machine_config();
  2740. return config->topology.hwtopology;
  2741. }
  2742. #endif
  2743. unsigned _starpu_get_nhyperthreads()
  2744. {
  2745. struct _starpu_machine_config *config = _starpu_get_machine_config();
  2746. return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0];
  2747. }