topology.c 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2016 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016 CNRS
  5. * Copyright (C) 2011, 2016 INRIA
  6. * Copyright (C) 2016 Uppsala University
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <common/config.h>
  22. #include <core/workers.h>
  23. #include <core/debug.h>
  24. #include <core/topology.h>
  25. #include <drivers/cuda/driver_cuda.h>
  26. #include <drivers/mic/driver_mic_source.h>
  27. #include <drivers/scc/driver_scc_source.h>
  28. #include <drivers/mp_common/source_common.h>
  29. #include <drivers/opencl/driver_opencl.h>
  30. #include <profiling/profiling.h>
  31. #include <datawizard/datastats.h>
  32. #include <datawizard/memory_nodes.h>
  33. #include <common/uthash.h>
  34. #ifdef STARPU_HAVE_HWLOC
  35. #include <hwloc.h>
  36. #ifndef HWLOC_API_VERSION
  37. #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
  38. #endif
  39. #endif
  40. #ifdef STARPU_HAVE_WINDOWS
  41. #include <windows.h>
  42. #endif
  43. #ifdef STARPU_SIMGRID
  44. #include <core/simgrid.h>
  45. #endif
  46. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  47. #include <hwloc/cuda.h>
  48. #endif
  /* Set to 1 once _starpu_init_topology() has completed, so that later calls
   * return immediately. */
  static unsigned topology_is_initialized = 0;
  /* Value of the STARPU_WORKERS_NOBIND environment variable, read in
   * _starpu_init_topology(). */
  static int nobind;
  /* For checking whether two workers share the same PU, indexed by PU number.
   * Every entry is reset to STARPU_NOWORKERID by
   * _starpu_initialize_workers_bindid(). */
  static int cpu_worker[STARPU_MAXCPUS];
  #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  /* uthash entry for a set of device identifiers; `gpuid' is the key field
   * used with HASH_ADD_INT / HASH_FIND_INT. */
  struct handle_entry
  {
      UT_hash_handle hh;
      unsigned gpuid;
  };
  # if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /* Head of the `devices_using_cuda' hash table: the OpenCL initialisation
   * looks device ids up in it to skip devices already used with CUDA. */
  static struct handle_entry *devices_using_cuda;
  # endif
  /* Per-architecture flag, set to 1 by _starpu_initialize_workers_deviceid()
   * when neither the environment nor the user gave explicit device ids. */
  static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };
  #endif /* STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_SCC || STARPU_SIMGRID */
  #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /* Worker sets indexed by CUDA device id; see
   * _starpu_get_worker_from_driver(). */
  static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
  #endif
  #ifdef STARPU_USE_MIC
  /* Worker sets for MIC devices, sized for STARPU_MAXMICDEVS devices. */
  static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  #endif
  71. void *
  72. _starpu_get_worker_from_driver(struct starpu_driver *d)
  73. {
  74. unsigned nworkers = starpu_worker_get_count();
  75. unsigned workerid;
  76. #ifdef STARPU_USE_CUDA
  77. if (d->type == STARPU_CUDA_WORKER)
  78. {
  79. unsigned th_per_stream = starpu_get_env_number_default("STARPU_ONE_THREAD_PER_STREAM", 1);
  80. if(th_per_stream == 0)
  81. return &cuda_worker_set[d->id.cuda_id];
  82. }
  83. #endif
  84. for (workerid = 0; workerid < nworkers; workerid++)
  85. {
  86. if (starpu_worker_get_type(workerid) == d->type)
  87. {
  88. struct _starpu_worker *worker;
  89. worker = _starpu_get_worker_struct(workerid);
  90. switch (d->type)
  91. {
  92. #ifdef STARPU_USE_CPU
  93. case STARPU_CPU_WORKER:
  94. if (worker->devid == d->id.cpu_id)
  95. return worker;
  96. break;
  97. #endif
  98. #ifdef STARPU_USE_OPENCL
  99. case STARPU_OPENCL_WORKER:
  100. {
  101. cl_device_id device;
  102. starpu_opencl_get_device(worker->devid, &device);
  103. if (device == d->id.opencl_id)
  104. return worker;
  105. break;
  106. }
  107. #endif
  108. #ifdef STARPU_USE_CUDA
  109. case STARPU_CUDA_WORKER:
  110. {
  111. if (worker->devid == d->id.cuda_id)
  112. return &worker->set;
  113. break;
  114. }
  115. #endif
  116. default:
  117. _STARPU_DEBUG("Invalid device type\n");
  118. return NULL;
  119. }
  120. }
  121. }
  122. return NULL;
  123. }
  124. /*
  125. * Discover the topology of the machine
  126. */
  127. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
  128. static void
  129. _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
  130. int *current, int *workers_gpuid,
  131. const char *varname, unsigned nhwgpus,
  132. enum starpu_worker_archtype type)
  133. {
  134. char *strval;
  135. unsigned i;
  136. *current = 0;
  137. /* conf->workers_gpuid indicates the successive GPU identifier that
  138. * should be used to bind the workers. It should be either filled
  139. * according to the user's explicit parameters (from starpu_conf) or
  140. * according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, a
  141. * round-robin policy is used to distributed the workers over the
  142. * cores. */
  143. /* what do we use, explicit value, env. variable, or round-robin ? */
  144. if ((strval = starpu_getenv(varname)))
  145. {
  146. /* STARPU_WORKERS_CUDAID certainly contains less entries than
  147. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  148. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  149. * 2". */
  150. unsigned wrap = 0;
  151. unsigned number_of_entries = 0;
  152. char *endptr;
  153. /* we use the content of the STARPU_WORKERS_CUDAID
  154. * env. variable */
  155. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  156. {
  157. if (!wrap)
  158. {
  159. long int val;
  160. val = strtol(strval, &endptr, 10);
  161. if (endptr != strval)
  162. {
  163. workers_gpuid[i] = (unsigned)val;
  164. strval = endptr;
  165. }
  166. else
  167. {
  168. /* there must be at least one entry */
  169. STARPU_ASSERT(i != 0);
  170. number_of_entries = i;
  171. /* there is no more values in the
  172. * string */
  173. wrap = 1;
  174. workers_gpuid[i] = workers_gpuid[0];
  175. }
  176. }
  177. else
  178. {
  179. workers_gpuid[i] =
  180. workers_gpuid[i % number_of_entries];
  181. }
  182. }
  183. }
  184. else if (explicit_workers_gpuid)
  185. {
  186. /* we use the explicit value from the user */
  187. memcpy(workers_gpuid,
  188. explicit_workers_gpuid,
  189. STARPU_NMAXWORKERS*sizeof(unsigned));
  190. }
  191. else
  192. {
  193. /* by default, we take a round robin policy */
  194. if (nhwgpus > 0)
  195. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  196. workers_gpuid[i] = (unsigned)(i % nhwgpus);
  197. /* StarPU can use sampling techniques to bind threads
  198. * correctly */
  199. may_bind_automatically[type] = 1;
  200. }
  201. }
  202. #endif
  203. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  204. static void
  205. _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
  206. {
  207. struct _starpu_machine_topology *topology = &config->topology;
  208. struct starpu_conf *uconf = &config->conf;
  209. _starpu_initialize_workers_deviceid (
  210. uconf->use_explicit_workers_cuda_gpuid == 0
  211. ? NULL
  212. : (int *)uconf->workers_cuda_gpuid,
  213. &(config->current_cuda_gpuid),
  214. (int *)topology->workers_cuda_gpuid,
  215. "STARPU_WORKERS_CUDAID",
  216. topology->nhwcudagpus,
  217. STARPU_CUDA_WORKER);
  218. }
  219. static inline int
  220. _starpu_get_next_cuda_gpuid (struct _starpu_machine_config *config)
  221. {
  222. unsigned i =
  223. ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
  224. return (int)config->topology.workers_cuda_gpuid[i];
  225. }
  226. #endif
  227. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  228. static void
  229. _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
  230. {
  231. struct _starpu_machine_topology *topology = &config->topology;
  232. struct starpu_conf *uconf = &config->conf;
  233. _starpu_initialize_workers_deviceid(
  234. uconf->use_explicit_workers_opencl_gpuid == 0
  235. ? NULL
  236. : (int *)uconf->workers_opencl_gpuid,
  237. &(config->current_opencl_gpuid),
  238. (int *)topology->workers_opencl_gpuid,
  239. "STARPU_WORKERS_OPENCLID",
  240. topology->nhwopenclgpus,
  241. STARPU_OPENCL_WORKER);
  242. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  243. // Detect devices which are already used with CUDA
  244. {
  245. unsigned tmp[STARPU_NMAXWORKERS];
  246. unsigned nb=0;
  247. int i;
  248. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  249. {
  250. struct handle_entry *entry;
  251. int devid = config->topology.workers_opencl_gpuid[i];
  252. HASH_FIND_INT(devices_using_cuda, &devid, entry);
  253. if (entry == NULL)
  254. {
  255. tmp[nb] = topology->workers_opencl_gpuid[i];
  256. nb++;
  257. }
  258. }
  259. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  260. tmp[i] = -1;
  261. memcpy (topology->workers_opencl_gpuid, tmp,
  262. sizeof(unsigned)*STARPU_NMAXWORKERS);
  263. }
  264. #endif /* STARPU_USE_CUDA */
  265. {
  266. // Detect identical devices
  267. struct handle_entry *devices_already_used = NULL;
  268. unsigned tmp[STARPU_NMAXWORKERS];
  269. unsigned nb=0;
  270. int i;
  271. for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
  272. {
  273. int devid = topology->workers_opencl_gpuid[i];
  274. struct handle_entry *entry;
  275. HASH_FIND_INT(devices_already_used, &devid, entry);
  276. if (entry == NULL)
  277. {
  278. struct handle_entry *entry2;
  279. _STARPU_MALLOC(entry2, sizeof(*entry2));
  280. entry2->gpuid = devid;
  281. HASH_ADD_INT(devices_already_used, gpuid,
  282. entry2);
  283. tmp[nb] = devid;
  284. nb ++;
  285. }
  286. }
  287. struct handle_entry *entry, *tempo;
  288. HASH_ITER(hh, devices_already_used, entry, tempo)
  289. {
  290. HASH_DEL(devices_already_used, entry);
  291. free(entry);
  292. }
  293. for (i=nb ; i<STARPU_NMAXWORKERS ; i++)
  294. tmp[i] = -1;
  295. memcpy (topology->workers_opencl_gpuid, tmp,
  296. sizeof(unsigned)*STARPU_NMAXWORKERS);
  297. }
  298. }
  299. static inline int
  300. _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
  301. {
  302. unsigned i =
  303. ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
  304. return (int)config->topology.workers_opencl_gpuid[i];
  305. }
  306. #endif
  307. #if 0
  308. #if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  309. static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
  310. {
  311. struct _starpu_machine_topology *topology = &config->topology;
  312. struct starpu_conf *uconf = &config->conf;
  313. _starpu_initialize_workers_deviceid(
  314. uconf->use_explicit_workers_mic_deviceid == 0
  315. ? NULL
  316. : (int *)config->user_conf->workers_mic_deviceid,
  317. &(config->current_mic_deviceid),
  318. (int *)topology->workers_mic_deviceid,
  319. "STARPU_WORKERS_MICID",
  320. topology->nhwmiccores,
  321. STARPU_MIC_WORKER);
  322. }
  323. #endif
  324. #endif
  325. #ifdef STARPU_USE_SCC
  326. static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
  327. {
  328. struct _starpu_machine_topology *topology = &config->topology;
  329. struct starpu_conf *uconf = &config->conf;
  330. _starpu_initialize_workers_deviceid(
  331. uconf->use_explicit_workers_scc_deviceid == 0
  332. ? NULL
  333. : (int *) uconf->workers_scc_deviceid,
  334. &(config->current_scc_deviceid),
  335. (int *)topology->workers_scc_deviceid,
  336. "STARPU_WORKERS_SCCID",
  337. topology->nhwscc,
  338. STARPU_SCC_WORKER);
  339. }
  340. #endif /* STARPU_USE_SCC */
  341. #if 0
  342. #ifdef STARPU_USE_MIC
  343. static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
  344. {
  345. unsigned i = ((config->current_mic_deviceid++) % config->topology.nmicdevices);
  346. return (int)config->topology.workers_mic_deviceid[i];
  347. }
  348. #endif
  349. #endif
  350. #ifdef STARPU_USE_SCC
  351. static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
  352. {
  353. unsigned i = ((config->current_scc_deviceid++) % config->topology.nsccdevices);
  354. return (int)config->topology.workers_scc_deviceid[i];
  355. }
  356. #endif
  357. #ifdef STARPU_USE_MIC
  358. static void
  359. _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
  360. {
  361. /* Discover the topology of the mic node identifier by MIC_IDX. That
  362. * means, make this StarPU instance aware of the number of cores available
  363. * on this MIC device. Update the `nhwmiccores' topology field
  364. * accordingly. */
  365. struct _starpu_machine_topology *topology = &config->topology;
  366. int nbcores;
  367. _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
  368. topology->nhwmiccores[mic_idx] = nbcores;
  369. }
  370. static int
  371. _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
  372. COIENGINE *coi_handle, COIPROCESS *coi_process)
  373. {
  374. /* Initialize the MIC node of index MIC_IDX. */
  375. struct starpu_conf *user_conf = &config->conf;
  376. char ***argv = _starpu_get_argv();
  377. const char *suffixes[] = {"-mic", "_mic", NULL};
  378. /* Environment variables to send to the Sink, it informs it what kind
  379. * of node it is (architecture and type) as there is no way to discover
  380. * it itself */
  381. char mic_idx_env[32];
  382. sprintf(mic_idx_env, "_STARPU_MIC_DEVID=%d", mic_idx);
  383. /* XXX: this is currently necessary so that the remote process does not
  384. * segfault. */
  385. char nb_mic_env[32];
  386. sprintf(nb_mic_env, "_STARPU_MIC_NB=%d", 2);
  387. const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
  388. char mic_sink_program_path[1024];
  389. /* Let's get the helper program to run on the MIC device */
  390. int mic_file_found =
  391. _starpu_src_common_locate_file (mic_sink_program_path,
  392. starpu_getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
  393. starpu_getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
  394. user_conf->mic_sink_program_path,
  395. (argv ? (*argv)[0] : NULL),
  396. suffixes);
  397. if (0 != mic_file_found)
  398. {
  399. fprintf(stderr, "No MIC program specified, use the environment\n"
  400. "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
  401. "or the field 'starpu_conf.mic_sink_program_path'\n"
  402. "to define it.\n");
  403. return -1;
  404. }
  405. COIRESULT res;
  406. /* Let's get the handle which let us manage the remote MIC device */
  407. res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
  408. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  409. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  410. /* We launch the helper on the MIC device, which will wait for us
  411. * to give it work to do.
  412. * As we will communicate further with the device throught scif we
  413. * don't need to keep the process pointer */
  414. res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
  415. mic_sink_env, 1, NULL, 0, NULL,
  416. coi_process);
  417. if (STARPU_UNLIKELY(res != COI_SUCCESS))
  418. STARPU_MIC_SRC_REPORT_COI_ERROR(res);
  419. /* Let's create the node structure, we'll communicate with the peer
  420. * through scif thanks to it */
  421. mic_nodes[mic_idx] =
  422. _starpu_mp_common_node_create(STARPU_NODE_MIC_SOURCE, mic_idx);
  423. return 0;
  424. }
  425. #endif
  426. #ifndef STARPU_SIMGRID
  427. #ifdef STARPU_HAVE_HWLOC
  428. static void
  429. _starpu_allocate_topology_userdata(hwloc_obj_t obj)
  430. {
  431. unsigned i;
  432. obj->userdata = calloc(1, sizeof(struct _starpu_hwloc_userdata));
  433. for (i = 0; i < obj->arity; i++)
  434. _starpu_allocate_topology_userdata(obj->children[i]);
  435. }
  436. static void
  437. _starpu_deallocate_topology_userdata(hwloc_obj_t obj)
  438. {
  439. unsigned i;
  440. struct _starpu_hwloc_userdata *data = obj->userdata;
  441. STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1);
  442. free(data);
  443. for (i = 0; i < obj->arity; i++)
  444. _starpu_allocate_topology_userdata(obj->children[i]);
  445. }
  446. #endif
  447. #endif
  448. static void
  449. _starpu_init_topology (struct _starpu_machine_config *config)
  450. {
  451. /* Discover the topology, meaning finding all the available PUs for
  452. the compiled drivers. These drivers MUST have been initialized
  453. before calling this function. The discovered topology is filled in
  454. CONFIG. */
  455. struct _starpu_machine_topology *topology = &config->topology;
  456. if (topology_is_initialized)
  457. return;
  458. nobind = starpu_get_env_number("STARPU_WORKERS_NOBIND");
  459. topology->nhwcpus = 0;
  460. topology->nhwpus = 0;
  461. #ifndef STARPU_SIMGRID
  462. #ifdef STARPU_HAVE_HWLOC
  463. hwloc_topology_init(&topology->hwtopology);
  464. hwloc_topology_set_flags(topology->hwtopology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
  465. hwloc_topology_load(topology->hwtopology);
  466. _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology));
  467. #endif
  468. #endif
  469. #ifdef STARPU_SIMGRID
  470. config->topology.nhwcpus = config->topology.nhwpus = _starpu_simgrid_get_nbhosts("CPU");
  471. #elif defined(STARPU_HAVE_HWLOC)
  472. /* Discover the CPUs relying on the hwloc interface and fills CONFIG
  473. * accordingly. */
  474. config->cpu_depth = hwloc_get_type_depth (topology->hwtopology,
  475. HWLOC_OBJ_CORE);
  476. config->pu_depth = hwloc_get_type_depth (topology->hwtopology,
  477. HWLOC_OBJ_PU);
  478. /* Would be very odd */
  479. STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
  480. if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
  481. {
  482. /* unknown, using logical procesors as fallback */
  483. _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
  484. config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
  485. HWLOC_OBJ_PU);
  486. }
  487. topology->nhwcpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  488. config->cpu_depth);
  489. topology->nhwpus = hwloc_get_nbobjs_by_depth (topology->hwtopology,
  490. config->pu_depth);
  491. #elif defined(HAVE_SYSCONF)
  492. /* Discover the CPUs relying on the sysconf(3) function and fills
  493. * CONFIG accordingly. */
  494. config->topology.nhwcpus = config->topology.nhwpus = sysconf(_SC_NPROCESSORS_ONLN);
  495. #elif defined(_WIN32)
  496. /* Discover the CPUs on Cygwin and MinGW systems. */
  497. SYSTEM_INFO sysinfo;
  498. GetSystemInfo(&sysinfo);
  499. config->topology.nhwcpus = config->topology.nhwpus = sysinfo.dwNumberOfProcessors;
  500. #else
  501. #warning no way to know number of cores, assuming 1
  502. config->topology.nhwcpus = config->topology.nhwpus = 1;
  503. #endif
  504. _starpu_cuda_discover_devices(config);
  505. _starpu_opencl_discover_devices(config);
  506. #ifdef STARPU_USE_SCC
  507. config->topology.nhwscc = _starpu_scc_src_get_device_count();
  508. #endif
  509. topology_is_initialized = 1;
  510. }
  511. /*
  512. * Bind workers on the different processors
  513. */
  514. static void
  515. _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
  516. {
  517. char *strval;
  518. unsigned i;
  519. struct _starpu_machine_topology *topology = &config->topology;
  520. config->current_bindid = 0;
  521. /* conf->workers_bindid indicates the successive logical PU identifier that
  522. * should be used to bind the workers. It should be either filled
  523. * according to the user's explicit parameters (from starpu_conf) or
  524. * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
  525. * round-robin policy is used to distributed the workers over the
  526. * cores. */
  527. /* what do we use, explicit value, env. variable, or round-robin ? */
  528. if ((strval = starpu_getenv("STARPU_WORKERS_CPUID")))
  529. {
  530. /* STARPU_WORKERS_CPUID certainly contains less entries than
  531. * STARPU_NMAXWORKERS, so we reuse its entries in a round
  532. * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1
  533. * 2". */
  534. unsigned wrap = 0;
  535. unsigned number_of_entries = 0;
  536. char *endptr;
  537. /* we use the content of the STARPU_WORKERS_CPUID
  538. * env. variable */
  539. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  540. {
  541. if (!wrap)
  542. {
  543. long int val;
  544. val = strtol(strval, &endptr, 10);
  545. if (endptr != strval)
  546. {
  547. topology->workers_bindid[i] =
  548. (unsigned)(val % topology->nhwpus);
  549. strval = endptr;
  550. if (*strval == '-')
  551. {
  552. /* range of values */
  553. long int endval;
  554. strval++;
  555. if (*strval && *strval != ' ' && *strval != ',')
  556. {
  557. endval = strtol(strval, &endptr, 10);
  558. strval = endptr;
  559. }
  560. else
  561. {
  562. endval = topology->nhwpus-1;
  563. if (*strval)
  564. strval++;
  565. }
  566. for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++)
  567. {
  568. i++;
  569. topology->workers_bindid[i] =
  570. (unsigned)(val % topology->nhwpus);
  571. }
  572. }
  573. if (*strval == ',')
  574. strval++;
  575. }
  576. else
  577. {
  578. /* there must be at least one entry */
  579. STARPU_ASSERT(i != 0);
  580. number_of_entries = i;
  581. /* there is no more values in the
  582. * string */
  583. wrap = 1;
  584. topology->workers_bindid[i] =
  585. topology->workers_bindid[0];
  586. }
  587. }
  588. else
  589. {
  590. topology->workers_bindid[i] =
  591. topology->workers_bindid[i % number_of_entries];
  592. }
  593. }
  594. }
  595. else if (config->conf.use_explicit_workers_bindid)
  596. {
  597. /* we use the explicit value from the user */
  598. memcpy(topology->workers_bindid,
  599. config->conf.workers_bindid,
  600. STARPU_NMAXWORKERS*sizeof(unsigned));
  601. }
  602. else
  603. {
  604. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  605. int k;
  606. int nbindids=0;
  607. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  608. STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads");
  609. i = 0; /* PU number currently assigned */
  610. k = 0; /* Number of threads already put on the current core */
  611. while(nbindids < STARPU_NMAXWORKERS)
  612. {
  613. if (k >= nth_per_core)
  614. {
  615. /* We have already put enough workers on this
  616. * core, skip remaining PUs from this core, and
  617. * proceed with next core */
  618. i += nhyperthreads-nth_per_core;
  619. k = 0;
  620. continue;
  621. }
  622. /* Add a worker to this core, by using this logical PU */
  623. topology->workers_bindid[nbindids++] =
  624. (unsigned)(i % topology->nhwpus);
  625. k++;
  626. i++;
  627. }
  628. }
  629. for (i = 0; i < STARPU_MAXCPUS;i++)
  630. cpu_worker[i] = STARPU_NOWORKERID;
  631. }
  632. /* This function gets the identifier of the next core on which to bind a
  633. * worker. In case a list of preferred cores was specified (logical indexes),
  634. * we look for a an available core among the list if possible, otherwise a
  635. * round-robin policy is used. */
  636. static inline int
  637. _starpu_get_next_bindid (struct _starpu_machine_config *config,
  638. int *preferred_binding, int npreferred)
  639. {
  640. struct _starpu_machine_topology *topology = &config->topology;
  641. unsigned found = 0;
  642. int current_preferred;
  643. int nhyperthreads = topology->nhwpus / topology->nhwcpus;
  644. /* loop over the preference list */
  645. for (current_preferred = 0;
  646. current_preferred < npreferred;
  647. current_preferred++)
  648. {
  649. if (found)
  650. break;
  651. /* Try to get this core */
  652. unsigned requested_core = preferred_binding[current_preferred];
  653. /* can we bind the worker on the preferred core ? */
  654. unsigned ind;
  655. /* Look at the remaining cores to be bound to */
  656. for (ind = config->current_bindid;
  657. ind < topology->nhwpus / nhyperthreads;
  658. ind++)
  659. {
  660. if (topology->workers_bindid[ind] == requested_core * nhyperthreads)
  661. {
  662. /* the cpu is available, we use it ! In order
  663. * to make sure that it will not be used again
  664. * later on, we exchange it with the next bindid we were supposed to use */
  665. topology->workers_bindid[ind] =
  666. topology->workers_bindid[config->current_bindid];
  667. topology->workers_bindid[config->current_bindid] = requested_core * nhyperthreads;
  668. found = 1;
  669. break;
  670. }
  671. }
  672. }
  673. unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
  674. return (int)topology->workers_bindid[i];
  675. }
  676. unsigned
  677. _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
  678. {
  679. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  680. _starpu_opencl_init();
  681. #endif
  682. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  683. _starpu_init_cuda();
  684. #endif
  685. _starpu_init_topology(config);
  686. return config->topology.nhwcpus;
  687. }
  688. unsigned
  689. _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
  690. {
  691. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  692. _starpu_opencl_init();
  693. #endif
  694. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  695. _starpu_init_cuda();
  696. #endif
  697. _starpu_init_topology(config);
  698. return config->topology.nhwpus;
  699. }
  700. #ifdef STARPU_USE_MIC
  701. static void
  702. _starpu_init_mic_config (struct _starpu_machine_config *config,
  703. struct starpu_conf *user_conf,
  704. unsigned mic_idx)
  705. {
  706. // Configure the MIC device of index MIC_IDX.
  707. struct _starpu_machine_topology *topology = &config->topology;
  708. topology->nhwmiccores[mic_idx] = 0;
  709. _starpu_init_mic_topology (config, mic_idx);
  710. int nmiccores;
  711. nmiccores = starpu_get_env_number("STARPU_NMICTHREADS");
  712. if (nmiccores == -1)
  713. {
  714. /* Nothing was specified, so let's use the number of
  715. * detected mic cores. ! */
  716. nmiccores = topology->nhwmiccores[mic_idx];
  717. }
  718. else
  719. {
  720. if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
  721. {
  722. /* The user requires more MIC cores than there is available */
  723. fprintf(stderr,
  724. "# Warning: %d MIC cores requested. Only %d available.\n",
  725. nmiccores, topology->nhwmiccores[mic_idx]);
  726. nmiccores = topology->nhwmiccores[mic_idx];
  727. }
  728. }
  729. topology->nmiccores[mic_idx] = nmiccores;
  730. STARPU_ASSERT_MSG(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
  731. "topology->nmiccores[mic_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
  732. mic_idx, topology->nmiccores[mic_idx], topology->nworkers, STARPU_NMAXWORKERS);
  733. /* _starpu_initialize_workers_mic_deviceid (config); */
  734. mic_worker_set[mic_idx].workers = &config->workers[topology->nworkers];
  735. unsigned miccore_id;
  736. for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
  737. {
  738. int worker_idx = topology->nworkers + miccore_id;
  739. config->workers[worker_idx].set = &mic_worker_set[mic_idx];
  740. config->workers[worker_idx].arch = STARPU_MIC_WORKER;
  741. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  742. config->workers[worker_idx].perf_arch.ndevices = 1;
  743. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MIC_WORKER;
  744. config->workers[worker_idx].perf_arch.devices[0].devid = mic_idx;
  745. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  746. config->workers[worker_idx].devid = mic_idx;
  747. config->workers[worker_idx].subworkerid = miccore_id;
  748. config->workers[worker_idx].worker_mask = STARPU_MIC;
  749. config->worker_mask |= STARPU_MIC;
  750. }
  751. topology->nworkers += topology->nmiccores[mic_idx];
  752. }
  753. #ifdef STARPU_USE_MIC
  754. static COIENGINE mic_handles[STARPU_MAXMICDEVS];
  755. COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
  756. #endif
  757. static void
  758. _starpu_init_mp_config (struct _starpu_machine_config *config,
  759. struct starpu_conf *user_conf)
  760. {
  761. /* Discover and configure the mp topology. That means:
  762. * - discover the number of mp nodes;
  763. * - initialize each discovered node;
  764. * - discover the local topology (number of PUs/devices) of each node;
  765. * - configure the workers accordingly.
  766. */
  767. struct _starpu_machine_topology *topology = &config->topology;
  768. // We currently only support MIC at this level.
  769. #ifdef STARPU_USE_MIC
  770. /* Discover and initialize the number of MIC nodes through the mp
  771. * infrastructure. */
  772. unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
  773. int reqmicdevices = starpu_get_env_number("STARPU_NMIC");
  774. if (reqmicdevices == -1 && user_conf)
  775. reqmicdevices = user_conf->nmic;
  776. if (reqmicdevices == -1)
  777. /* Nothing was specified, so let's use the number of
  778. * detected mic devices. ! */
  779. reqmicdevices = nhwmicdevices;
  780. if (reqmicdevices != -1)
  781. {
  782. if ((unsigned) reqmicdevices > nhwmicdevices)
  783. {
  784. /* The user requires more MIC devices than there is available */
  785. fprintf(stderr,
  786. "# Warning: %d MIC devices requested. Only %d available.\n",
  787. reqmicdevices, nhwmicdevices);
  788. reqmicdevices = nhwmicdevices;
  789. }
  790. }
  791. topology->nmicdevices = 0;
  792. unsigned i;
  793. for (i = 0; i < (unsigned) reqmicdevices; i++)
  794. if (0 == _starpu_init_mic_node (config, i, &mic_handles[i], &_starpu_mic_process[i]))
  795. topology->nmicdevices++;
  796. for (i = 0; i < topology->nmicdevices; i++)
  797. _starpu_init_mic_config (config, user_conf, i);
  798. #endif
  799. }
  800. static void
  801. _starpu_deinit_mic_node (unsigned mic_idx)
  802. {
  803. _starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
  804. COIProcessDestroy(_starpu_mic_process[mic_idx], -1, 0, NULL, NULL);
  805. _starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
  806. }
  807. static void
  808. _starpu_deinit_mp_config (struct _starpu_machine_config *config)
  809. {
  810. struct _starpu_machine_topology *topology = &config->topology;
  811. unsigned i;
  812. for (i = 0; i < topology->nmicdevices; i++)
  813. _starpu_deinit_mic_node (i);
  814. _starpu_mic_clear_kernels();
  815. }
  816. #endif
  817. #ifdef STARPU_HAVE_HWLOC
  818. static unsigned
  819. _starpu_topology_count_ngpus(hwloc_obj_t obj)
  820. {
  821. struct _starpu_hwloc_userdata *data = obj->userdata;
  822. unsigned n = data->ngpus;
  823. unsigned i;
  824. for (i = 0; i < obj->arity; i++)
  825. n += _starpu_topology_count_ngpus(obj->children[i]);
  826. data->ngpus = n;
  827. #ifdef STARPU_VERBOSE
  828. {
  829. char name[64];
  830. hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
  831. _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
  832. }
  833. #endif
  834. return n;
  835. }
  836. #endif
  837. static int
  838. _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  839. {
  840. int i;
  841. for (i = 0; i < STARPU_NMAXWORKERS; i++)
  842. {
  843. config->workers[i].workerid = i;
  844. config->workers[i].set = NULL;
  845. }
  846. struct _starpu_machine_topology *topology = &config->topology;
  847. topology->nworkers = 0;
  848. topology->ncombinedworkers = 0;
  849. topology->nsched_ctxs = 0;
  850. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  851. _starpu_opencl_init();
  852. #endif
  853. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  854. _starpu_init_cuda();
  855. #endif
  856. _starpu_init_topology(config);
  857. _starpu_initialize_workers_bindid(config);
  858. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  859. for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++)
  860. cuda_worker_set[i].workers = NULL;
  861. #endif
  862. #ifdef STARPU_USE_MIC
  863. for (i = 0; i < (int) (sizeof(mic_worker_set)/sizeof(mic_worker_set[0])); i++)
  864. mic_worker_set[i].workers = NULL;
  865. #endif
  866. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  867. int ncuda = config->conf.ncuda;
  868. int nworker_per_cuda = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  869. STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0");
  870. STARPU_ASSERT_MSG(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS);
  871. #ifndef STARPU_NON_BLOCKING_DRIVERS
  872. if (nworker_per_cuda > 1)
  873. {
  874. _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n");
  875. nworker_per_cuda = 1;
  876. }
  877. #endif
  878. if (ncuda != 0)
  879. {
  880. /* The user did not disable CUDA. We need to initialize CUDA
  881. * early to count the number of devices */
  882. _starpu_init_cuda();
  883. int nb_devices = _starpu_get_cuda_device_count();
  884. if (ncuda == -1)
  885. {
  886. /* Nothing was specified, so let's choose ! */
  887. ncuda = nb_devices;
  888. }
  889. else
  890. {
  891. if (ncuda > nb_devices)
  892. {
  893. /* The user requires more CUDA devices than
  894. * there is available */
  895. _STARPU_DISP("Warning: %d CUDA devices requested. Only %d available.\n", ncuda, nb_devices);
  896. ncuda = nb_devices;
  897. }
  898. }
  899. }
  900. /* Now we know how many CUDA devices will be used */
  901. topology->ncudagpus = ncuda;
  902. STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
  903. _starpu_initialize_workers_cuda_gpuid(config);
  904. /* allow having one worker per stream */
  905. unsigned th_per_stream = starpu_get_env_number_default("STARPU_WORKER_PER_STREAM", 1);
  906. unsigned cudagpu;
  907. for (cudagpu = 0; cudagpu < topology->ncudagpus; cudagpu++)
  908. {
  909. int devid = _starpu_get_next_cuda_gpuid(config);
  910. int worker_idx0 = topology->nworkers + cudagpu * nworker_per_cuda;
  911. cuda_worker_set[devid].workers = &config->workers[worker_idx0];
  912. for (i = 0; i < nworker_per_cuda; i++)
  913. {
  914. int worker_idx = worker_idx0 + i;
  915. if(th_per_stream)
  916. {
  917. config->workers[worker_idx].set = (struct _starpu_worker_set *)malloc(sizeof(struct _starpu_worker_set));
  918. config->workers[worker_idx].set->workers = &config->workers[worker_idx];
  919. }
  920. else
  921. config->workers[worker_idx].set = &cuda_worker_set[devid];
  922. config->workers[worker_idx].arch = STARPU_CUDA_WORKER;
  923. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  924. config->workers[worker_idx].perf_arch.ndevices = 1;
  925. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  926. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  927. // TODO: fix perfmodels etc.
  928. //config->workers[worker_idx].perf_arch.ncore = nworker_per_cuda - 1;
  929. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  930. config->workers[worker_idx].devid = devid;
  931. config->workers[worker_idx].subworkerid = i;
  932. config->workers[worker_idx].worker_mask = STARPU_CUDA;
  933. config->worker_mask |= STARPU_CUDA;
  934. struct handle_entry *entry;
  935. _STARPU_MALLOC(entry, sizeof(*entry));
  936. entry->gpuid = devid;
  937. HASH_ADD_INT(devices_using_cuda, gpuid, entry);
  938. }
  939. #ifndef STARPU_SIMGRID
  940. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  941. {
  942. hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, devid);
  943. if (obj)
  944. {
  945. struct _starpu_hwloc_userdata *data = obj->userdata;
  946. data->ngpus++;
  947. }
  948. else
  949. {
  950. _STARPU_DISP("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid);
  951. }
  952. }
  953. #endif
  954. #endif
  955. }
  956. topology->nworkers += topology->ncudagpus * nworker_per_cuda;
  957. #endif
  958. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  959. int nopencl = config->conf.nopencl;
  960. if (nopencl != 0)
  961. {
  962. /* The user did not disable OPENCL. We need to initialize
  963. * OpenCL early to count the number of devices */
  964. _starpu_opencl_init();
  965. int nb_devices;
  966. nb_devices = _starpu_opencl_get_device_count();
  967. if (nopencl == -1)
  968. {
  969. /* Nothing was specified, so let's choose ! */
  970. nopencl = nb_devices;
  971. if (nopencl > STARPU_MAXOPENCLDEVS)
  972. {
  973. _STARPU_DISP("Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n", nb_devices, STARPU_MAXOPENCLDEVS);
  974. nopencl = STARPU_MAXOPENCLDEVS;
  975. }
  976. }
  977. else
  978. {
  979. /* Let's make sure this value is OK. */
  980. if (nopencl > nb_devices)
  981. {
  982. /* The user requires more OpenCL devices than
  983. * there is available */
  984. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d available.\n", nopencl, nb_devices);
  985. nopencl = nb_devices;
  986. }
  987. /* Let's make sure this value is OK. */
  988. if (nopencl > STARPU_MAXOPENCLDEVS)
  989. {
  990. _STARPU_DISP("Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n", nopencl, STARPU_MAXOPENCLDEVS);
  991. nopencl = STARPU_MAXOPENCLDEVS;
  992. }
  993. }
  994. }
  995. topology->nopenclgpus = nopencl;
  996. STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
  997. _starpu_initialize_workers_opencl_gpuid(config);
  998. unsigned openclgpu;
  999. for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
  1000. {
  1001. int worker_idx = topology->nworkers + openclgpu;
  1002. int devid = _starpu_get_next_opencl_gpuid(config);
  1003. if (devid == -1)
  1004. { // There is no more devices left
  1005. topology->nopenclgpus = openclgpu;
  1006. break;
  1007. }
  1008. config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
  1009. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1010. config->workers[worker_idx].perf_arch.ndevices = 1;
  1011. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_OPENCL_WORKER;
  1012. config->workers[worker_idx].perf_arch.devices[0].devid = devid;
  1013. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1014. config->workers[worker_idx].subworkerid = 0;
  1015. config->workers[worker_idx].devid = devid;
  1016. config->workers[worker_idx].worker_mask = STARPU_OPENCL;
  1017. config->worker_mask |= STARPU_OPENCL;
  1018. }
  1019. topology->nworkers += topology->nopenclgpus;
  1020. #endif
  1021. #ifdef STARPU_USE_SCC
  1022. int nscc = config->conf.nscc;
  1023. unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
  1024. if (nscc != 0)
  1025. {
  1026. /* The user did not disable SCC. We need to count
  1027. * the number of devices */
  1028. int nb_devices = nb_scc_nodes;
  1029. if (nscc == -1)
  1030. {
  1031. /* Nothing was specified, so let's choose ! */
  1032. nscc = nb_devices;
  1033. if (nscc > STARPU_MAXSCCDEVS)
  1034. {
  1035. _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
  1036. nscc = STARPU_MAXSCCDEVS;
  1037. }
  1038. }
  1039. else
  1040. {
  1041. /* Let's make sure this value is OK. */
  1042. if (nscc > nb_devices)
  1043. {
  1044. /* The user requires more SCC devices than there is available */
  1045. _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
  1046. nscc = nb_devices;
  1047. }
  1048. /* Let's make sure this value is OK. */
  1049. if (nscc > STARPU_MAXSCCDEVS)
  1050. {
  1051. _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
  1052. nscc = STARPU_MAXSCCDEVS;
  1053. }
  1054. }
  1055. }
  1056. /* Now we know how many SCC devices will be used */
  1057. topology->nsccdevices = nscc;
  1058. STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
  1059. _starpu_initialize_workers_scc_deviceid(config);
  1060. unsigned sccdev;
  1061. for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
  1062. {
  1063. config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
  1064. int devid = _starpu_get_next_scc_deviceid(config);
  1065. _STARPU_MALLOC(config->workers[topology->nworkers + sccdev].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1066. config->workers[topology->nworkers + sccdev].perf_arch.ndevices = 1;
  1067. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].type = STARPU_SCC_WORKER;
  1068. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].devid = sccdev;
  1069. config->workers[topology->nworkers + sccdev].perf_arch.devices[0].ncore = 1;
  1070. config->workers[topology->nworkers + sccdev].subworkerid = 0;
  1071. config->workers[topology->nworkers + sccdev].devid = devid;
  1072. config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
  1073. config->worker_mask |= STARPU_SCC;
  1074. }
  1075. for (; sccdev < nb_scc_nodes; ++sccdev)
  1076. _starpu_scc_exit_useless_node(sccdev);
  1077. topology->nworkers += topology->nsccdevices;
  1078. #endif /* STARPU_USE_SCC */
  1079. /* Unless not requested, we need to complete configuration with the
  1080. * ones of the mp nodes. */
  1081. #ifdef STARPU_USE_MIC
  1082. if (! no_mp_config)
  1083. _starpu_init_mp_config (config, &config->conf);
  1084. #endif
  1085. /* we put the CPU section after the accelerator : in case there was an
  1086. * accelerator found, we devote one cpu */
  1087. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  1088. int ncpu = config->conf.ncpus;
  1089. if (ncpu != 0)
  1090. {
  1091. if (ncpu == -1)
  1092. {
  1093. unsigned mic_busy_cpus = 0;
  1094. unsigned j = 0;
  1095. for (j = 0; j < STARPU_MAXMICDEVS; j++)
  1096. mic_busy_cpus += (topology->nmiccores[j] ? 1 : 0);
  1097. unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
  1098. + topology->nopenclgpus + topology->nsccdevices;
  1099. long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
  1100. if (avail_cpus < 0)
  1101. avail_cpus = 0;
  1102. int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
  1103. avail_cpus *= nth_per_core;
  1104. ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
  1105. }
  1106. else
  1107. {
  1108. if (ncpu > STARPU_MAXCPUS)
  1109. {
  1110. _STARPU_DISP("Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
  1111. ncpu = STARPU_MAXCPUS;
  1112. }
  1113. }
  1114. }
  1115. topology->ncpus = ncpu;
  1116. STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
  1117. unsigned cpu;
  1118. for (cpu = 0; cpu < topology->ncpus; cpu++)
  1119. {
  1120. int worker_idx = topology->nworkers + cpu;
  1121. config->workers[worker_idx].arch = STARPU_CPU_WORKER;
  1122. _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  1123. config->workers[worker_idx].perf_arch.ndevices = 1;
  1124. config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
  1125. config->workers[worker_idx].perf_arch.devices[0].devid = 0;
  1126. config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
  1127. config->workers[worker_idx].subworkerid = 0;
  1128. config->workers[worker_idx].devid = cpu;
  1129. config->workers[worker_idx].worker_mask = STARPU_CPU;
  1130. config->worker_mask |= STARPU_CPU;
  1131. }
  1132. topology->nworkers += topology->ncpus;
  1133. #endif
  1134. if (topology->nworkers == 0)
  1135. {
  1136. _STARPU_DEBUG("No worker found, aborting ...\n");
  1137. return -ENODEV;
  1138. }
  1139. return 0;
  1140. }
  1141. void _starpu_destroy_machine_config(struct _starpu_machine_config *config)
  1142. {
  1143. _starpu_close_debug_logfile();
  1144. unsigned worker;
  1145. for (worker = 0; worker < config->topology.nworkers; worker++)
  1146. {
  1147. struct _starpu_worker *workerarg = &config->workers[worker];
  1148. int bindid = workerarg->bindid;
  1149. free(workerarg->perf_arch.devices);
  1150. #ifdef STARPU_HAVE_HWLOC
  1151. hwloc_bitmap_free(workerarg->hwloc_cpu_set);
  1152. if (bindid != -1)
  1153. {
  1154. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1155. config->pu_depth,
  1156. bindid);
  1157. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  1158. if (data->worker_list)
  1159. {
  1160. _starpu_worker_list_delete(data->worker_list);
  1161. data->worker_list = NULL;
  1162. }
  1163. }
  1164. #endif
  1165. if (bindid != -1)
  1166. {
  1167. free(config->bindid_workers[bindid].workerids);
  1168. config->bindid_workers[bindid].workerids = NULL;
  1169. }
  1170. }
  1171. free(config->bindid_workers);
  1172. config->bindid_workers = NULL;
  1173. config->nbindid = 0;
  1174. unsigned combined_worker_id;
  1175. for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++)
  1176. {
  1177. struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id];
  1178. #ifdef STARPU_HAVE_HWLOC
  1179. hwloc_bitmap_free(combined_worker->hwloc_cpu_set);
  1180. #endif
  1181. free(combined_worker->perf_arch.devices);
  1182. }
  1183. #ifdef STARPU_HAVE_HWLOC
  1184. _starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology));
  1185. hwloc_topology_destroy(config->topology.hwtopology);
  1186. #endif
  1187. topology_is_initialized = 0;
  1188. #ifdef STARPU_USE_CUDA
  1189. struct handle_entry *entry, *tmp;
  1190. HASH_ITER(hh, devices_using_cuda, entry, tmp)
  1191. {
  1192. HASH_DEL(devices_using_cuda, entry);
  1193. free(entry);
  1194. }
  1195. devices_using_cuda = NULL;
  1196. #endif
  1197. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
  1198. int i;
  1199. for (i=0; i<STARPU_NARCH; i++)
  1200. may_bind_automatically[i] = 0;
  1201. #endif
  1202. }
  1203. void
  1204. _starpu_bind_thread_on_cpu (
  1205. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  1206. int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED)
  1207. {
  1208. #ifdef STARPU_SIMGRID
  1209. return;
  1210. #else
  1211. if (nobind > 0)
  1212. return;
  1213. if (cpuid < 0)
  1214. return;
  1215. if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS)
  1216. {
  1217. int previous = cpu_worker[cpuid];
  1218. if (previous != STARPU_NOWORKERID && previous != workerid)
  1219. _STARPU_DISP("Warning: both workers %d and %d are bound to the same PU %d, this will strongly degrade performance\n", previous, workerid, cpuid);
  1220. else
  1221. cpu_worker[cpuid] = workerid;
  1222. }
  1223. #ifdef STARPU_HAVE_HWLOC
  1224. const struct hwloc_topology_support *support;
  1225. #ifdef STARPU_USE_OPENCL
  1226. _starpu_opencl_init();
  1227. #endif
  1228. #ifdef STARPU_USE_CUDA
  1229. _starpu_init_cuda();
  1230. #endif
  1231. _starpu_init_topology(config);
  1232. support = hwloc_topology_get_support (config->topology.hwtopology);
  1233. if (support->cpubind->set_thisthread_cpubind)
  1234. {
  1235. hwloc_obj_t obj =
  1236. hwloc_get_obj_by_depth (config->topology.hwtopology,
  1237. config->pu_depth, cpuid);
  1238. hwloc_bitmap_t set = obj->cpuset;
  1239. int ret;
  1240. hwloc_bitmap_singlify(set);
  1241. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1242. HWLOC_CPUBIND_THREAD);
  1243. if (ret)
  1244. {
  1245. perror("hwloc_set_cpubind");
  1246. STARPU_ABORT();
  1247. }
  1248. }
  1249. #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
  1250. int ret;
  1251. /* fix the thread on the correct cpu */
  1252. cpu_set_t aff_mask;
  1253. CPU_ZERO(&aff_mask);
  1254. CPU_SET(cpuid, &aff_mask);
  1255. starpu_pthread_t self = pthread_self();
  1256. ret = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask);
  1257. if (ret)
  1258. {
  1259. const char *msg = strerror(ret);
  1260. fprintf(stderr, "pthread_setaffinity_np: %s\n", msg);
  1261. STARPU_ABORT();
  1262. }
  1263. #elif defined(_WIN32)
  1264. DWORD mask = 1 << cpuid;
  1265. if (!SetThreadAffinityMask(GetCurrentThread(), mask))
  1266. {
  1267. _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask);
  1268. }
  1269. #else
  1270. #warning no CPU binding support
  1271. #endif
  1272. #endif
  1273. }
  1274. void
  1275. _starpu_bind_thread_on_cpus (
  1276. struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED,
  1277. struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED)
  1278. {
  1279. #ifdef STARPU_SIMGRID
  1280. return;
  1281. #endif
  1282. #ifdef STARPU_HAVE_HWLOC
  1283. const struct hwloc_topology_support *support;
  1284. #ifdef STARPU_USE_OPENC
  1285. _starpu_opencl_init();
  1286. #endif
  1287. #ifdef STARPU_USE_CUDA
  1288. _starpu_init_cuda();
  1289. #endif
  1290. _starpu_init_topology(config);
  1291. support = hwloc_topology_get_support(config->topology.hwtopology);
  1292. if (support->cpubind->set_thisthread_cpubind)
  1293. {
  1294. hwloc_bitmap_t set = combined_worker->hwloc_cpu_set;
  1295. int ret;
  1296. ret = hwloc_set_cpubind (config->topology.hwtopology, set,
  1297. HWLOC_CPUBIND_THREAD);
  1298. if (ret)
  1299. {
  1300. perror("binding thread");
  1301. STARPU_ABORT();
  1302. }
  1303. }
  1304. #else
  1305. #ifdef __GLIBC__
  1306. sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set);
  1307. #else
  1308. # warning no parallel worker CPU binding support
  1309. #endif
  1310. #endif
  1311. }
  1312. static void
  1313. _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
  1314. {
  1315. /* launch one thread per CPU */
  1316. unsigned ram_memory_node;
  1317. /* note that even if the CPU cpu are not used, we always have a RAM
  1318. * node */
  1319. /* TODO : support NUMA ;) */
  1320. ram_memory_node = _starpu_memory_node_register(STARPU_CPU_RAM, 0);
  1321. STARPU_ASSERT(ram_memory_node == STARPU_MAIN_RAM);
  1322. #ifdef STARPU_SIMGRID
  1323. char name[16];
  1324. msg_host_t host = _starpu_simgrid_get_host_by_name("RAM");
  1325. STARPU_ASSERT(host);
  1326. _starpu_simgrid_memory_node_set_host(STARPU_MAIN_RAM, host);
  1327. #endif
  1328. /* We will store all the busid of the different (src, dst)
  1329. * combinations in a matrix which we initialize here. */
  1330. _starpu_initialize_busid_matrix();
  1331. /* Each device is initialized,
  1332. * giving it a memory node and a core bind id.
  1333. */
  1334. /* TODO: STARPU_MAXNUMANODES */
  1335. unsigned numa_init[1] = { 1 };
  1336. unsigned numa_memory_nodes[1] = { ram_memory_node };
  1337. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1338. unsigned cuda_init[STARPU_MAXCUDADEVS] = { };
  1339. unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS];
  1340. unsigned cuda_bindid[STARPU_MAXCUDADEVS];
  1341. #endif
  1342. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1343. unsigned opencl_init[STARPU_MAXOPENCLDEVS] = { };
  1344. unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS];
  1345. unsigned opencl_bindid[STARPU_MAXOPENCLDEVS];
  1346. #endif
  1347. #ifdef STARPU_USE_MIC
  1348. unsigned mic_init[STARPU_MAXMICDEVS] = { };
  1349. unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
  1350. unsigned mic_bindid[STARPU_MAXMICDEVS];
  1351. #endif
  1352. unsigned bindid;
  1353. for (bindid = 0; bindid < config->nbindid; bindid++)
  1354. {
  1355. free(config->bindid_workers[bindid].workerids);
  1356. config->bindid_workers[bindid].workerids = NULL;
  1357. config->bindid_workers[bindid].nworkers = 0;
  1358. }
  1359. unsigned worker;
  1360. for (worker = 0; worker < config->topology.nworkers; worker++)
  1361. {
  1362. unsigned memory_node = -1;
  1363. struct _starpu_worker *workerarg = &config->workers[worker];
  1364. unsigned devid = workerarg->devid;
  1365. #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
  1366. /* Perhaps the worker has some "favourite" bindings */
  1367. int *preferred_binding = NULL;
  1368. int npreferred = 0;
  1369. #endif
  1370. /* select the memory node that contains worker's memory */
  1371. switch (workerarg->arch)
  1372. {
  1373. case STARPU_CPU_WORKER:
  1374. {
  1375. /* TODO: NUMA */
  1376. int numaid = 0;
  1377. /* "dedicate" a cpu core to that worker */
  1378. if (numa_init[numaid])
  1379. {
  1380. memory_node = numa_memory_nodes[numaid];
  1381. }
  1382. else
  1383. {
  1384. numa_init[numaid] = 1;
  1385. memory_node = numa_memory_nodes[numaid] = _starpu_memory_node_register(STARPU_CPU_RAM, numaid);
  1386. #ifdef STARPU_SIMGRID
  1387. snprintf(name, sizeof(name), "RAM%d", numaid);
  1388. host = _starpu_simgrid_get_host_by_name(name);
  1389. STARPU_ASSERT(host);
  1390. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1391. #endif
  1392. }
  1393. workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
  1394. _starpu_memory_node_add_nworkers(memory_node);
  1395. #ifdef STARPU_SIMGRID
  1396. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
  1397. if (memory_node != STARPU_MAIN_RAM)
  1398. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  1399. #endif
  1400. break;
  1401. }
  1402. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1403. case STARPU_CUDA_WORKER:
  1404. #ifndef STARPU_SIMGRID
  1405. if (may_bind_automatically[STARPU_CUDA_WORKER])
  1406. {
  1407. /* StarPU is allowed to bind threads automatically */
  1408. preferred_binding = _starpu_get_cuda_affinity_vector(devid);
  1409. npreferred = config->topology.nhwpus;
  1410. }
  1411. #endif /* SIMGRID */
  1412. if (cuda_init[devid])
  1413. {
  1414. memory_node = cuda_memory_nodes[devid];
  1415. #ifndef STARPU_SIMGRID
  1416. workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);//cuda_bindid[devid];
  1417. #endif /* SIMGRID */
  1418. }
  1419. else
  1420. {
  1421. cuda_init[devid] = 1;
  1422. workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1423. memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid);
  1424. _starpu_cuda_bus_ids[0][devid+1] = _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1425. _starpu_cuda_bus_ids[devid+1][0] = _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1426. #ifdef STARPU_SIMGRID
  1427. const char* cuda_memcpy_peer;
  1428. snprintf(name, sizeof(name), "CUDA%d", devid);
  1429. host = _starpu_simgrid_get_host_by_name(name);
  1430. STARPU_ASSERT(host);
  1431. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1432. cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
  1433. #endif /* SIMGRID */
  1434. if (
  1435. #ifdef STARPU_SIMGRID
  1436. cuda_memcpy_peer && atoll(cuda_memcpy_peer)
  1437. #elif defined(HAVE_CUDA_MEMCPY_PEER)
  1438. 1
  1439. #else /* MEMCPY_PEER */
  1440. 0
  1441. #endif /* MEMCPY_PEER */
  1442. )
  1443. {
  1444. unsigned worker2;
  1445. for (worker2 = 0; worker2 < worker; worker2++)
  1446. {
  1447. struct _starpu_worker *workerarg2 = &config->workers[worker2];
  1448. int devid2 = workerarg2->devid;
  1449. if (workerarg2->arch == STARPU_CUDA_WORKER)
  1450. {
  1451. unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
  1452. _starpu_cuda_bus_ids[devid2][devid] = _starpu_register_bus(memory_node2, memory_node);
  1453. _starpu_cuda_bus_ids[devid][devid2] = _starpu_register_bus(memory_node, memory_node2);
  1454. #ifndef STARPU_SIMGRID
  1455. #if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
  1456. {
  1457. hwloc_obj_t obj, obj2, ancestor;
  1458. obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid);
  1459. obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2);
  1460. ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2);
  1461. if (ancestor)
  1462. {
  1463. struct _starpu_hwloc_userdata *data = ancestor->userdata;
  1464. #ifdef STARPU_VERBOSE
  1465. {
  1466. char name[64];
  1467. hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0);
  1468. _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus);
  1469. }
  1470. #endif
  1471. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2][devid], data->ngpus);
  1472. starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid][devid2], data->ngpus);
  1473. }
  1474. }
  1475. #endif
  1476. #endif
  1477. }
  1478. }
  1479. }
  1480. }
  1481. _starpu_memory_node_add_nworkers(memory_node);
  1482. #ifdef STARPU_SIMGRID
  1483. starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[memory_node]);
  1484. starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  1485. #endif
  1486. break;
  1487. #endif
  1488. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  1489. case STARPU_OPENCL_WORKER:
  1490. #ifndef STARPU_SIMGRID
  1491. if (may_bind_automatically[STARPU_OPENCL_WORKER])
  1492. {
  1493. /* StarPU is allowed to bind threads automatically */
  1494. preferred_binding = _starpu_get_opencl_affinity_vector(devid);
  1495. npreferred = config->topology.nhwpus;
  1496. }
  1497. #endif /* SIMGRID */
  1498. if (opencl_init[devid])
  1499. {
  1500. memory_node = opencl_memory_nodes[devid];
  1501. #ifndef STARPU_SIMGRID
  1502. workerarg->bindid = opencl_bindid[devid];
  1503. #endif /* SIMGRID */
  1504. }
  1505. else
  1506. {
  1507. opencl_init[devid] = 1;
  1508. workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1509. memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid);
  1510. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1511. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1512. #ifdef STARPU_SIMGRID
  1513. snprintf(name, sizeof(name), "OpenCL%d", devid);
  1514. host = _starpu_simgrid_get_host_by_name(name);
  1515. STARPU_ASSERT(host);
  1516. _starpu_simgrid_memory_node_set_host(memory_node, host);
  1517. #endif /* SIMGRID */
  1518. }
  1519. _starpu_memory_node_add_nworkers(memory_node);
  1520. #ifdef STARPU_SIMGRID
  1521. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
  1522. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  1523. #endif
  1524. break;
  1525. #endif
  1526. #ifdef STARPU_USE_MIC
  1527. case STARPU_MIC_WORKER:
  1528. if (mic_init[devid])
  1529. {
  1530. memory_node = mic_memory_nodes[devid];
  1531. }
  1532. else
  1533. {
  1534. mic_init[devid] = 1;
  1535. /* TODO */
  1536. //if (may_bind_automatically)
  1537. //{
  1538. // /* StarPU is allowed to bind threads automatically */
  1539. // preferred_binding = _starpu_get_mic_affinity_vector(devid);
  1540. // npreferred = config->topology.nhwpus;
  1541. //}
  1542. mic_bindid[devid] = _starpu_get_next_bindid(config, preferred_binding, npreferred);
  1543. memory_node = mic_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MIC_RAM, devid);
  1544. _starpu_register_bus(STARPU_MAIN_RAM, memory_node);
  1545. _starpu_register_bus(memory_node, STARPU_MAIN_RAM);
  1546. }
  1547. workerarg->bindid = mic_bindid[devid];
  1548. _starpu_memory_node_add_nworkers(memory_node);
  1549. #ifdef STARPU_SIMGRID
  1550. starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[memory_node]);
  1551. starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  1552. #endif
  1553. break;
  1554. #endif /* STARPU_USE_MIC */
  1555. #ifdef STARPU_USE_SCC
  1556. case STARPU_SCC_WORKER:
  1557. {
  1558. /* Node 0 represents the SCC shared memory when we're on SCC. */
  1559. struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
  1560. descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
  1561. memory_node = ram_memory_node;
  1562. _starpu_memory_node_add_nworkers(memory_node);
  1563. #ifdef STARPU_SIMGRID
  1564. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
  1565. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
  1566. #endif
  1567. }
  1568. break;
  1569. #endif
  1570. default:
  1571. STARPU_ABORT();
  1572. }
  1573. workerarg->memory_node = memory_node;
  1574. _STARPU_DEBUG("worker %d type %d devid %d bound to cpu %d, STARPU memory node %d\n", worker, workerarg->arch, devid, workerarg->bindid, memory_node);
  1575. #ifdef __GLIBC__
  1576. if (workerarg->bindid != -1)
  1577. {
  1578. /* Save the initial cpuset */
  1579. CPU_ZERO(&workerarg->cpu_set);
  1580. CPU_SET(workerarg->bindid, &workerarg->cpu_set);
  1581. }
  1582. #endif /* __GLIBC__ */
  1583. #ifdef STARPU_HAVE_HWLOC
  1584. if (workerarg->bindid == -1)
  1585. {
  1586. workerarg->hwloc_cpu_set = hwloc_bitmap_alloc();
  1587. }
  1588. else
  1589. {
  1590. /* Put the worker descriptor in the userdata field of the
  1591. * hwloc object describing the CPU */
  1592. hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology,
  1593. config->pu_depth,
  1594. workerarg->bindid);
  1595. struct _starpu_hwloc_userdata *data = worker_obj->userdata;
  1596. if (data->worker_list == NULL)
  1597. data->worker_list = _starpu_worker_list_new();
  1598. _starpu_worker_list_push_front(data->worker_list, workerarg);
  1599. /* Clear the cpu set and set the cpu */
  1600. workerarg->hwloc_cpu_set = hwloc_bitmap_dup (worker_obj->cpuset);
  1601. }
  1602. #endif
  1603. if (workerarg->bindid != -1)
  1604. {
  1605. bindid = workerarg->bindid;
  1606. unsigned old_nbindid = config->nbindid;
  1607. if (bindid >= old_nbindid)
  1608. {
  1609. /* More room needed */
  1610. if (!old_nbindid)
  1611. config->nbindid = STARPU_NMAXWORKERS;
  1612. else
  1613. config->nbindid = 2 * old_nbindid;
  1614. _STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0]));
  1615. memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0]));
  1616. }
  1617. /* Add slot for this worker */
  1618. /* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */
  1619. config->bindid_workers[bindid].nworkers++;
  1620. _STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0]));
  1621. config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker;
  1622. }
  1623. }
  1624. #ifdef STARPU_SIMGRID
  1625. _starpu_simgrid_count_ngpus();
  1626. #else
  1627. #ifdef STARPU_HAVE_HWLOC
  1628. _starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology));
  1629. #endif
  1630. #endif
  1631. }
  1632. int
  1633. _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
  1634. {
  1635. int ret;
  1636. unsigned i;
  1637. ret = _starpu_init_machine_config(config, no_mp_config);
  1638. if (ret)
  1639. return ret;
  1640. /* for the data management library */
  1641. _starpu_memory_nodes_init();
  1642. _starpu_datastats_init();
  1643. _starpu_init_workers_binding(config, no_mp_config);
  1644. config->cpus_nodeid = -1;
  1645. config->cuda_nodeid = -1;
  1646. config->opencl_nodeid = -1;
  1647. config->mic_nodeid = -1;
  1648. config->scc_nodeid = -1;
  1649. for (i = 0; i < starpu_worker_get_count(); i++)
  1650. {
  1651. switch (starpu_worker_get_type(i))
  1652. {
  1653. case STARPU_CPU_WORKER:
  1654. if (config->cpus_nodeid == -1)
  1655. config->cpus_nodeid = starpu_worker_get_memory_node(i);
  1656. else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
  1657. config->cpus_nodeid = -2;
  1658. break;
  1659. case STARPU_CUDA_WORKER:
  1660. if (config->cuda_nodeid == -1)
  1661. config->cuda_nodeid = starpu_worker_get_memory_node(i);
  1662. else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
  1663. config->cuda_nodeid = -2;
  1664. break;
  1665. case STARPU_OPENCL_WORKER:
  1666. if (config->opencl_nodeid == -1)
  1667. config->opencl_nodeid = starpu_worker_get_memory_node(i);
  1668. else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
  1669. config->opencl_nodeid = -2;
  1670. break;
  1671. case STARPU_MIC_WORKER:
  1672. if (config->mic_nodeid == -1)
  1673. config->mic_nodeid = starpu_worker_get_memory_node(i);
  1674. else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
  1675. config->mic_nodeid = -2;
  1676. break;
  1677. case STARPU_SCC_WORKER:
  1678. if (config->scc_nodeid == -1)
  1679. config->scc_nodeid = starpu_worker_get_memory_node(i);
  1680. else if (config->scc_nodeid != (int) starpu_worker_get_memory_node(i))
  1681. config->scc_nodeid = -2;
  1682. break;
  1683. case STARPU_ANY_WORKER:
  1684. STARPU_ASSERT(0);
  1685. }
  1686. }
  1687. return 0;
  1688. }
  1689. void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED)
  1690. {
  1691. #ifdef STARPU_USE_MIC
  1692. _starpu_deinit_mp_config(config);
  1693. #endif
  1694. /* cleanup StarPU internal data structures */
  1695. _starpu_memory_nodes_deinit();
  1696. _starpu_destroy_machine_config(config);
  1697. }
  1698. void
  1699. starpu_topology_print (FILE *output)
  1700. {
  1701. struct _starpu_machine_config *config = _starpu_get_machine_config();
  1702. struct _starpu_machine_topology *topology = &config->topology;
  1703. unsigned pu;
  1704. unsigned worker;
  1705. unsigned nworkers = starpu_worker_get_count();
  1706. unsigned ncombinedworkers = topology->ncombinedworkers;
  1707. unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
  1708. for (pu = 0; pu < topology->nhwpus; pu++)
  1709. {
  1710. if ((pu % nthreads_per_core) == 0)
  1711. fprintf(output, "core %u", pu / nthreads_per_core);
  1712. fprintf(output, "\tPU %u\t", pu);
  1713. for (worker = 0;
  1714. worker < nworkers + ncombinedworkers;
  1715. worker++)
  1716. {
  1717. if (worker < nworkers)
  1718. {
  1719. struct _starpu_worker *workerarg = &config->workers[worker];
  1720. if (workerarg->bindid == (int) pu)
  1721. {
  1722. char name[256];
  1723. starpu_worker_get_name (worker, name,
  1724. sizeof(name));
  1725. fprintf(output, "%s\t", name);
  1726. }
  1727. }
  1728. else
  1729. {
  1730. int worker_size, i;
  1731. int *combined_workerid;
  1732. starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid);
  1733. for (i = 0; i < worker_size; i++)
  1734. {
  1735. if (topology->workers_bindid[combined_workerid[i]] == pu)
  1736. fprintf(output, "comb %u\t", worker-nworkers);
  1737. }
  1738. }
  1739. }
  1740. fprintf(output, "\n");
  1741. }
  1742. }