/* starpu_clusters_create.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2015-2016 Université de Bordeaux
  4. * Copyright (C) 2015 INRIA
  5. * Copyright (C) 2015, 2016 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /* This file creates an interface to manage clustering resources and make use
  19. * of parallel tasks. It entirely depends on the hwloc software. */
  20. #include <util/starpu_clusters_create.h>
  21. starpu_binding_function _starpu_cluster_type_get_func(starpu_cluster_types type)
  22. {
  23. starpu_binding_function prologue_func;
  24. switch (type)
  25. {
  26. case OPENMP:
  27. prologue_func = &starpu_openmp_prologue;
  28. break;
  29. case INTEL_OPENMP_MKL:
  30. prologue_func = &starpu_intel_openmp_mkl_prologue;
  31. break;
  32. #ifdef STARPU_MKL
  33. case GNU_OPENMP_MKL:
  34. prologue_func = &starpu_gnu_openmp_mkl_prologue;
  35. break;
  36. #endif
  37. default:
  38. prologue_func = NULL;
  39. }
  40. return prologue_func;
  41. }
  42. void starpu_openmp_prologue(void *sched_ctx_id)
  43. {
  44. int sched_ctx = *(int*)sched_ctx_id;
  45. int *cpuids = NULL;
  46. int ncpuids = 0;
  47. int workerid = starpu_worker_get_id_check();
  48. if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
  49. {
  50. starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
  51. omp_set_num_threads(ncpuids);
  52. #pragma omp parallel
  53. {
  54. starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
  55. }
  56. free(cpuids);
  57. }
  58. return;
  59. }
  60. #ifdef STARPU_MKL
  61. void starpu_gnu_openmp_mkl_prologue(void *sched_ctx_id)
  62. {
  63. int sched_ctx = *(int*)sched_ctx_id;
  64. int *cpuids = NULL;
  65. int ncpuids = 0;
  66. int workerid = starpu_worker_get_id();
  67. if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
  68. {
  69. starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
  70. omp_set_num_threads(ncpuids);
  71. mkl_set_num_threads(ncpuids);
  72. mkl_set_dynamic(0);
  73. #pragma omp parallel
  74. {
  75. starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
  76. }
  77. free(cpuids);
  78. }
  79. return;
  80. }
  81. #endif
  82. /* Main interface function to create a cluster view of the machine.
  83. * Its job is to capture what the user wants and store it in a standard view. */
  84. struct starpu_cluster_machine *starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...)
  85. {
  86. va_list varg_list;
  87. int arg_type;
  88. struct _starpu_cluster_parameters *params;
  89. struct starpu_cluster_machine *machine = malloc(sizeof(struct starpu_cluster_machine));
  90. machine->params = malloc(sizeof(struct _starpu_cluster_parameters));
  91. machine->id = STARPU_NMAX_SCHED_CTXS;
  92. machine->groups = _starpu_cluster_group_list_new();
  93. machine->nclusters = 0;
  94. machine->ngroups = 0;
  95. machine->topology = NULL;
  96. _starpu_cluster_init_parameters(machine->params);
  97. params = machine->params;
  98. va_start(varg_list, cluster_level);
  99. while ((arg_type = va_arg(varg_list, int)) != 0)
  100. {
  101. if (arg_type == STARPU_CLUSTER_MIN_NB)
  102. {
  103. params->min_nb = va_arg(varg_list, int);
  104. if (params->min_nb <= 0)
  105. _STARPU_DISP("Caution min number of contexts shouldn't be negative or null\n");
  106. }
  107. else if (arg_type == STARPU_CLUSTER_MAX_NB)
  108. {
  109. params->max_nb = va_arg(varg_list, int);
  110. if (params->max_nb <= 0)
  111. _STARPU_DISP("Caution max number of contexts shouldn't be negative or null\n");
  112. }
  113. else if (arg_type == STARPU_CLUSTER_NB)
  114. {
  115. params->nb = va_arg(varg_list, int);
  116. if (params->nb <= 0)
  117. _STARPU_DISP("Caution number of contexts shouldn't be negative or null\n");
  118. }
  119. else if (arg_type == STARPU_CLUSTER_POLICY_NAME)
  120. {
  121. params->sched_policy_name = va_arg(varg_list, char*);
  122. }
  123. else if (arg_type == STARPU_CLUSTER_POLICY_STRUCT)
  124. {
  125. params->sched_policy_struct = va_arg(varg_list,
  126. struct starpu_sched_policy*);
  127. }
  128. else if (arg_type == STARPU_CLUSTER_KEEP_HOMOGENEOUS)
  129. {
  130. params->keep_homogeneous = va_arg(varg_list, int); /* 0=off, other=on */
  131. }
  132. else if (arg_type == STARPU_CLUSTER_PREFERE_MIN)
  133. {
  134. params->prefere_min = va_arg(varg_list, int); /* 0=off, other=on */
  135. }
  136. else if (arg_type == STARPU_CLUSTER_CREATE_FUNC)
  137. {
  138. params->create_func = va_arg(varg_list, void (*)(void*));
  139. }
  140. else if (arg_type == STARPU_CLUSTER_CREATE_FUNC_ARG)
  141. {
  142. params->create_func_arg = va_arg(varg_list, void*);
  143. }
  144. else if (arg_type == STARPU_CLUSTER_TYPE)
  145. {
  146. params->type = va_arg(varg_list, starpu_cluster_types);
  147. }
  148. else if (arg_type == STARPU_CLUSTER_AWAKE_WORKERS)
  149. {
  150. params->awake_workers = va_arg(varg_list, unsigned);
  151. }
  152. else if (arg_type == STARPU_CLUSTER_PARTITION_ONE)
  153. {
  154. struct _starpu_cluster_group *group = _starpu_cluster_group_new();
  155. _starpu_cluster_group_init(group, machine);
  156. _starpu_cluster_group_list_push_back(machine->groups, group);
  157. params = group->params;
  158. }
  159. else if (arg_type == STARPU_CLUSTER_NEW)
  160. {
  161. struct _starpu_cluster *cluster = _starpu_cluster_new();
  162. struct _starpu_cluster_group *group = _starpu_cluster_group_list_back(machine->groups);
  163. if (group == NULL)
  164. {
  165. group = _starpu_cluster_group_new();
  166. _starpu_cluster_group_init(group, machine);
  167. _starpu_cluster_group_list_push_back(machine->groups, group);
  168. }
  169. _starpu_cluster_init(cluster, group);
  170. _starpu_cluster_list_push_back(group->clusters, cluster);
  171. params = cluster->params;
  172. }
  173. else if (arg_type == STARPU_CLUSTER_NCORES)
  174. {
  175. struct _starpu_cluster_group *group = _starpu_cluster_group_list_back(machine->groups);
  176. struct _starpu_cluster *cluster =_starpu_cluster_list_back(group->clusters);
  177. cluster->ncores = va_arg(varg_list, unsigned);
  178. }
  179. else
  180. {
  181. STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
  182. }
  183. }
  184. va_end(varg_list);
  185. switch(cluster_level)
  186. {
  187. case HWLOC_OBJ_MISC:
  188. case HWLOC_OBJ_BRIDGE:
  189. case HWLOC_OBJ_PCI_DEVICE:
  190. case HWLOC_OBJ_OS_DEVICE:
  191. STARPU_ABORT_MSG("Cluster aggregation isn't supported for level %s\n",
  192. hwloc_obj_type_string(cluster_level));
  193. break;
  194. default: /* others can pass */
  195. break;
  196. }
  197. if (_starpu_cluster_machine(cluster_level, machine) == -ENODEV)
  198. {
  199. starpu_uncluster_machine(machine);
  200. machine = NULL;
  201. }
  202. return machine;
  203. }
  204. int starpu_uncluster_machine(struct starpu_cluster_machine *machine)
  205. {
  206. if (machine == NULL)
  207. return -1;
  208. struct _starpu_cluster_group *g, *tmp;
  209. struct _starpu_cluster_group_list *group_list = machine->groups;
  210. if (machine->id != STARPU_NMAX_SCHED_CTXS)
  211. starpu_sched_ctx_delete(machine->id);
  212. g = _starpu_cluster_group_list_begin(group_list);
  213. while (g != _starpu_cluster_group_list_end(group_list))
  214. {
  215. tmp = g;
  216. g = _starpu_cluster_group_list_next(g);
  217. _starpu_cluster_group_remove(group_list, tmp);
  218. }
  219. _starpu_cluster_group_list_delete(group_list);
  220. if (machine->topology != NULL)
  221. hwloc_topology_destroy(machine->topology);
  222. free(machine->params);
  223. free(machine);
  224. starpu_sched_ctx_set_context(0);
  225. return 0;
  226. }
  227. int starpu_cluster_print(struct starpu_cluster_machine *clusters)
  228. {
  229. if (clusters == NULL)
  230. return -1;
  231. int cnt, w;
  232. struct _starpu_cluster_group *group;
  233. struct _starpu_cluster *cluster;
  234. printf("Number of clusters created: %d\n", clusters->nclusters);
  235. cnt=0;
  236. for (group = _starpu_cluster_group_list_begin(clusters->groups);
  237. group != _starpu_cluster_group_list_end(clusters->groups);
  238. group = _starpu_cluster_group_list_next(group))
  239. {
  240. for (cluster = _starpu_cluster_list_begin(group->clusters);
  241. cluster != _starpu_cluster_list_end(group->clusters);
  242. cluster = _starpu_cluster_list_next(cluster))
  243. {
  244. printf("Cluster %d contains the following logical indexes:\n\t", cnt);
  245. for (w=0; w < cluster->ncores; w++)
  246. printf("%d ", cluster->cores[w]);
  247. printf("\n");
  248. cnt++;
  249. }
  250. }
  251. return 0;
  252. }
  253. void _starpu_cluster_create(struct _starpu_cluster *cluster)
  254. {
  255. unsigned main_ctx_id = cluster->father->father->id;
  256. if (cluster->params->awake_workers)
  257. cluster->id = starpu_sched_ctx_create(cluster->workerids, cluster->ncores,
  258. "clusters", STARPU_SCHED_CTX_NESTED,
  259. main_ctx_id,
  260. STARPU_SCHED_CTX_AWAKE_WORKERS,
  261. 0);
  262. else
  263. cluster->id = starpu_sched_ctx_create(cluster->workerids, cluster->ncores,
  264. "clusters", STARPU_SCHED_CTX_NESTED,
  265. main_ctx_id,
  266. 0);
  267. /* cluster priority can be the lowest, so let's enforce it */
  268. starpu_sched_ctx_set_priority(cluster->workerids, cluster->ncores, cluster->id, 0);
  269. return;
  270. }
  271. void _starpu_cluster_group_create(struct _starpu_cluster_group *group)
  272. {
  273. struct _starpu_cluster *c;
  274. for (c = _starpu_cluster_list_begin(group->clusters) ;
  275. c != _starpu_cluster_list_end(group->clusters) ;
  276. c = _starpu_cluster_list_next(c))
  277. {
  278. if (c->ncores == 0)
  279. continue;
  280. _starpu_cluster_create(c);
  281. if (!c->params->awake_workers)
  282. _starpu_cluster_bind(c);
  283. }
  284. return;
  285. }
  286. int _starpu_cluster_bind(struct _starpu_cluster *cluster)
  287. {
  288. starpu_binding_function func;
  289. void *func_arg;
  290. if (cluster->params->create_func)
  291. {
  292. func = cluster->params->create_func;
  293. func_arg = (void*) cluster->params->create_func_arg;
  294. }
  295. else
  296. {
  297. func = _starpu_cluster_type_get_func(cluster->params->type);
  298. func_arg = (void*) &cluster->id;
  299. }
  300. return starpu_task_insert(&_starpu_cluster_bind_cl,
  301. STARPU_SCHED_CTX, cluster->id,
  302. STARPU_POSSIBLY_PARALLEL, 1,
  303. STARPU_PROLOGUE_CALLBACK_POP, func,
  304. STARPU_PROLOGUE_CALLBACK_POP_ARG, func_arg,
  305. 0);
  306. }
  307. void _starpu_cluster_group_init(struct _starpu_cluster_group *group,
  308. struct starpu_cluster_machine *father)
  309. {
  310. group->id = 0;
  311. group->nclusters = 0;
  312. group->clusters = _starpu_cluster_list_new();
  313. group->father = father;
  314. group->params = malloc(sizeof(struct _starpu_cluster_parameters));
  315. _starpu_cluster_copy_parameters(group->params,
  316. father->params);
  317. return;
  318. }
  319. void _starpu_cluster_init(struct _starpu_cluster *cluster,
  320. struct _starpu_cluster_group *father)
  321. {
  322. cluster->id = STARPU_NMAX_SCHED_CTXS;
  323. cluster->cpuset = hwloc_bitmap_alloc();
  324. cluster->ncores = 0;
  325. cluster->cores = NULL;
  326. cluster->workerids = NULL;
  327. cluster->father = father;
  328. cluster->params = malloc(sizeof(struct _starpu_cluster_parameters));
  329. _starpu_cluster_copy_parameters(cluster->params,
  330. father->params);
  331. }
  332. int _starpu_cluster_remove(struct _starpu_cluster_list *cluster_list,
  333. struct _starpu_cluster *cluster)
  334. {
  335. if (cluster && cluster->id != STARPU_NMAX_SCHED_CTXS)
  336. starpu_sched_ctx_delete(cluster->id);
  337. else
  338. return -1;
  339. if (cluster->cores != NULL)
  340. free(cluster->cores);
  341. if (cluster->workerids != NULL)
  342. free(cluster->workerids);
  343. hwloc_bitmap_free(cluster->cpuset);
  344. free(cluster->params);
  345. _starpu_cluster_list_erase(cluster_list, cluster);
  346. _starpu_cluster_delete(cluster);
  347. return 0;
  348. }
  349. int _starpu_cluster_group_remove(struct _starpu_cluster_group_list *group_list,
  350. struct _starpu_cluster_group *group)
  351. {
  352. struct _starpu_cluster *tmp;
  353. struct _starpu_cluster_list *cluster_list = group->clusters;
  354. struct _starpu_cluster *c = _starpu_cluster_list_begin(cluster_list);
  355. while (c != _starpu_cluster_list_end(cluster_list))
  356. {
  357. tmp = c;
  358. c = _starpu_cluster_list_next(c);
  359. _starpu_cluster_remove(cluster_list, tmp);
  360. }
  361. _starpu_cluster_list_delete(cluster_list);
  362. free(group->params);
  363. _starpu_cluster_group_list_erase(group_list, group);
  364. _starpu_cluster_group_delete(group);
  365. return 0;
  366. }
/* Reset a parameter set to its defaults: no cluster-count constraints,
 * scheduling policy inherited from the global context, plain OpenMP
 * binding, and blocked (non-awake) workers.
 * Note: only the fields below are touched; any other field of the struct
 * keeps its previous content. */
void _starpu_cluster_init_parameters(struct _starpu_cluster_parameters *params)
{
	params->min_nb = 0;             /* 0 = no lower bound requested */
	params->max_nb = 0;             /* 0 = no upper bound requested */
	params->nb = 0;                 /* 0 = cluster count chosen automatically */
	params->sched_policy_name = NULL;
	params->sched_policy_struct = NULL;
	params->keep_homogeneous = 0;   /* allow clusters of unequal size */
	params->prefere_min = 0;        /* when bounded, lean towards max_nb */
	params->create_func = NULL;     /* no user binding routine */
	params->create_func_arg = NULL;
	params->type = OPENMP;          /* default binding prologue */
	params->awake_workers = 0;      /* workers blocked inside clusters */
	return;
}
/* Copy a parameter set field by field (used so groups inherit from the
 * machine and clusters from their group).
 * Pointer fields (policy name/struct, create_func_arg) are aliased, not
 * duplicated, so src must outlive dst or the pointed data must be static.
 * Note: only the fields below are copied; a whole-struct assignment is
 * deliberately avoided. */
void _starpu_cluster_copy_parameters(struct _starpu_cluster_parameters *dst,
				     struct _starpu_cluster_parameters *src)
{
	dst->min_nb = src->min_nb;
	dst->max_nb = src->max_nb;
	dst->nb = src->nb;
	dst->sched_policy_name = src->sched_policy_name;
	dst->sched_policy_struct = src->sched_policy_struct;
	dst->keep_homogeneous = src->keep_homogeneous;
	dst->prefere_min = src->prefere_min;
	dst->create_func = src->create_func;
	dst->create_func_arg = src->create_func_arg;
	dst->type = src->type;
	dst->awake_workers = src->awake_workers;
	return;
}
  398. /* Considering the resources and parameters, how many clusters should we take? */
  399. int _starpu_cluster_analyze_parameters(struct _starpu_cluster_parameters *params, int npus)
  400. {
  401. int nb_clusters = 1, j;
  402. if (params->nb)
  403. {
  404. nb_clusters = params->nb <= npus?params->nb : npus;
  405. }
  406. else if (params->min_nb && params->max_nb)
  407. {
  408. if (!params->keep_homogeneous)
  409. {
  410. if (params->prefere_min)
  411. nb_clusters = params->min_nb <= npus? params->min_nb : npus;
  412. else
  413. nb_clusters = params->max_nb <= npus? params->max_nb : npus;
  414. }
  415. else
  416. {
  417. int begin = params->prefere_min? params->min_nb:params->max_nb;
  418. int end = params->prefere_min? params->max_nb+1:params->min_nb-1;
  419. j=begin;
  420. int best = 0, second_best = 0, cpu_loss = INT_MAX;
  421. while (j != end)
  422. {
  423. if (npus%j == 0)
  424. {
  425. best = j;
  426. break;
  427. }
  428. if (npus%j < cpu_loss)
  429. {
  430. cpu_loss = npus%j;
  431. second_best = j;
  432. }
  433. j = params->prefere_min? j+1:j-1;
  434. }
  435. if (best)
  436. nb_clusters = best;
  437. else if (second_best)
  438. nb_clusters = second_best;
  439. }
  440. }
  441. return nb_clusters;
  442. }
/* Build the concrete machinery behind a cluster view: discover the
 * topology, create the main scheduling context (with the user's policy
 * if one was given, otherwise reusing the global context's policy),
 * create every group's clusters and make the main context current.
 * Returns 0 on success or the error from _starpu_cluster_topology
 * (-ENODEV when no CPU worker exists). */
int _starpu_cluster_machine(hwloc_obj_type_t cluster_level,
			    struct starpu_cluster_machine *machine)
{
	struct _starpu_cluster_group *g;
	int ret = 0;

	/* Fill machine->groups/clusters from the hwloc topology; bail out
	 * early (without creating any context) on failure. */
	if ((ret = _starpu_cluster_topology(cluster_level, machine)))
		return ret;

	if (machine->params->sched_policy_struct != NULL)
	{
		/* User passed a policy structure. */
		machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
						      STARPU_SCHED_CTX_POLICY_STRUCT,
						      machine->params->sched_policy_struct,
						      0);
	}
	else if (machine->params->sched_policy_name != NULL)
	{
		/* User passed a policy by name. */
		machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
						      STARPU_SCHED_CTX_POLICY_NAME,
						      machine->params->sched_policy_name,
						      0);
	}
	else
	{
		/* Default: reuse the policy of the global sched context. */
		struct starpu_sched_policy *sched_policy;
		struct _starpu_sched_ctx *global_ctx =_starpu_get_sched_ctx_struct(STARPU_GLOBAL_SCHED_CTX);
		sched_policy = _starpu_get_sched_policy(global_ctx);
		machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
						      STARPU_SCHED_CTX_POLICY_STRUCT,
						      sched_policy, 0);
	}

	/* Create the nested contexts of every group. */
	for (g = _starpu_cluster_group_list_begin(machine->groups) ;
	     g != _starpu_cluster_group_list_end(machine->groups) ;
	     g = _starpu_cluster_group_list_next(g))
		_starpu_cluster_group_create(g);

	/* Wait for the binding tasks submitted above to complete before
	 * handing the contexts to the application. */
	starpu_task_wait_for_all();
	starpu_sched_ctx_set_context(&machine->id);
	return ret;
}
  481. int _starpu_cluster_topology(hwloc_obj_type_t cluster_level,
  482. struct starpu_cluster_machine *machine)
  483. {
  484. int w;
  485. hwloc_topology_t topology;
  486. hwloc_cpuset_t avail_cpus;
  487. int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
  488. if (nworkers == 0)
  489. return -ENODEV;
  490. int *workers = (int*) malloc(sizeof(int) * nworkers);
  491. starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, nworkers);
  492. struct _starpu_machine_config *config = _starpu_get_machine_config();
  493. STARPU_ASSERT_MSG(config->topology.hwtopology != NULL, "STARPU_CLUSTER: You "
  494. "need to call starpu_init() or make sure to activate hwloc.");
  495. hwloc_topology_dup(&topology, config->topology.hwtopology);
  496. avail_cpus = hwloc_bitmap_alloc();
  497. hwloc_bitmap_zero(avail_cpus);
  498. for (w = 0; w < nworkers ; w++)
  499. {
  500. struct _starpu_worker *worker_str = _starpu_get_worker_struct(workers[w]);
  501. hwloc_bitmap_or(avail_cpus, avail_cpus, worker_str->hwloc_cpu_set);
  502. }
  503. hwloc_topology_restrict(topology, avail_cpus, 0);
  504. free(workers);
  505. /* Use new topology to fill in the cluster list */
  506. machine->topology = topology;
  507. _starpu_cluster_group(cluster_level, machine);
  508. hwloc_bitmap_free(avail_cpus);
  509. return 0;
  510. }
  511. void _starpu_cluster_group(hwloc_obj_type_t cluster_level,
  512. struct starpu_cluster_machine *machine)
  513. {
  514. unsigned nb_objects;
  515. unsigned i;
  516. struct _starpu_cluster_group *group = NULL;
  517. if (machine->groups == NULL)
  518. machine->groups = _starpu_cluster_group_list_new();
  519. nb_objects = hwloc_get_nbobjs_by_type(machine->topology, cluster_level);
  520. if (nb_objects == 0)
  521. return;
  522. /* XXX: handle nb_objects == -1 */
  523. group = _starpu_cluster_group_list_begin(machine->groups);
  524. for (i = 0 ; i < nb_objects ; i++)
  525. {
  526. hwloc_obj_t cluster_obj = hwloc_get_obj_by_type(machine->topology,
  527. cluster_level, i);
  528. if (group == NULL)
  529. {
  530. group = _starpu_cluster_group_new();
  531. _starpu_cluster_group_init(group, machine);
  532. _starpu_cluster_group_list_push_back(machine->groups, group);
  533. }
  534. group->group_obj = cluster_obj;
  535. _starpu_cluster(group);
  536. machine->ngroups++;
  537. machine->nclusters += group->nclusters;
  538. group = _starpu_cluster_group_list_next(group);
  539. }
  540. return;
  541. }
/* Partition the PUs of one group's hwloc object into clusters.
 * Three passes:
 *   1. honor user-preset cluster sizes (clamped to the available PUs);
 *   2. create/size the remaining "automatic" clusters from the group's
 *      parameters and the leftover PUs;
 *   3. walk the PUs in order and fill each cluster's cores/workerids
 *      arrays from the CPU workers bound to each PU. */
void _starpu_cluster(struct _starpu_cluster_group *group)
{
	int i, avail_pus, npus, npreset=0;
	struct _starpu_cluster *cluster;
	npus = hwloc_get_nbobjs_inside_cpuset_by_type(group->father->topology,
						      group->group_obj->cpuset,
						      HWLOC_OBJ_PU);
	/* Preset clusters: clamp requested sizes to what is left. */
	avail_pus = npus;
	for (cluster=_starpu_cluster_list_begin(group->clusters);
	     cluster!=_starpu_cluster_list_end(group->clusters);
	     cluster=_starpu_cluster_list_next(cluster))
	{
		if (cluster->ncores > avail_pus)
			cluster->ncores = avail_pus;
		else if (avail_pus == 0)
			cluster->ncores = 0;
		if (cluster->ncores > 0)
		{
			cluster->cores = malloc(sizeof(int)*cluster->ncores);
			cluster->workerids = malloc(sizeof(int)*cluster->ncores);
			avail_pus -= cluster->ncores;
			npreset++;
		}
	}
	/* Automatic clusters: split the leftover PUs into the count chosen
	 * from the group's parameters. 'cluster' continues from where the
	 * preset loop stopped (NULL past the end of the list). */
	group->nclusters = _starpu_cluster_analyze_parameters(group->params, avail_pus);
	for (i=0 ; i<group->nclusters && avail_pus>0 ; i++)
	{
		if (cluster == NULL)
		{
			cluster = _starpu_cluster_new();
			_starpu_cluster_init(cluster, group);
			_starpu_cluster_list_push_back(group->clusters, cluster);
		}
		if (cluster->ncores != 0 && cluster->ncores > avail_pus)
		{
			cluster->ncores = avail_pus;
		}
		else
		{
			/* Homogeneous: equal share; otherwise the last
			 * cluster absorbs the remainder. */
			if (cluster->params->keep_homogeneous)
				cluster->ncores = avail_pus/(group->nclusters-i);
			else
				cluster->ncores = i==group->nclusters-1?
					avail_pus:
					avail_pus/(group->nclusters-i);
		}
		avail_pus -= cluster->ncores;
		cluster->cores = malloc(sizeof(int)*cluster->ncores);
		cluster->workerids = malloc(sizeof(int)*cluster->ncores);
		cluster = _starpu_cluster_list_next(cluster);
	}
	group->nclusters += npreset;

	/* Fill pass: distribute the PUs (and their CPU workers) over the
	 * clusters in list order; 'count' tracks how many slots of the
	 * current cluster are filled. */
	cluster = _starpu_cluster_list_begin(group->clusters);
	int count = 0;
	static int starpu_cluster_warned = 0;
	for (i=0 ; i<npus ; i++)
	{
		hwloc_obj_t pu = hwloc_get_obj_inside_cpuset_by_type(group->father->topology,
								     group->group_obj->cpuset,
								     HWLOC_OBJ_PU, i);
		/* If we have more than one worker on this resource, let's add them too --
		   even if it's bad (they'll all be bound on the same PU) */
		int size = 0, j;
		/* NOTE(review): assumes pu->userdata holds the worker list
		 * StarPU attaches to each PU -- set elsewhere in the runtime. */
		struct _starpu_worker *worker_str = _starpu_worker_list_front(pu->userdata);
		for (j = 0; j < _starpu_worker_list_size(pu->userdata) ; j++)
		{
			if (worker_str->arch == STARPU_CPU_WORKER)
				size++;
			worker_str = _starpu_worker_list_next(worker_str);
		}
		if (size > 1)
		{
			/* Grow the current cluster to hold the extra workers;
			 * warn once per process (static flag). */
			if (!starpu_cluster_warned)
			{
				_STARPU_DISP("STARPU CLUSTERS: Caution! It seems that you have"
					     " multiple workers bound to the same PU. If you have"
					     " multithreading on your cores it is greatly adviced"
					     " to export STARPU_NTHREADS_PER_CORE=nb.");
				starpu_cluster_warned = 1;
			}
			cluster->ncores += size-1;
			cluster->cores = realloc(cluster->cores, sizeof(int)*cluster->ncores);
			cluster->workerids = realloc(cluster->workerids, sizeof(int)*cluster->ncores);
		}
		/* grab workerid list and return first cpu */
		worker_str = _starpu_worker_list_front(pu->userdata);
		if (worker_str)
			hwloc_bitmap_or(cluster->cpuset, cluster->cpuset,
					worker_str->hwloc_cpu_set);
		j = 0;
		while (worker_str)
		{
			if (worker_str->arch == STARPU_CPU_WORKER)
			{
				cluster->cores[count+j] = worker_str->bindid;
				cluster->workerids[count+j] = worker_str->workerid;
				j++;
			}
			worker_str = _starpu_worker_list_next(worker_str);
		}
		count+=size;
		/* Current cluster full: move on to the next one. */
		if (cluster->ncores == count)
		{
			count = 0;
			cluster = _starpu_cluster_list_next(cluster);
		}
	}
	return;
}