/* starpu_clusters_create.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2015 Université de Bordeaux
  4. * Copyright (C) 2015 INRIA
  5. * Copyright (C) 2015 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /* This file creates an interface to manage clustering resources and make use
  19. * of parallel tasks. It entirely depends on the hwloc software. */
  20. #include "starpu_clusters_create.h"
  21. starpu_binding_function _starpu_cluster_type_get_func(starpu_cluster_types type)
  22. {
  23. starpu_binding_function prologue_func;
  24. switch (type)
  25. {
  26. case OPENMP:
  27. prologue_func = &starpu_openmp_prologue;
  28. break;
  29. case INTEL_OPENMP_MKL:
  30. prologue_func = &starpu_intel_openmp_mkl_prologue;
  31. break;
  32. #ifdef STARPU_MKL
  33. case GNU_OPENMP_MKL:
  34. prologue_func = &starpu_gnu_openmp_mkl_prologue;
  35. break;
  36. #endif
  37. default:
  38. prologue_func = NULL;
  39. }
  40. return prologue_func;
  41. }
  42. void starpu_openmp_prologue(void * sched_ctx_id)
  43. {
  44. int sched_ctx = *(int*)sched_ctx_id;
  45. int *cpuids = NULL;
  46. int ncpuids = 0;
  47. int workerid = starpu_worker_get_id();
  48. if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
  49. {
  50. starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
  51. omp_set_num_threads(ncpuids);
  52. #pragma omp parallel
  53. {
  54. starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
  55. }
  56. free(cpuids);
  57. }
  58. return;
  59. }
  60. #ifdef STARPU_MKL
  61. void starpu_gnu_openmp_mkl_prologue(void * sched_ctx_id)
  62. {
  63. int sched_ctx = *(int*)sched_ctx_id;
  64. int *cpuids = NULL;
  65. int ncpuids = 0;
  66. int workerid = starpu_worker_get_id();
  67. if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
  68. {
  69. starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids);
  70. omp_set_num_threads(ncpuids);
  71. mkl_set_num_threads(ncpuids);
  72. mkl_set_dynamic(0);
  73. #pragma omp parallel
  74. {
  75. starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]);
  76. }
  77. free(cpuids);
  78. }
  79. return;
  80. }
  81. #endif
  82. /* Main interface function to create a cluster view of the machine.
  83. * Its job is to capture what the user wants and store it in a standard view. */
  84. struct starpu_cluster_machine* starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...)
  85. {
  86. va_list varg_list;
  87. int arg_type;
  88. struct _starpu_cluster_parameters *params;
  89. struct starpu_cluster_machine* machine = malloc(sizeof(struct starpu_cluster_machine));
  90. machine->params = malloc(sizeof(struct _starpu_cluster_parameters));
  91. machine->groups = _starpu_cluster_group_list_new();
  92. _starpu_cluster_init_parameters(machine->params);
  93. params = machine->params;
  94. va_start(varg_list, cluster_level);
  95. while ((arg_type = va_arg(varg_list, int)) != 0)
  96. {
  97. if (arg_type == STARPU_CLUSTER_MIN_NB)
  98. {
  99. params->min_nb = va_arg(varg_list, int);
  100. if (params->min_nb <= 0)
  101. fprintf(stderr, "Caution min number of contexts shouldn't "
  102. "be negative or null\n");
  103. }
  104. else if (arg_type == STARPU_CLUSTER_MAX_NB)
  105. {
  106. params->max_nb = va_arg(varg_list, int);
  107. if (params->max_nb <= 0)
  108. fprintf(stderr, "Caution max number of contexts shouldn't "
  109. "be negative or null\n");
  110. }
  111. else if (arg_type == STARPU_CLUSTER_NB)
  112. {
  113. params->nb = va_arg(varg_list, int);
  114. if (params->nb <= 0)
  115. fprintf(stderr, "Caution number of contexts shouldn't "
  116. "be negative or null\n");
  117. }
  118. else if (arg_type == STARPU_CLUSTER_POLICY_NAME)
  119. {
  120. params->sched_policy_name = va_arg(varg_list, char*);
  121. }
  122. else if (arg_type == STARPU_CLUSTER_POLICY_STRUCT)
  123. {
  124. params->sched_policy_struct = va_arg(varg_list,
  125. struct starpu_sched_policy*);
  126. }
  127. else if (arg_type == STARPU_CLUSTER_KEEP_HOMOGENEOUS)
  128. {
  129. params->keep_homogeneous = va_arg(varg_list, int); /* 0=off, other=on */
  130. }
  131. else if (arg_type == STARPU_CLUSTER_PREFERE_MIN)
  132. {
  133. params->prefere_min = va_arg(varg_list, int); /* 0=off, other=on */
  134. }
  135. else if (arg_type == STARPU_CLUSTER_CREATE_FUNC)
  136. {
  137. params->create_func = va_arg(varg_list, void (*)(void*));
  138. }
  139. else if (arg_type == STARPU_CLUSTER_CREATE_FUNC_ARG)
  140. {
  141. params->create_func_arg = va_arg(varg_list, void*);
  142. }
  143. else if (arg_type == STARPU_CLUSTER_TYPE)
  144. {
  145. params->type = va_arg(varg_list, starpu_cluster_types);
  146. }
  147. else if (arg_type == STARPU_CLUSTER_AWAKE_WORKERS)
  148. {
  149. params->awake_workers = va_arg(varg_list, unsigned);
  150. }
  151. else if (arg_type == STARPU_CLUSTER_PARTITION_ONE)
  152. {
  153. struct _starpu_cluster_group* group = _starpu_cluster_group_new();
  154. _starpu_cluster_group_init(group, machine);
  155. _starpu_cluster_group_list_push_back(machine->groups, group);
  156. params = group->params;
  157. }
  158. else if (arg_type == STARPU_CLUSTER_NEW)
  159. {
  160. struct _starpu_cluster* cluster = _starpu_cluster_new();
  161. struct _starpu_cluster_group* group = _starpu_cluster_group_list_back(machine->groups);
  162. if (group == NULL)
  163. {
  164. group = _starpu_cluster_group_new();
  165. _starpu_cluster_group_init(group, machine);
  166. _starpu_cluster_group_list_push_back(machine->groups, group);
  167. }
  168. _starpu_cluster_init(cluster, group);
  169. _starpu_cluster_list_push_back(group->clusters, cluster);
  170. params = cluster->params;
  171. }
  172. else if (arg_type == STARPU_CLUSTER_NCORES)
  173. {
  174. struct _starpu_cluster_group* group =
  175. _starpu_cluster_group_list_back(machine->groups);
  176. struct _starpu_cluster* cluster =
  177. _starpu_cluster_list_back(group->clusters);
  178. cluster->ncores = va_arg(varg_list, unsigned);
  179. }
  180. else
  181. {
  182. STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);
  183. }
  184. }
  185. va_end(varg_list);
  186. switch(cluster_level)
  187. {
  188. case HWLOC_OBJ_MISC:
  189. case HWLOC_OBJ_BRIDGE:
  190. case HWLOC_OBJ_PCI_DEVICE:
  191. case HWLOC_OBJ_OS_DEVICE:
  192. STARPU_ABORT_MSG("Cluster aggregation isn't supported for level %s\n",
  193. hwloc_obj_type_string(cluster_level));
  194. break;
  195. default: /* others can pass */
  196. break;
  197. }
  198. _starpu_cluster_machine(cluster_level, machine);
  199. return machine;
  200. }
  201. int starpu_uncluster_machine(struct starpu_cluster_machine* machine)
  202. {
  203. struct _starpu_cluster_group *g, *tmp;
  204. struct _starpu_cluster_group_list* group_list = machine->groups;
  205. starpu_sched_ctx_delete(machine->id);
  206. g = _starpu_cluster_group_list_begin(group_list);
  207. while (g != _starpu_cluster_group_list_end(group_list))
  208. {
  209. tmp = g;
  210. g = _starpu_cluster_group_list_next(g);
  211. _starpu_cluster_group_remove(group_list, tmp);
  212. }
  213. hwloc_topology_destroy(machine->topology);
  214. free(machine->params);
  215. free(machine);
  216. starpu_sched_ctx_set_context(0);
  217. return 0;
  218. }
  219. void starpu_cluster_print(struct starpu_cluster_machine* clusters)
  220. {
  221. int cnt, w;
  222. struct _starpu_cluster_group *group;
  223. struct _starpu_cluster *cluster;
  224. printf("Number of clusters created: %d\n", clusters->nclusters);
  225. cnt=0;
  226. for (group = _starpu_cluster_group_list_begin(clusters->groups);
  227. group != _starpu_cluster_group_list_end(clusters->groups);
  228. group = _starpu_cluster_group_list_next(group))
  229. {
  230. for (cluster = _starpu_cluster_list_begin(group->clusters);
  231. cluster != _starpu_cluster_list_end(group->clusters);
  232. cluster = _starpu_cluster_list_next(cluster))
  233. {
  234. printf("Cluster %d contains the following logical indexes:\n\t", cnt);
  235. for (w=0; w < cluster->ncores; w++)
  236. printf("%d ", cluster->cores[w]);
  237. printf("\n");
  238. cnt++;
  239. }
  240. }
  241. return;
  242. }
  243. void _starpu_cluster_create(struct _starpu_cluster* cluster)
  244. {
  245. unsigned main_ctx_id = cluster->father->father->id;
  246. if (cluster->params->awake_workers)
  247. cluster->id = starpu_sched_ctx_create(cluster->workerids, cluster->ncores,
  248. "clusters", STARPU_SCHED_CTX_NESTED,
  249. main_ctx_id,
  250. STARPU_SCHED_CTX_AWAKE_WORKERS,
  251. 0);
  252. else
  253. cluster->id = starpu_sched_ctx_create(cluster->workerids, cluster->ncores,
  254. "clusters", STARPU_SCHED_CTX_NESTED,
  255. main_ctx_id,
  256. 0);
  257. /* cluster priority can be the lowest, so let's enforce it */
  258. starpu_sched_ctx_set_priority(cluster->workerids, cluster->ncores, cluster->id, 0);
  259. return;
  260. }
  261. void _starpu_cluster_group_create(struct _starpu_cluster_group *group)
  262. {
  263. struct _starpu_cluster* c;
  264. for (c = _starpu_cluster_list_begin(group->clusters) ;
  265. c != _starpu_cluster_list_end(group->clusters) ;
  266. c = _starpu_cluster_list_next(c))
  267. {
  268. _starpu_cluster_create(c);
  269. if (!c->params->awake_workers)
  270. _starpu_cluster_bind(c);
  271. }
  272. return;
  273. }
  274. int _starpu_cluster_bind(struct _starpu_cluster* cluster)
  275. {
  276. starpu_binding_function func;
  277. void* func_arg;
  278. if (cluster->params->create_func)
  279. {
  280. func = cluster->params->create_func;
  281. func_arg = (void*) cluster->params->create_func_arg;
  282. }
  283. else
  284. {
  285. func = _starpu_cluster_type_get_func(cluster->params->type);
  286. func_arg = (void*) &cluster->id;
  287. }
  288. return starpu_task_insert(&_starpu_cluster_bind_cl,
  289. STARPU_SCHED_CTX, cluster->id,
  290. STARPU_POSSIBLY_PARALLEL, 1,
  291. STARPU_PROLOGUE_CALLBACK_POP, func,
  292. STARPU_PROLOGUE_CALLBACK_POP_ARG, func_arg,
  293. 0);
  294. }
  295. void _starpu_cluster_group_init(struct _starpu_cluster_group* group,
  296. struct starpu_cluster_machine* father)
  297. {
  298. group->id = 0;
  299. group->nclusters = 0;
  300. group->clusters = _starpu_cluster_list_new();
  301. group->father = father;
  302. group->params = malloc(sizeof(struct _starpu_cluster_parameters));
  303. _starpu_cluster_copy_parameters(group->params,
  304. father->params);
  305. return;
  306. }
  307. void _starpu_cluster_init(struct _starpu_cluster* cluster,
  308. struct _starpu_cluster_group* father)
  309. {
  310. cluster->id = 0;
  311. cluster->cpuset = hwloc_bitmap_alloc();
  312. cluster->ncores = 0;
  313. cluster->cores = NULL;
  314. cluster->workerids = NULL;
  315. cluster->father = father;
  316. cluster->params = malloc(sizeof(struct _starpu_cluster_parameters));
  317. _starpu_cluster_copy_parameters(cluster->params,
  318. father->params);
  319. }
  320. int _starpu_cluster_remove(struct _starpu_cluster_list* cluster_list,
  321. struct _starpu_cluster* cluster)
  322. {
  323. if (cluster && cluster->id != STARPU_NMAX_SCHED_CTXS)
  324. starpu_sched_ctx_delete(cluster->id);
  325. else
  326. return -1;
  327. if (cluster->cores != NULL)
  328. free(cluster->cores);
  329. if (cluster->workerids != NULL)
  330. free(cluster->workerids);
  331. hwloc_bitmap_free(cluster->cpuset);
  332. free(cluster->params);
  333. _starpu_cluster_list_erase(cluster_list, cluster);
  334. _starpu_cluster_delete(cluster);
  335. return 0;
  336. }
  337. int _starpu_cluster_group_remove(struct _starpu_cluster_group_list* group_list,
  338. struct _starpu_cluster_group* group)
  339. {
  340. struct _starpu_cluster* tmp;
  341. struct _starpu_cluster_list* cluster_list = group->clusters;
  342. struct _starpu_cluster* c = _starpu_cluster_list_begin(cluster_list);
  343. while (c != _starpu_cluster_list_end(cluster_list))
  344. {
  345. tmp = c;
  346. c = _starpu_cluster_list_next(c);
  347. _starpu_cluster_remove(cluster_list, tmp);
  348. }
  349. free(group->params);
  350. _starpu_cluster_group_list_erase(group_list, group);
  351. _starpu_cluster_group_delete(group);
  352. return 0;
  353. }
  354. void _starpu_cluster_init_parameters(struct _starpu_cluster_parameters* params)
  355. {
  356. params->min_nb = 0;
  357. params->max_nb = 0;
  358. params->nb = 0;
  359. params->sched_policy_name = NULL;
  360. params->sched_policy_struct = NULL;
  361. params->keep_homogeneous = 0;
  362. params->prefere_min = 0;
  363. params->create_func = NULL;
  364. params->create_func_arg = NULL;
  365. params->type = OPENMP;
  366. params->awake_workers = 0;
  367. return;
  368. }
  369. void _starpu_cluster_copy_parameters(struct _starpu_cluster_parameters* dst,
  370. struct _starpu_cluster_parameters* src)
  371. {
  372. dst->min_nb = src->min_nb;
  373. dst->max_nb = src->max_nb;
  374. dst->nb = src->nb;
  375. dst->sched_policy_name = src->sched_policy_name;
  376. dst->sched_policy_struct = src->sched_policy_struct;
  377. dst->keep_homogeneous = src->keep_homogeneous;
  378. dst->prefere_min = src->prefere_min;
  379. dst->create_func = src->create_func;
  380. dst->create_func_arg = src->create_func_arg;
  381. dst->type = src->type;
  382. dst->awake_workers = src->awake_workers;
  383. return;
  384. }
  385. /* Considering the resources and parameters, how many clusters should we take? */
  386. int _starpu_cluster_analyze_parameters(struct _starpu_cluster_parameters* params, int npus)
  387. {
  388. int nb_clusters = 1, j;
  389. if (params->nb)
  390. {
  391. nb_clusters = params->nb;
  392. }
  393. else if (params->min_nb && params->max_nb)
  394. {
  395. if (!params->keep_homogeneous)
  396. {
  397. if (params->prefere_min)
  398. nb_clusters = params->min_nb;
  399. else
  400. nb_clusters = params->max_nb;
  401. }
  402. else
  403. {
  404. int begin = params->prefere_min? params->min_nb:params->max_nb;
  405. int end = params->prefere_min? params->max_nb+1:params->min_nb-1;
  406. j=begin;
  407. int best = 0, second_best = 0, cpu_loss = INT_MAX;
  408. while (j != end)
  409. {
  410. if (npus%j == 0)
  411. {
  412. best = j;
  413. break;
  414. }
  415. if (npus%j < cpu_loss)
  416. {
  417. cpu_loss = npus%j;
  418. second_best = j;
  419. }
  420. j = params->prefere_min? j++:j--;
  421. }
  422. if (best)
  423. nb_clusters = best;
  424. else if (second_best)
  425. nb_clusters = second_best;
  426. }
  427. }
  428. return nb_clusters;
  429. }
  430. void _starpu_cluster_machine(hwloc_obj_type_t cluster_level,
  431. struct starpu_cluster_machine* machine)
  432. {
  433. struct _starpu_cluster_group* g;
  434. _starpu_cluster_topology(cluster_level, machine);
  435. if (machine->params->sched_policy_struct != NULL)
  436. {
  437. machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
  438. STARPU_SCHED_CTX_POLICY_STRUCT,
  439. machine->params->sched_policy_struct,
  440. 0);
  441. }
  442. else if (machine->params->sched_policy_name != NULL)
  443. {
  444. machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
  445. STARPU_SCHED_CTX_POLICY_NAME,
  446. machine->params->sched_policy_name,
  447. 0);
  448. }
  449. else
  450. {
  451. struct starpu_sched_policy* sched_policy;
  452. struct _starpu_sched_ctx* global_ctx =
  453. _starpu_get_sched_ctx_struct(STARPU_GLOBAL_SCHED_CTX);
  454. sched_policy = _starpu_get_sched_policy(global_ctx);
  455. machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx",
  456. STARPU_SCHED_CTX_POLICY_STRUCT,
  457. sched_policy, 0);
  458. }
  459. for (g = _starpu_cluster_group_list_begin(machine->groups) ;
  460. g != _starpu_cluster_group_list_end(machine->groups) ;
  461. g = _starpu_cluster_group_list_next(g))
  462. _starpu_cluster_group_create(g);
  463. starpu_task_wait_for_all();
  464. starpu_sched_ctx_set_context(&machine->id);
  465. return;
  466. }
  467. void _starpu_cluster_topology(hwloc_obj_type_t cluster_level,
  468. struct starpu_cluster_machine* machine)
  469. {
  470. int w;
  471. hwloc_topology_t topology;
  472. hwloc_cpuset_t avail_cpus;
  473. char *buf;
  474. struct _starpu_machine_config* config = _starpu_get_machine_config();
  475. STARPU_ASSERT_MSG(config->topology.hwtopology != NULL, "STARPU_CLUSTER: You "
  476. "need to call starpu_init() or make sure to activate hwloc.");
  477. hwloc_topology_dup(&topology, config->topology.hwtopology);
  478. avail_cpus = hwloc_bitmap_alloc();
  479. hwloc_bitmap_zero(avail_cpus);
  480. int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
  481. int *workers = (int*) malloc(sizeof(int) * nworkers);
  482. starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, nworkers);
  483. for (w = 0; w < nworkers ; w++)
  484. {
  485. struct _starpu_worker *worker_str = _starpu_get_worker_struct(workers[w]);
  486. hwloc_bitmap_or(avail_cpus, avail_cpus, worker_str->hwloc_cpu_set);
  487. }
  488. hwloc_bitmap_list_asprintf(&buf, avail_cpus);
  489. hwloc_topology_restrict(topology, avail_cpus, 0);
  490. free(workers);
  491. /* Use new topology to fill in the cluster list */
  492. machine->topology = topology;
  493. _starpu_cluster_group(cluster_level, machine);
  494. hwloc_bitmap_free(avail_cpus);
  495. return;
  496. }
  497. void _starpu_cluster_group(hwloc_obj_type_t cluster_level,
  498. struct starpu_cluster_machine* machine)
  499. {
  500. unsigned nb_objects;
  501. int i;
  502. struct _starpu_cluster_group* group = NULL;
  503. if (machine->groups == NULL)
  504. machine->groups = _starpu_cluster_group_list_new();
  505. nb_objects = hwloc_get_nbobjs_by_type(machine->topology, cluster_level);
  506. if (nb_objects == 0)
  507. return;
  508. group = _starpu_cluster_group_list_begin(machine->groups);
  509. for (i = 0 ; i < nb_objects ; i++)
  510. {
  511. hwloc_obj_t cluster_obj = hwloc_get_obj_by_type(machine->topology,
  512. cluster_level, i);
  513. if (group == NULL)
  514. {
  515. group = _starpu_cluster_group_new();
  516. _starpu_cluster_group_init(group, machine);
  517. _starpu_cluster_group_list_push_back(machine->groups, group);
  518. }
  519. group->group_obj = cluster_obj;
  520. _starpu_cluster(group);
  521. machine->ngroups++;
  522. machine->nclusters += group->nclusters;
  523. group = _starpu_cluster_group_list_next(group);
  524. }
  525. return;
  526. }
  527. void _starpu_cluster(struct _starpu_cluster_group* group)
  528. {
  529. int i, avail_pus, npus, npreset=0;
  530. struct _starpu_cluster* cluster;
  531. char* buf;
  532. npus = hwloc_get_nbobjs_inside_cpuset_by_type(group->father->topology,
  533. group->group_obj->cpuset,
  534. HWLOC_OBJ_PU);
  535. /* Preset clusters */
  536. avail_pus = npus;
  537. for (cluster=_starpu_cluster_list_begin(group->clusters);
  538. cluster!=_starpu_cluster_list_end(group->clusters);
  539. cluster=_starpu_cluster_list_next(cluster))
  540. {
  541. if (cluster->ncores > avail_pus)
  542. cluster->ncores = avail_pus;
  543. else if (avail_pus == 0)
  544. cluster->ncores = 0;
  545. if (cluster->ncores > 0)
  546. {
  547. cluster->cores = malloc(sizeof(int)*cluster->ncores);
  548. cluster->workerids = malloc(sizeof(int)*cluster->ncores);
  549. avail_pus -= cluster->ncores;
  550. npreset++;
  551. }
  552. }
  553. /* Automatic clusters */
  554. group->nclusters = _starpu_cluster_analyze_parameters(group->params, avail_pus);
  555. for (i=0 ; i<group->nclusters && avail_pus>0 ; i++)
  556. {
  557. if (cluster == NULL)
  558. {
  559. cluster = _starpu_cluster_new();
  560. _starpu_cluster_init(cluster, group);
  561. _starpu_cluster_list_push_back(group->clusters, cluster);
  562. }
  563. if (cluster->ncores != 0 && cluster->ncores > avail_pus)
  564. {
  565. cluster->ncores = avail_pus;
  566. }
  567. else
  568. {
  569. if (cluster->params->keep_homogeneous)
  570. cluster->ncores = avail_pus/(group->nclusters-i);
  571. else
  572. cluster->ncores = i==group->nclusters-1?
  573. avail_pus:
  574. avail_pus/(group->nclusters-i);
  575. }
  576. avail_pus -= cluster->ncores;
  577. cluster->cores = malloc(sizeof(int)*cluster->ncores);
  578. cluster->workerids = malloc(sizeof(int)*cluster->ncores);
  579. cluster = _starpu_cluster_list_next(cluster);
  580. }
  581. group->nclusters += npreset;
  582. cluster = _starpu_cluster_list_begin(group->clusters);
  583. int count = 0;
  584. static int starpu_cluster_warned = 0;
  585. hwloc_bitmap_list_asprintf(&buf, group->group_obj->cpuset);
  586. for (i=0 ; i<npus ; i++)
  587. {
  588. hwloc_obj_t pu = hwloc_get_obj_inside_cpuset_by_type(group->father->topology,
  589. group->group_obj->cpuset,
  590. HWLOC_OBJ_PU, i);
  591. /* If we have more than one worker on this resource, let's add them too --
  592. even if it's bad (they'll all be boud on the same PU) */
  593. int size = 0, j;
  594. struct _starpu_worker* worker_str = _starpu_worker_list_front(pu->userdata);
  595. for (j = 0; j < _starpu_worker_list_size(pu->userdata) ; j++)
  596. {
  597. if (worker_str->arch == STARPU_CPU_WORKER)
  598. size++;
  599. worker_str = _starpu_worker_list_next(worker_str);
  600. }
  601. if (size > 1)
  602. {
  603. if (!starpu_cluster_warned)
  604. {
  605. fprintf(stderr, "STARPU CLUSTERS: Caution! It seems that you have"
  606. " multiple workers bound to the same PU. If you have"
  607. " multithreading on your cores it is greatly adviced"
  608. " to export STARPU_NTHREADS_PER_CORE=nb.");
  609. starpu_cluster_warned = 1;
  610. }
  611. cluster->ncores += size-1;
  612. cluster->cores = realloc(cluster->cores,
  613. sizeof(int)*cluster->ncores);
  614. cluster->workerids = realloc(cluster->workerids,
  615. sizeof(int)*cluster->ncores);
  616. }
  617. /* grab workerid list and return first cpu */
  618. worker_str = _starpu_worker_list_front(pu->userdata);
  619. if (worker_str)
  620. hwloc_bitmap_or(cluster->cpuset, cluster->cpuset,
  621. worker_str->hwloc_cpu_set);
  622. j = 0;
  623. while (worker_str)
  624. {
  625. if (worker_str->arch == STARPU_CPU_WORKER)
  626. {
  627. cluster->cores[count+j] = worker_str->bindid;
  628. cluster->workerids[count+j] = worker_str->workerid;
  629. j++;
  630. }
  631. worker_str = _starpu_worker_list_next(worker_str);
  632. }
  633. count+=size;
  634. if (cluster->ncores == count)
  635. {
  636. count = 0;
  637. cluster = _starpu_cluster_list_next(cluster);
  638. }
  639. }
  640. return;
  641. }