lp_tools.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <math.h>
  17. #include "sc_hypervisor_lp.h"
  18. #include "sc_hypervisor_policy.h"
  19. #include "sc_hypervisor_intern.h"
  20. #include <starpu_config.h>
/* Solve a linear program assigning workers (per worker type) to scheduling
 * contexts, based on each context's measured speed and remaining flops.
 * The solution is written into res[ctx][worker_type] (fractional counts;
 * -1.0 is used as a sentinel meaning "give this context one shared worker",
 * reverted to 0.0 later by the distribution functions).
 * Returns vmax = 1/ret where ret is the lp objective, or 0.0 when no
 * solution was found or GLPK is not available. */
double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers],
					     int total_nw[ntypes_of_workers], struct types_of_workers *tw)
{
	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers]; /* speed of each ctx for each worker type */
	double flops[nsched_ctxs];                /* work each ctx still has, in gflops */

	sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs);

	int nw = tw->nw;
	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;
	/* gather speed and remaining-work estimates for every context */
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		int w;
		for(w = 0; w < nw; w++)
			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));

		double ready_flops = starpu_get_nready_flops_of_sched_ctx(sc_w->sched_ctx);
		int nready_tasks = starpu_get_nready_tasks_of_sched_ctx(sc_w->sched_ctx);

		if(sc_w->to_be_sized)
		{
			/* first sizing of this ctx: use all its remaining flops once */
			flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			sc_w->to_be_sized = 0;
		}
		else
		{
			if(sc_w->remaining_flops < 0.0)
				flops[i] = ready_flops/1000000000.0; /* in gflops*/
			else
			{
				/* almost no ready work: treat the ctx as idle */
				if((ready_flops/1000000000.0) <= 0.000002)
					flops[i] = 0.0;
				else
					flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			}
		}
		if(flops[i] < 0.0)
			flops[i] = 0.0;
		/* debug trace of the lp input */
		printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",
		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks);
	}

	/* only feed the lp with contexts allowed to have workers (max_nworkers != 0) */
	unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
	double tmp_flops[STARPU_NMAX_SCHED_CTXS];
	double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	int tmp_nsched_ctxs = 0;
	for(i = 0; i < nsched_ctxs; i++)
	{
		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
		if(config->max_nworkers != 0)
		{
			tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i];
			tmp_flops[tmp_nsched_ctxs] = flops[i];
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				tmp_v[tmp_nsched_ctxs][w] = v[i][w];
			tmp_nsched_ctxs++;
		}
	}

	double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0);

	/* scatter the lp solution back to the full context list; filtered-out
	   contexts get 0 workers of every type */
	int j;
	for(i = 0; i < nsched_ctxs; i++)
	{
		unsigned found = 0;
		for(j = 0; j < tmp_nsched_ctxs; j++)
		{
			if(sched_ctxs[i] == tmp_sched_ctxs[j])
			{
				int w;
				for(w = 0; w < ntypes_of_workers; w++)
					res[i][w] = tmp_res[j][w];
				found = 1;
				break;
			}
		}
		if(!found)
		{
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				res[i][w] = 0.0;
		}
	}

	double vmax = 0.0;
	if(ret != 0.0)
	{
		/* redo the lp after cleaning out the contexts that got all the max workers required */
		unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
		double selected_flops[STARPU_NMAX_SCHED_CTXS];
		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
		int nselected = 0;
		int available_cpus = total_nw[0]; /* index 0 is the cpu worker type here */
		int used_cpus = 0;
		for(i = 0; i < nsched_ctxs; i++)
		{
			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
			/* ctx got fewer cpus than its max and still has work: candidate
			   for the second lp round */
			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001)
			{
				selected_flops[nselected] = flops[i];
				selected_v[nselected][0] = v[i][0];
				selected_sched_ctxs[nselected++] = sched_ctxs[i];
			}
			else
				available_cpus -= res[i][0];
			used_cpus += res[i][0];
		}

		/* only bother re-solving when cpus are clearly under-used */
		if(used_cpus < 0.8 * total_nw[0] && nselected > 1)
		{
			double old_ret = ret;
			if(nselected <= 0 || nselected == nsched_ctxs)
			{
				/* nothing to filter: re-solve over all contexts */
				nselected = nsched_ctxs;
				for(i = 0; i < nsched_ctxs; i++)
				{
					selected_flops[i] = flops[i];
					selected_v[i][0] = v[i][0];
					selected_sched_ctxs[i] = sched_ctxs[i];
				}
			}
			else
				/* NOTE(review): total_nw[0] (caller's array) is overwritten
				   here and not restored — verify callers do not reuse it */
				total_nw[0] = available_cpus;

			double selected_res[nselected][ntypes_of_workers];
			ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret);
			if(ret != 0)
			{
				int j;
				for(i = 0; i < nsched_ctxs; i++)
				{
					for(j = 0; j < nselected; j++)
					{
						if(sched_ctxs[i] == selected_sched_ctxs[j])
						{
							res[i][0] = selected_res[j][0];
						}
					}
				}
			}
			else
				ret = old_ret; /* second round failed: keep the first solution */
		}
	}

	/* keep the first speed */
	if(ret != 0.0)
	{
		vmax = 1 / ret;
		double optimal_v = 0.0;
		for(i = 0; i < nsched_ctxs; i++)
		{
#ifdef STARPU_USE_CUDA
			optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
#else
			optimal_v = res[i][0] * v[i][0];
#endif //STARPU_USE_CUDA
			int w;
			unsigned no_workers = 1;
			for(w = 0; w < nw; w++)
			{
				if(res[i][w] != 0.0)
				{
					no_workers = 0;
					break;
				}
			}

			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
			/* if the hypervisor gave 0 workers to a context but the context still
			   has some last flops or a ready task that does not even have any flops
			   we give a worker (in shared mode) to the context in order to leave him
			   finish its work = we give -1.0 value instead of 0.0 and further on in
			   the distribution function we take this into account and revert the variable
			   to its 0.0 value */
//			if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
			if(no_workers)
			{
				for(w = 0; w < nw; w++)
					res[i][w] = -1.0;
			}

			if(optimal_v != 0.0)
				_set_optimal_v(i, optimal_v);
		}
	}
	return vmax;
#else//STARPU_HAVE_GLPK_H
	return 0.0;
#endif//STARPU_HAVE_GLPK_H
}
  205. double sc_hypervisor_lp_get_tmax(int nworkers, int *workers)
  206. {
  207. struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers);
  208. int nw = tw->nw;
  209. int total_nw[nw];
  210. sc_hypervisor_group_workers_by_type(tw, total_nw);
  211. int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
  212. double res[nsched_ctxs][nw];
  213. return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw) * 1000.0;
  214. }
  215. void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
  216. {
  217. int s, w;
  218. double left_res[nw];
  219. for(w = 0; w < nw; w++)
  220. left_res[nw] = 0.0;
  221. for(s = 0; s < ns; s++)
  222. {
  223. for(w = 0; w < nw; w++)
  224. {
  225. int x = floor(res[s][w]);
  226. double x_double = (double)x;
  227. double diff = res[s][w] - x_double;
  228. if(diff != 0.0)
  229. {
  230. if(diff > 0.5)
  231. {
  232. if(left_res[w] != 0.0)
  233. {
  234. if((diff + left_res[w]) > 0.5)
  235. {
  236. res_rounded[s][w] = x + 1;
  237. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  238. }
  239. else
  240. {
  241. res_rounded[s][w] = x;
  242. left_res[w] = (-1.0) * (diff + left_res[w]);
  243. }
  244. }
  245. else
  246. {
  247. res_rounded[s][w] = x + 1;
  248. left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
  249. }
  250. }
  251. else
  252. {
  253. if((diff + left_res[w]) > 0.5)
  254. {
  255. res_rounded[s][w] = x + 1;
  256. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  257. }
  258. else
  259. {
  260. res_rounded[s][w] = x;
  261. left_res[w] = diff;
  262. }
  263. }
  264. }
  265. else
  266. res_rounded[s][w] = x;
  267. }
  268. }
  269. }
/* Determine which workers context sched_ctx holds in excess of its lp
   allocation (res / res_rounded at index sched_ctx_idx) and stage them,
   per worker-type index w, into tmp_workers_move[w] (to be moved away) or
   tmp_workers_add[w] (to be shared with another context instead of moved).
   tmp_nw_move[w] / tmp_nw_add[w] count the staged entries.
   A negative res total (the -1.0 sentinel set by
   sc_hypervisor_lp_get_nworkers_per_ctx) means the context must keep one
   worker in shared mode; the sentinel entries are reset to 0.0 here. */
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	double target_res = 0.0;
	for(w = 0; w < nw; w++)
	{
		target_res += res[sched_ctx_idx][w];
		/* revert the -1.0 "shared worker" sentinel to a real 0.0 allocation */
		if(res[sched_ctx_idx][w] == -1.0) res[sched_ctx_idx][w] = 0.0;
	}

	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);

		if(arch == STARPU_CPU_WORKER)
		{
			/* cpus use the integer (rounded) allocation */
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				if(target_res < 0.0 && nworkers_to_move > 0)
				{
					/* sentinel case: share the first surplus worker
					   instead of moving it away */
					tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0];
					for(i = 1; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				else
				{
					for(i = 0; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				free(workers_to_move); /* list allocated by get_idlest_workers */
			}
		}
		else
		{
			/* other archs use the fractional allocation */
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;
				if(diff == 0.0)
				{
					/* exact integer surplus: move them all */
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					x+=1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						/* the last worker covers the fractional surplus:
						   move it outright when the fraction is large (>0.8),
						   share it when moderate (>0.3), keep it otherwise */
						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							if(diff > 0.3)
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
/* Determine which of the staged workers (tmp_workers_move / tmp_workers_add,
   filled by _lp_find_workers_to_give_away for another context) context
   sched_ctx should take, according to its own lp allocation.
   Accepted workers are appended to workers_move (full moves) and workers_add
   (shared additions); *nw_move / *nw_add are updated accordingly, and the
   consumed slots in the staging arrays are cleared to -1. */
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	int j = 0, k = 0; /* write cursors into workers_move / workers_add,
			     shared across worker types */
	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;

		if( nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			/* take as many staged workers as needed, capped by availability */
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			/* NOTE(review): *nw_move is cumulative over all worker types,
			   so subtracting it here (rather than the count taken for THIS
			   type) looks suspicious when nw > 1 — verify intent */
			tmp_nw_move[w] -= *nw_move;
		}

		/* fractional demand: accept shared workers when the context still
		   needs more than ~0.3 of a worker of this type */
		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if((diff > 0.3 || needed > 0.3) && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
  400. void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
  401. int *nw_move, int workers_move[STARPU_NMAXWORKERS])
  402. {
  403. int w;
  404. for(w = 0; w < nw; w++)
  405. {
  406. if(tmp_nw_move[w] > 0)
  407. {
  408. *nw_move += tmp_nw_move[w];
  409. int i = 0, j = 0;
  410. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  411. {
  412. if(tmp_workers_move[w][i] != -1)
  413. {
  414. workers_move[j++] = tmp_workers_move[w][i];
  415. tmp_workers_move[w][i] = -1;
  416. if(j == *nw_move)
  417. break;
  418. }
  419. }
  420. }
  421. }
  422. }
/* Apply the lp solution (res / res_rounded) to contexts that already have
   workers: for each context s, stage the workers it must give away, offer
   them to every other context s2 that needs workers (moving or sharing
   them), and finally remove from s the workers nobody accepted. */
void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw)
{
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		/* per worker-type staging areas; -1 marks an empty slot */
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];

		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];

		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded,
					      res, tw);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s
				   the rest of it will probably accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;

				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res, tw);

				if(nw_move > 0)
				{
					/* transfer accepted workers from s to s2 */
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize()));
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					/* add shared workers to s2 (they also stay in s) */
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone but ctx s wants
		   to get rid of them just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);

		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
  486. int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
  487. {
  488. int nw_remove = 0;
  489. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  490. int worker;
  491. struct starpu_sched_ctx_iterator it;
  492. if(workers->init_iterator)
  493. workers->init_iterator(workers, &it);
  494. while(workers->has_next(workers, &it))
  495. {
  496. worker = workers->get_next(workers, &it);
  497. int i;
  498. unsigned found = 0;
  499. for(i = 0; i < nw_add; i++)
  500. {
  501. if(worker == workers_add[i])
  502. {
  503. found = 1;
  504. break;
  505. }
  506. }
  507. if(!found)
  508. workers_remove[nw_remove++] = worker;
  509. }
  510. return nw_remove;
  511. }
/* Populate (size) contexts from the given worker list according to the lp
   allocation (res / res_rounded): pick the idlest matching workers for each
   context, add them to it, then remove from the context every worker it was
   not supposed to keep.  Used when contexts are being sized from scratch,
   as opposed to the redistribution path. */
void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw],
						   double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw)
{
	int s, w;
	int start[nw]; /* per worker-type cursor into the idlest-workers list */
	for(w = 0; w < nw; w++)
		start[w] = 0;
	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;
		double target_res = 0.0;
		for(w = 0; w < nw; w++)
		{
			target_res += res[s][w];
			/* revert the -1.0 "shared worker" sentinel to 0.0 */
			if(res[s][w] == -1.0) res[s][w] = 0.0;
		}

		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);

			if(arch == STARPU_CPU_WORKER)
			{
				int nworkers_to_add = res_rounded[s][w];
				if(target_res < 0.0)
				{
					/* sentinel case: grant a single shared worker without
					   advancing the cursor, so the same worker remains
					   available to other contexts */
					nworkers_to_add=1;
					int old_start = start[w];
					if(start[w] != 0)
						start[w]--;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					start[w] = old_start;
					int i;
					for(i = 0; i < nworkers_to_add; i++)
					{
						workers_add[nw_add++] = workers_to_add[i];
					}
					free(workers_to_add);
				}
				else
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
			else
			{
				/* fractional allocation: round up, and keep the extra
				   worker only when the fraction is at least 0.3 */
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					x+=1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
//		sc_hypervisor_start_resize(sched_ctxs[s]);
		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);

		/* drop every worker of the context that is not in workers_add */
		int workers_remove[STARPU_NMAXWORKERS];
		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
/* nw = all the workers (either in a list or on all machine) */
/* Convert the per-worker lp solution w_in_s[s][w] (fraction of worker w
   assigned to context s) into per-worker-type fractional and rounded counts,
   then either redistribute workers among contexts that already have some, or
   — when do_size is set and every context is empty — distribute workers
   from the global list. */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw)
{
	int w, s;
	int ntypes_of_workers = tw->nw;
	double nworkers[ns][ntypes_of_workers];      /* fractional count per ctx/type */
	int nworkers_rounded[ns][ntypes_of_workers]; /* integer count per ctx/type */
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < ntypes_of_workers; w++)
		{
			nworkers[s][w] = 0.0;
			nworkers_rounded[s][w] = 0;
		}
	}

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = starpu_worker_get_type(w);
			int idx = sc_hypervisor_get_index_for_arch(arch, tw);
			nworkers[s][idx] += w_in_s[s][w];

			/* cuda workers count in from a lower fraction (0.3) than
			   other workers (strictly above 0.5) */
			if(arch == STARPU_CUDA_WORKER)
			{
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][idx]++;
			}
			else
			{
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][idx]++;
			}
		}
	}

	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw);
	else
	{
		unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		/* if any context already holds workers, fall back to redistribution */
		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s],
									  STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw);
	}
	return;
}
  652. void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers)
  653. {
  654. int s, w, worker, nw = 0;
  655. int remaining_workers[nworkers];
  656. for(w = 0; w < nworkers; w++)
  657. {
  658. worker = workers == NULL ? w : workers[w];
  659. unsigned found = 0;
  660. for(s = 0; s < ns; s++)
  661. {
  662. if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s]))
  663. {
  664. found = 1;
  665. break;
  666. }
  667. }
  668. if(!found)
  669. {
  670. remaining_workers[nw++] = worker;
  671. }
  672. }
  673. if(nw > 0)
  674. {
  675. for(s = 0; s < ns; s++)
  676. {
  677. for(w = 0; w < nw; w++)
  678. _sc_hypervisor_allow_compute_idle(sched_ctxs[s], remaining_workers[w], 0);
  679. sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]);
  680. }
  681. }
  682. }
  683. double sc_hypervisor_lp_find_tmax(double t1, double t2)
  684. {
  685. return t1 + ((t2 - t1)/2);
  686. }