/* lp_tools.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <math.h>
  17. #include "sc_hypervisor_lp.h"
  18. #include "sc_hypervisor_policy.h"
  19. #include "sc_hypervisor_intern.h"
  20. #include <starpu_config.h>
/* Compute how many workers of each type every scheduling context should get,
   by solving a linear program over the flops each context still has to
   execute and its measured speed per worker type.
   res[i][w] receives the (possibly fractional) number of workers of type w
   for context i; total_nw[w] is the total number of workers of type w.
   Returns 1/tmax (the inverse of the makespan found by the LP), or 0.0 when
   no solution was found or GLPK is not available. */
double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers],
					     int total_nw[ntypes_of_workers], struct types_of_workers *tw)
{
	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers]; /* speed of each context per worker type */
	double flops[nsched_ctxs];                /* remaining work per context, in gflops */
	sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs);
	int nw = tw->nw;
	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;

	/* gather per-context speed and remaining work */
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		int w;
		for(w = 0; w < nw; w++)
			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));

		if(sc_w->to_be_sized)
		{
			/* first sizing of this context: use all its remaining work once */
			flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			sc_w->to_be_sized = 0;
		}
		else
		{
			if(sc_w->remaining_flops < 0.0)
				flops[i] = sc_w->ready_flops/1000000000.0; /* in gflops*/
			else
			{
				/* contexts with a negligible amount of ready work
				   (<= 2e-6 gflops) are treated as idle */
				if((sc_w->ready_flops/1000000000.0) <= 0.000002)
					flops[i] = 0.0;
				else
					flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			}
		}
		/* printf("%d: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", */
		/*        sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, sc_w->ready_flops/1000000000, sc_w->nready_tasks); */
	}

	/* only feed the LP with the contexts that are allowed to receive
	   workers (max_nworkers != 0); the others will end up with res == 0 */
	unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
	double tmp_flops[STARPU_NMAX_SCHED_CTXS];
	double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	int tmp_nsched_ctxs = 0;
	for(i = 0; i < nsched_ctxs; i++)
	{
		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
		if(config->max_nworkers != 0)
		{
			tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i];
			tmp_flops[tmp_nsched_ctxs] = flops[i];
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				tmp_v[tmp_nsched_ctxs][w] = v[i][w];
			tmp_nsched_ctxs++;
		}
	}

	double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0);

	/* scatter the compacted LP solution back to the full context list */
	int j;
	for(i = 0; i < nsched_ctxs; i++)
	{
		unsigned found = 0;
		for(j = 0; j < tmp_nsched_ctxs; j++)
		{
			if(sched_ctxs[i] == tmp_sched_ctxs[j])
			{
				int w;
				for(w = 0; w < ntypes_of_workers; w++)
					res[i][w] = tmp_res[j][w];
				found = 1;
				break;
			}
		}
		if(!found)
		{
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				res[i][w] = 0.0;
		}
	}

	double vmax = 0.0;
	if(ret != 0.0)
	{
		/* redo the lp after cleaning out the contexts that got all the max workers required */
		unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
		double selected_flops[STARPU_NMAX_SCHED_CTXS];
		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
		int nselected = 0;
		int available_cpus = total_nw[0]; /* NOTE(review): index 0 is assumed to be the CPU type — confirm against types_of_workers */
		int used_cpus = 0;
		for(i = 0; i < nsched_ctxs; i++)
		{
			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
			/* keep for the second LP only the contexts that still want
			   more CPUs and have non-trivial work */
			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001)
			{
				selected_flops[nselected] = flops[i];
				selected_v[nselected][0] = v[i][0];
				selected_sched_ctxs[nselected++] = sched_ctxs[i];
			}
			else
				available_cpus -= res[i][0];
			used_cpus += res[i][0];
		}

		/* only re-optimize when a significant share of the CPUs is idle */
		if(used_cpus < 0.8 * total_nw[0] && nselected > 1)
		{
			double old_ret = ret;
			if(nselected <= 0 || nselected == nsched_ctxs)
			{
				nselected = nsched_ctxs;
				for(i = 0; i < nsched_ctxs; i++)
				{
					selected_flops[i] = flops[i];
					selected_v[i][0] = v[i][0];
					selected_sched_ctxs[i] = sched_ctxs[i];
				}
			}
			else
				total_nw[0] = available_cpus;

			double selected_res[nselected][ntypes_of_workers];
			ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret);
			if(ret != 0)
			{
				int j;
				for(i = 0; i < nsched_ctxs; i++)
				{
					for(j = 0; j < nselected; j++)
					{
						if(sched_ctxs[i] == selected_sched_ctxs[j])
						{
							res[i][0] = selected_res[j][0];
						}
					}
				}
			}
			else
				ret = old_ret; /* second LP failed: keep the first solution */
		}
	}

	/* keep the first speed */
	if(ret != 0.0)
	{
		vmax = 1 / ret;
		double optimal_v = 0.0;
		for(i = 0; i < nsched_ctxs; i++)
		{
#ifdef STARPU_USE_CUDA
			optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
#else
			optimal_v = res[i][0] * v[i][0];
#endif //STARPU_USE_CUDA
			int w;
			unsigned no_workers = 1;
			for(w = 0; w < nw; w++)
			{
				if(res[i][w] != 0.0)
				{
					no_workers = 0;
					break;
				}
			}

			sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);

			/* if the hypervisor gave 0 workers to a context but the context still
			   has some last flops or a ready task that does not even have any flops
			   we give a worker (in shared mode) to the context in order to leave him
			   finish its work = we give -1.0 value instead of 0.0 and further on in
			   the distribution function we take this into account and revert the variable
			   to its 0.0 value */
//			if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
			if(no_workers)
			{
				for(w = 0; w < nw; w++)
					res[i][w] = -1.0;
			}

			if(optimal_v != 0.0)
				_set_optimal_v(i, optimal_v);
		}
	}

	return vmax;
#else//STARPU_HAVE_GLPK_H
	return 0.0;
#endif//STARPU_HAVE_GLPK_H
}
  201. double sc_hypervisor_lp_get_tmax(int nworkers, int *workers)
  202. {
  203. struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers);
  204. int nw = tw->nw;
  205. int total_nw[nw];
  206. sc_hypervisor_group_workers_by_type(tw, total_nw);
  207. int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
  208. double res[nsched_ctxs][nw];
  209. return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw) * 1000.0;
  210. }
  211. void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
  212. {
  213. int s, w;
  214. double left_res[nw];
  215. for(w = 0; w < nw; w++)
  216. left_res[nw] = 0.0;
  217. for(s = 0; s < ns; s++)
  218. {
  219. for(w = 0; w < nw; w++)
  220. {
  221. int x = floor(res[s][w]);
  222. double x_double = (double)x;
  223. double diff = res[s][w] - x_double;
  224. if(diff != 0.0)
  225. {
  226. if(diff > 0.5)
  227. {
  228. if(left_res[w] != 0.0)
  229. {
  230. if((diff + left_res[w]) > 0.5)
  231. {
  232. res_rounded[s][w] = x + 1;
  233. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  234. }
  235. else
  236. {
  237. res_rounded[s][w] = x;
  238. left_res[w] = (-1.0) * (diff + left_res[w]);
  239. }
  240. }
  241. else
  242. {
  243. res_rounded[s][w] = x + 1;
  244. left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
  245. }
  246. }
  247. else
  248. {
  249. if((diff + left_res[w]) > 0.5)
  250. {
  251. res_rounded[s][w] = x + 1;
  252. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  253. }
  254. else
  255. {
  256. res_rounded[s][w] = x;
  257. left_res[w] = diff;
  258. }
  259. }
  260. }
  261. else
  262. res_rounded[s][w] = x;
  263. }
  264. }
  265. }
/* Determine which workers context sched_ctx must give away so that its
   allocation matches the LP result (row sched_ctx_idx of res/res_rounded).
   CPU workers are compared against the rounded allocation, other worker types
   against the fractional one.  Workers to hand over completely are appended
   to tmp_workers_move[w]; workers that should rather be shared with another
   context go to tmp_workers_add[w].  tmp_nw_move[w]/tmp_nw_add[w] count the
   entries per worker type. */
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	double target_res = 0.0;
	/* a -1.0 entry (set by sc_hypervisor_lp_get_nworkers_per_ctx) marks a
	   context that got no workers but should keep one in shared mode;
	   revert it to 0.0 while remembering the negative total */
	for(w = 0; w < nw; w++)
	{
		target_res += res[sched_ctx_idx][w];
		if(res[sched_ctx_idx][w] == -1.0) res[sched_ctx_idx][w] = 0.0;
	}

	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		if(arch == STARPU_CPU_WORKER)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				if(target_res < 0.0 && nworkers_to_move > 0)
				{
					/* keep one worker in shared mode, move the rest */
					tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0];
					for(i = 1; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				else
				{
					for(i = 0; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				free(workers_to_move);
			}
		}
		else
		{
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;
				if(diff == 0.0)
				{
					/* integral surplus: move exactly x workers */
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					x+=1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						/* the fractional worker: move it outright when
						   the context keeps less than 0.2 of it, offer
						   it in shared mode when it keeps less than 0.7 */
						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							if(diff > 0.3)
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
/* Determine which of the workers offered by another context (pools
   tmp_workers_move/tmp_workers_add filled by _lp_find_workers_to_give_away)
   context sched_ctx should take.  Workers taken permanently are appended to
   workers_move (count *nw_move); workers taken in shared mode are appended to
   workers_add (count *nw_add).  Accepted workers are removed from the tmp_*
   pools so several contexts can consume the same offer in turn. */
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	int j = 0, k = 0; /* write positions in workers_move / workers_add, shared across worker types */
	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;
		if( nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			/* take at most as many workers as are on offer for this type */
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			/* NOTE(review): *nw_move is cumulative over all worker types,
			   so this subtraction can over-decrement tmp_nw_move[w] when
			   several types contribute workers — verify intended */
			tmp_nw_move[w] -= *nw_move;
		}

		/* fractional need is satisfied with shared workers */
		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if((diff > 0.3 || needed > 0.3) && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
  396. void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
  397. int *nw_move, int workers_move[STARPU_NMAXWORKERS])
  398. {
  399. int w;
  400. for(w = 0; w < nw; w++)
  401. {
  402. if(tmp_nw_move[w] > 0)
  403. {
  404. *nw_move += tmp_nw_move[w];
  405. int i = 0, j = 0;
  406. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  407. {
  408. if(tmp_workers_move[w][i] != -1)
  409. {
  410. workers_move[j++] = tmp_workers_move[w][i];
  411. tmp_workers_move[w][i] = -1;
  412. if(j == *nw_move)
  413. break;
  414. }
  415. }
  416. }
  417. }
  418. }
/* Apply the LP result to existing contexts: for each context s find the
   workers it must give away, offer them to every other context (moved or
   shared), and finally remove from s the workers nobody accepted. */
void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw)
{
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		/* per-type pools of workers ctx s offers: to move away
		   completely, or to share with another context */
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];

		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];

		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1; /* -1 marks an empty slot */
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded,
					      res, tw);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s
				   the rest of it will probably accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;

				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res, tw);

				if(nw_move > 0)
				{
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize()));
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone but ctx s wants
		   to get rid of them just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);

		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
  482. int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
  483. {
  484. int nw_remove = 0;
  485. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  486. int worker;
  487. struct starpu_sched_ctx_iterator it;
  488. if(workers->init_iterator)
  489. workers->init_iterator(workers, &it);
  490. while(workers->has_next(workers, &it))
  491. {
  492. worker = workers->get_next(workers, &it);
  493. int i;
  494. unsigned found = 0;
  495. for(i = 0; i < nw_add; i++)
  496. {
  497. if(worker == workers_add[i])
  498. {
  499. found = 1;
  500. break;
  501. }
  502. }
  503. if(!found)
  504. workers_remove[nw_remove++] = worker;
  505. }
  506. return nw_remove;
  507. }
/* Initial distribution: hand out workers from the given list to the contexts
   according to the LP result (res/res_rounded), then remove from each context
   the workers it was not granted. */
void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw],
						   double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw)
{
	int s, w;
	int start[nw]; /* per-type cursor into the candidate worker list */
	for(w = 0; w < nw; w++)
		start[w] = 0;
	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;
		double target_res = 0.0;
		/* a -1.0 entry (set by sc_hypervisor_lp_get_nworkers_per_ctx)
		   marks a context that got no workers but should still receive
		   one in shared mode; revert to 0.0 but keep the negative total */
		for(w = 0; w < nw; w++)
		{
			target_res += res[s][w];
			if(res[s][w] == -1.0) res[s][w] = 0.0;
		}

		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
			if(arch == STARPU_CPU_WORKER)
			{
				int nworkers_to_add = res_rounded[s][w];
				if(target_res < 0.0)
				{
					/* grant a single shared worker without advancing
					   the cursor, so the same worker stays available
					   to other contexts */
					nworkers_to_add=1;
					int old_start = start[w];
					if(start[w] == nworkers)
						start[w]--;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					start[w] = old_start;
					int i;
					for(i = 0; i < nworkers_to_add; i++)
					{
						workers_add[nw_add++] = workers_to_add[i];
					}
					free(workers_to_add);
				}
				else
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
			else
			{
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					/* integral share: grant exactly x workers */
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					/* fractional share: only grant the extra worker
					   when at least 0.3 of it is needed */
					x+=1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
//		sc_hypervisor_start_resize(sched_ctxs[s]);
		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);

		/* evict everything this context was not granted */
		int workers_remove[STARPU_NMAXWORKERS];
		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
/* nw = all the workers (either in a list or on all machine) */
/* Turn the per-worker LP solution w_in_s[s][w] (fraction of worker w given to
   context s) into a per-worker-type distribution and apply it: redistribute
   among the existing contexts, or — when do_size is set and no context has
   workers yet — perform the initial distribution. */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw)
{
	int w, s;
	int ntypes_of_workers = tw->nw;
	double nworkers[ns][ntypes_of_workers];
	int nworkers_rounded[ns][ntypes_of_workers];
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < ntypes_of_workers; w++)
		{
			nworkers[s][w] = 0.0;
			nworkers_rounded[s][w] = 0;
		}
	}

	/* accumulate fractions per worker type; a CUDA worker counts as
	   granted from a 0.3 fraction on, any other worker only above 0.5 */
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = starpu_worker_get_type(w);
			int idx = sc_hypervisor_get_index_for_arch(arch, tw);
			nworkers[s][idx] += w_in_s[s][w];

			if(arch == STARPU_CUDA_WORKER)
			{
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][idx]++;
			}
			else
			{
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][idx]++;
			}
		}
	}

	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw);
	else
	{
		unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		/* distribute from scratch only when no context has workers yet */
		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s],
									 STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw);
	}
	return;
}
  648. void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers)
  649. {
  650. int s, w, worker, nw = 0;
  651. int remaining_workers[nworkers];
  652. for(w = 0; w < nworkers; w++)
  653. {
  654. worker = workers == NULL ? w : workers[w];
  655. unsigned found = 0;
  656. for(s = 0; s < ns; s++)
  657. {
  658. if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s]))
  659. {
  660. found = 1;
  661. break;
  662. }
  663. }
  664. if(!found)
  665. {
  666. remaining_workers[nw++] = worker;
  667. }
  668. }
  669. if(nw > 0)
  670. {
  671. for(s = 0; s < ns; s++)
  672. {
  673. for(w = 0; w < nw; w++)
  674. _sc_hypervisor_allow_compute_idle(sched_ctxs[s], remaining_workers[w], 0);
  675. sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]);
  676. }
  677. }
  678. }
  679. double sc_hypervisor_lp_find_tmax(double t1, double t2)
  680. {
  681. return t1 + ((t2 - t1)/2);
  682. }