/* lp_tools.c — linear-programming-based resource distribution helpers for the
   StarPU scheduling-context hypervisor. */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <math.h>
  17. #include "sc_hypervisor_lp.h"
  18. #include "sc_hypervisor_policy.h"
  19. #include "sc_hypervisor_intern.h"
  20. #include <starpu_config.h>
/* Compute, through a linear program, how many workers of each type every
 * scheduling context should receive.
 *
 * nsched_ctxs      - number of contexts to consider
 * ntypes_of_workers- number of worker types (CPU/CUDA/...)
 * res              - OUT: res[ctx][type] = number of workers of that type for
 *                    that context; a row of -1.0 marks "give one shared worker"
 *                    (see the comment near the end of this function)
 * total_nw         - IN/OUT: total workers available per type; note total_nw[0]
 *                    may be shrunk before the second LP run below
 * tw               - worker-type descriptor
 * in_sched_ctxs    - explicit list of contexts, or NULL to use all registered ones
 *
 * Returns vmax = 1/tmax of the solved LP, 1.0 in the trivial single-context
 * case, -1.0 when no context is eligible for resizing, and 0.0 when GLPK is
 * not available.
 */
double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers],
					     int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs)
{
	unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
#ifdef STARPU_HAVE_GLPK_H
	/* v[i][w] = observed speed of context i on worker type w;
	   flops[i] = remaining work of context i, in gflops */
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];
/* 	unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); */
/* 	if(nhierarchy_levels <= 1) */
	sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs, total_nw[0]);
	int nw = tw->nw;
	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;
	/* gather per-context speeds and remaining flops */
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		int w;
		for(w = 0; w < nw; w++)
			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));

		double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx);
		unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
		/* with nested contexts, account for the ready work of all sons */
		if(nhierarchy_levels > 1)
			ready_flops = sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sc_w->sched_ctx);

		int nready_tasks = starpu_sched_ctx_get_nready_tasks(sc_w->sched_ctx);
		if(sc_w->to_be_sized)
		{
			/* context asked for an initial sizing: use its declared
			   remaining flops once, then clear the flag */
			flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			sc_w->to_be_sized = 0;
		}
		else
		{
			if(nhierarchy_levels > 1)
				flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
			else
				/* counters can drift below zero; fall back to ready flops */
				if(sc_w->remaining_flops < 0.0)
					flops[i] = ready_flops/1000000000.0; /* in gflops*/
				else
				{
					/* almost no ready work: treat the context as idle */
					if((ready_flops/1000000000.0) <= 0.000002)
						flops[i] = 0.0;
					else
						flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/
				}
		}
		if(flops[i] < 0.0)
			flops[i] = 0.0;
#ifdef STARPU_SC_HYPERVISOR_DEBUG
		printf("%u: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n",
		       sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks);
#endif
	}
	sc_hypervisor_check_if_consider_max(tw);
	int w;
	/* mark contexts whose speed is far from the per-type average
	   (below 10% or above 200%) as "consider_max"; if any is marked,
	   mark them all so the LP treats them consistently */
	for(w = 0; w < nw; w++)
	{
		double avg_speed = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw));
		if(avg_speed != -1.0)
		{
#ifdef STARPU_SC_HYPERVISOR_DEBUG
			printf("avg_speed for cpus is %lf \n", avg_speed);
#endif
			unsigned consider_max_for_all = 0;
			for(i = 0; i < nsched_ctxs; i++)
			{
				sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
				if(!sc_w->consider_max)
				{
#ifdef STARPU_SC_HYPERVISOR_DEBUG
					printf("ctx %u: current speed is %lf and compare speed is min %lf max %lf\n", sched_ctxs[i], v[i][w], (0.1*avg_speed), (2*avg_speed));
#endif
					if(v[i][w] < 0.1*avg_speed || v[i][w] > 2*avg_speed)
					{
						sc_w->consider_max = 1;
						consider_max_for_all = 1;
					}
#ifdef STARPU_SC_HYPERVISOR_DEBUG
					printf("ctx %u consider max %d \n", sched_ctxs[i], sc_w->consider_max);
#endif
				}
			}
			if(consider_max_for_all)
			{
				for(i = 0; i < nsched_ctxs; i++)
				{
					sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
					sc_w->consider_max = 1;
#ifdef STARPU_SC_HYPERVISOR_DEBUG
					printf("ctx %u consider max %d anyway \n", sched_ctxs[i], sc_w->consider_max);
#endif
				}
			}
		}
	}
	/* single context: no LP needed, it gets everything */
	if(nsched_ctxs == 1)
	{
		int w;
		for(w = 0; w < nw; w++)
			res[0][w] = total_nw[w];
		double optimal_v = 0.0;
#ifdef STARPU_USE_CUDA
		optimal_v = res[0][0] * v[0][0] + res[0][1]* v[0][1];
#else
		optimal_v = res[0][0] * v[0][0];
#endif //STARPU_USE_CUDA
		_set_optimal_v(sched_ctxs[0], optimal_v);
		return 1.0;
	}
	/* keep only contexts eligible for resizing (max_nworkers != 0 or
	   not yet flagged consider_max) and run the LP on them */
	unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
	double tmp_flops[STARPU_NMAX_SCHED_CTXS];
	double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
	int tmp_nsched_ctxs = 0;
	for(i = 0; i < nsched_ctxs; i++)
	{
		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		if(config->max_nworkers != 0 || !sc_w->consider_max)
		{
			tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i];
			tmp_flops[tmp_nsched_ctxs] = flops[i];
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				tmp_v[tmp_nsched_ctxs][w] = v[i][w];
			tmp_nsched_ctxs++;
		}
	}
	if(tmp_nsched_ctxs == 0)
		return -1.0;
	double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0);
	int j;
	/* scatter the LP result back to res[]; excluded contexts get 0 workers */
	for(i = 0; i < nsched_ctxs; i++)
	{
		unsigned found = 0;
		for(j = 0; j < tmp_nsched_ctxs; j++)
		{
			if(sched_ctxs[i] == tmp_sched_ctxs[j])
			{
				int w;
				for(w = 0; w < ntypes_of_workers; w++)
					res[i][w] = tmp_res[j][w];
				found = 1;
				break;
			}
		}
		if(!found)
		{
			int w;
			for(w = 0; w < ntypes_of_workers; w++)
				res[i][w] = 0.0;
		}
	}
	double vmax = 0.0;
	if(ret != 0.0)
	{
		/* redo the lp after cleaning out the contexts that got all the max workers required */
		unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS];
		double selected_flops[STARPU_NMAX_SCHED_CTXS];
		double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers];
		int nselected = 0;
		int available_cpus = total_nw[0];
		int used_cpus = 0;
		for(i = 0; i < nsched_ctxs; i++)
		{
			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]);
			/* still under its max and has real work: candidate for more cpus */
			if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001)
			{
				selected_flops[nselected] = flops[i];
				selected_v[nselected][0] = v[i][0];
				selected_sched_ctxs[nselected++] = sched_ctxs[i];
			}
			else
				available_cpus -= res[i][0];
			used_cpus += res[i][0];
		}
		/* rerun only if the first solution left >20% of cpus unused */
		if(used_cpus < 0.8 * total_nw[0] && nselected > 1)
		{
			double old_ret = ret;
			if(nselected <= 0 || nselected == nsched_ctxs)
			{
				nselected = nsched_ctxs;
				for(i = 0; i < nsched_ctxs; i++)
				{
					selected_flops[i] = flops[i];
					selected_v[i][0] = v[i][0];
					selected_sched_ctxs[i] = sched_ctxs[i];
				}
			}
			else
				/* NOTE: total_nw[0] is overwritten here, visible to the caller */
				total_nw[0] = available_cpus;
			double selected_res[nselected][ntypes_of_workers];
			ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret);
			if(ret != 0)
			{
				int j;
				for(i = 0; i < nsched_ctxs; i++)
				{
					for(j = 0; j < nselected; j++)
					{
						if(sched_ctxs[i] == selected_sched_ctxs[j])
						{
							res[i][0] = selected_res[j][0];
						}
					}
				}
			}
			else
				ret = old_ret; /* second LP failed: keep the first solution */
		}
	}
	/* if the lp could not give any workers to any context
	   just split the workers btw the contexts */
	if(ret == 0.0)
	{
		double rand_res[nw];
		int w;
		for(w = 0; w < nw; w++)
			rand_res[w] = total_nw[w]/nsched_ctxs;
		int s;
		for(s = 0; s < nsched_ctxs; s++)
			for(w = 0; w < nw; w++)
				res[s][w] = rand_res[w];
	}
	else
		/* keep the first speed */
//	if(ret != 0.0)
	{
		vmax = 1 / ret;
	}
	double optimal_v = 0.0;
	for(i = 0; i < nsched_ctxs; i++)
	{
#ifdef STARPU_USE_CUDA
		optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1];
#else
		optimal_v = res[i][0] * v[i][0];
#endif //STARPU_USE_CUDA
		int w;
		unsigned no_workers = 1;
		for(w = 0; w < nw; w++)
		{
			if(res[i][w] != 0.0)
			{
				no_workers = 0;
				break;
			}
		}
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		/* if the hypervisor gave 0 workers to a context but the context still
		   has some last flops or a ready task that does not even have any flops
		   we give a worker (in shared mode) to the context in order to leave him
		   finish its work = we give -1.0 value instead of 0.0 and further on in
		   the distribution function we take this into account and revert the variable
		   to its 0.0 value */
//		if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0))
		if(no_workers)
		{
			for(w = 0; w < nw; w++)
				res[i][w] = -1.0;
		}
//		if(optimal_v != 0.0)
		_set_optimal_v(sched_ctxs[i], optimal_v);
	}
	return vmax;
#else//STARPU_HAVE_GLPK_H
	return 0.0;
#endif//STARPU_HAVE_GLPK_H
}
  288. double sc_hypervisor_lp_get_tmax(int nworkers, int *workers)
  289. {
  290. struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers);
  291. int nw = tw->nw;
  292. int total_nw[nw];
  293. sc_hypervisor_group_workers_by_type(tw, total_nw);
  294. int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
  295. double res[nsched_ctxs][nw];
  296. double ret = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw, NULL) * 1000.0;
  297. free(tw);
  298. return ret;
  299. }
  300. void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
  301. {
  302. int s, w;
  303. double left_res[nw];
  304. for(w = 0; w < nw; w++)
  305. left_res[w] = 0.0;
  306. for(s = 0; s < ns; s++)
  307. {
  308. for(w = 0; w < nw; w++)
  309. {
  310. int x = floor(res[s][w]);
  311. double x_double = (double)x;
  312. double diff = res[s][w] - x_double;
  313. if(diff != 0.0)
  314. {
  315. if(diff > 0.5)
  316. {
  317. if(left_res[w] != 0.0)
  318. {
  319. if((diff + left_res[w]) > 0.5)
  320. {
  321. res_rounded[s][w] = x + 1;
  322. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  323. }
  324. else
  325. {
  326. res_rounded[s][w] = x;
  327. left_res[w] = (-1.0) * (diff + left_res[w]);
  328. }
  329. }
  330. else
  331. {
  332. res_rounded[s][w] = x + 1;
  333. left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
  334. }
  335. }
  336. else
  337. {
  338. if((diff + left_res[w]) > 0.5)
  339. {
  340. res_rounded[s][w] = x + 1;
  341. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  342. }
  343. else
  344. {
  345. res_rounded[s][w] = x;
  346. left_res[w] = diff;
  347. }
  348. }
  349. }
  350. else
  351. res_rounded[s][w] = x;
  352. }
  353. }
  354. }
/* Determine which workers context sched_ctx must give away so that it ends up
 * with its LP allocation res/res_rounded.
 *
 * Workers to transfer are appended per worker type to tmp_workers_move
 * (definitely leaving) and tmp_workers_add (to be shared rather than moved),
 * with counts in tmp_nw_move / tmp_nw_add.
 *
 * A negative row sum (the -1.0 sentinel set by the LP) means "keep one shared
 * worker alive for this context"; the sentinel is reverted to 0.0 here.
 */
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	double target_res = 0.0;
	for(w = 0; w < nw; w++)
	{
		target_res += res[sched_ctx_idx][w];
		/* revert the -1.0 "shared worker" sentinel to a real count */
		if(res[sched_ctx_idx][w] == -1.0) res[sched_ctx_idx][w] = 0.0;
	}

	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		if(arch == STARPU_CPU_WORKER)
		{
			/* CPUs: compare against the integer allocation */
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				if(target_res < 0.0 && nworkers_to_move > 0)
				{
					/* sentinel row: keep the first idle worker in shared
					   mode, move only the rest */
					tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0];
					for(i = 1; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				else
				{
					for(i = 0; i < nworkers_to_move; i++)
						tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				}
				free(workers_to_move);
			}
		}
		else
		{
			/* non-CPU (e.g. CUDA): compare against the fractional allocation */
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;
				if(diff == 0.0)
				{
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					/* fractional surplus: fetch one extra candidate and decide
					   on the last one by how big the fraction is */
					x+=1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							/* 0.3 < diff <= 0.8: share it instead of moving it */
							if(diff > 0.3)
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
/* Pick, from the pools filled by _lp_find_workers_to_give_away, the workers
 * that context sched_ctx wants to accept in order to reach its LP allocation.
 *
 * Accepted workers are written to workers_move (counted by *nw_move) and
 * workers_add (counted by *nw_add); the corresponding entries are consumed
 * from tmp_workers_move / tmp_workers_add (marked -1) so subsequent contexts
 * do not take them again.
 */
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	/* j/k index workers_move/workers_add across ALL worker types */
	int j = 0, k = 0;
	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;
		if( nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			/* take at most what this type's pool can provide */
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			/* NOTE(review): *nw_move is cumulative over worker types; when
			   *nw_move was already nonzero on entry this subtracts more than
			   was taken from this type's pool — verify this is intended */
			tmp_nw_move[w] -= *nw_move;
		}
		/* fractional need: accept "shared" workers when the context is more
		   than 0.3 worker short */
		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if((diff > 0.3 || needed > 0.3) && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
  485. void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
  486. int *nw_move, int workers_move[STARPU_NMAXWORKERS])
  487. {
  488. int w;
  489. for(w = 0; w < nw; w++)
  490. {
  491. if(tmp_nw_move[w] > 0)
  492. {
  493. *nw_move += tmp_nw_move[w];
  494. int i = 0, j = 0;
  495. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  496. {
  497. if(tmp_workers_move[w][i] != -1)
  498. {
  499. workers_move[j++] = tmp_workers_move[w][i];
  500. tmp_workers_move[w][i] = -1;
  501. if(j == *nw_move)
  502. break;
  503. }
  504. }
  505. }
  506. }
  507. }
/* Apply an LP solution to already-populated contexts: for every context s,
 * compute the workers it must give away, offer them to every other context s2,
 * and finally remove from s whatever nobody accepted.
 *
 * ns/nw       - number of contexts / worker types
 * res_rounded - integer allocation per context and type
 * res         - fractional allocation per context and type
 * sched_ctxs  - the contexts, indexed consistently with res/res_rounded
 * tw          - worker-type descriptor
 */
void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw)
{
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		/* per-type pools of workers ctx s offers; -1 marks an empty slot */
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];
		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];
		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}
		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded,
					      res, tw);
		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s
				   the rest of it will probably accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;
				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;
				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res, tw);
				if(nw_move > 0)
				{
					/* hard transfer: s loses the workers, s2 gains them */
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize()));
					nw_move = 0;
				}
				if(nw_add > 0)
				{
					/* shared transfer: s2 gains them without s losing them here */
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}
		/* if there are workers that weren't accepted by anyone but ctx s wants
		   to get rid of them just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);
		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
  571. int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
  572. {
  573. int nw_remove = 0;
  574. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  575. int worker;
  576. struct starpu_sched_ctx_iterator it;
  577. workers->init_iterator(workers, &it);
  578. while(workers->has_next(workers, &it))
  579. {
  580. worker = workers->get_next(workers, &it);
  581. int i;
  582. unsigned found = 0;
  583. for(i = 0; i < nw_add; i++)
  584. {
  585. if(worker == workers_add[i])
  586. {
  587. found = 1;
  588. break;
  589. }
  590. }
  591. if(!found)
  592. workers_remove[nw_remove++] = worker;
  593. }
  594. return nw_remove;
  595. }
/* Populate contexts from scratch out of the worker list `workers` (size
 * nworkers) according to the LP allocation res/res_rounded: for each context,
 * pick the idlest workers of each type, add them to the context, and remove
 * from the context any worker it holds that was not selected.
 *
 * start[w] is a moving cursor into the worker list per type, so consecutive
 * contexts take disjoint slices of it.
 */
void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw],
						   double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw)
{
	int s, w;
	int start[nw];
	for(w = 0; w < nw; w++)
		start[w] = 0;
	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;
		double target_res = 0.0;
		for(w = 0; w < nw; w++)
		{
			target_res += res[s][w];
			/* revert the -1.0 "shared worker" sentinel set by the LP */
			if(res[s][w] == -1.0) res[s][w] = 0.0;
		}
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
			if(arch == STARPU_CPU_WORKER)
			{
				int nworkers_to_add = res_rounded[s][w];
				if(target_res < 0.0)
				{
					/* sentinel row: give exactly one worker, shared with the
					   previous slice (cursor stepped back, then restored) */
					nworkers_to_add=1;
					int old_start = start[w];
					if(start[w] != 0)
						start[w]--;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					start[w] = old_start;
					int i;
					for(i = 0; i < nworkers_to_add; i++)
					{
						workers_add[nw_add++] = workers_to_add[i];
					}
					free(workers_to_add);
				}
				else
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
			else
			{
				/* non-CPU types use the fractional allocation */
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					/* fractional part: take the extra worker only when the
					   fraction is at least 0.3 */
					x+=1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
//		sc_hypervisor_start_resize(sched_ctxs[s]);
		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
		/* drop whatever the context held that was not (re)selected */
		int workers_remove[STARPU_NMAXWORKERS];
		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
/* Variant of sc_hypervisor_lp_distribute_resources_in_ctxs working directly
 * from the fractional allocation res (no rounded input): CPU counts are taken
 * as ceil(res), and when a fraction was rounded up the per-type cursor is
 * stepped back afterwards so the last worker is shared with the next context.
 */
void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, double res[ns][nw],
							       int *workers, int nworkers, struct types_of_workers *tw)
{
	int s, w;
	int start[nw];
	for(w = 0; w < nw; w++)
		start[w] = 0;
	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;
		double target_res = 0.0;
		for(w = 0; w < nw; w++)
		{
			target_res += res[s][w];
			/* revert the -1.0 "shared worker" sentinel set by the LP */
			if(res[s][w] == -1.0) res[s][w] = 0.0;
		}
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
			if(arch == STARPU_CPU_WORKER)
			{
				int nworkers_to_add = ceil(res[s][w]);
				double ceil_double = (double)nworkers_to_add;
				double diff = ceil_double - res[s][w];
				if(target_res < 0.0)
				{
					/* sentinel row: give exactly one worker, shared with the
					   previous slice (cursor stepped back, then restored) */
					nworkers_to_add=1;
					int old_start = start[w];
					if(start[w] != 0)
						start[w]--;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					start[w] = old_start;
					int i;
					for(i = 0; i < nworkers_to_add; i++)
					{
						workers_add[nw_add++] = workers_to_add[i];
					}
					free(workers_to_add);
				}
				else
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				/* the count was rounded up: share the boundary worker with
				   the next context by stepping the cursor back */
				if(diff != 0.0)
					start[w]--;
			}
			else
			{
				/* non-CPU types: same fractional policy as the rounded variant */
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					x+=1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
//		sc_hypervisor_start_resize(sched_ctxs[s]);
		sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
		/* drop whatever the context held that was not (re)selected */
		int workers_remove[STARPU_NMAXWORKERS];
		int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
		sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
	}
}
/* nw = all the workers (either in a list or on all machine) */
/* Convert a per-worker LP solution w_in_s[ctx][worker] into per-type counts
 * and apply it: either redistribute among already-populated contexts
 * (do_size == 0, or some context already has workers) or distribute from
 * scratch. CUDA workers count as assigned from a 0.3 fraction, others only
 * above 0.5. */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw)
{
	int w, s;
	int ntypes_of_workers = tw->nw;
	/* per-context, per-type fractional and rounded worker counts */
	double nworkers[ns][ntypes_of_workers];
	int nworkers_rounded[ns][ntypes_of_workers];
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < ntypes_of_workers; w++)
		{
			nworkers[s][w] = 0.0;
			nworkers_rounded[s][w] = 0;
		}
	}
	/* fold the per-worker fractions into per-type totals */
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = starpu_worker_get_type(w);
			int idx = sc_hypervisor_get_index_for_arch(arch, tw);
			nworkers[s][idx] += w_in_s[s][w];
			if(arch == STARPU_CUDA_WORKER)
			{
				/* lower threshold for GPUs */
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][idx]++;
			}
			else
			{
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][idx]++;
			}
		}
	}
	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw);
	else
	{
		unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;
		/* sizing mode: redistribute only if at least one context already
		   holds workers, otherwise distribute from the raw worker list */
		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s],
									 STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw);
	}
	return;
}
  823. void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers)
  824. {
  825. int s, w, worker, nw = 0;
  826. int remaining_workers[nworkers];
  827. for(w = 0; w < nworkers; w++)
  828. {
  829. worker = workers == NULL ? w : workers[w];
  830. unsigned found = 0;
  831. for(s = 0; s < ns; s++)
  832. {
  833. if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s]))
  834. {
  835. found = 1;
  836. break;
  837. }
  838. }
  839. if(!found)
  840. {
  841. remaining_workers[nw++] = worker;
  842. }
  843. }
  844. if(nw > 0)
  845. {
  846. for(s = 0; s < ns; s++)
  847. {
  848. for(w = 0; w < nw; w++)
  849. _sc_hypervisor_allow_compute_idle(sched_ctxs[s], remaining_workers[w], 0);
  850. sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]);
  851. }
  852. }
  853. }
  854. double sc_hypervisor_lp_find_tmax(double t1, double t2)
  855. {
  856. return t1 + ((t2 - t1)/2);
  857. }