lp_tools.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <math.h>
#include "sc_hypervisor_lp.h"
#include "sc_hypervisor_policy.h"
#include "sc_hypervisor_intern.h"
#include <starpu_config.h>
#ifdef STARPU_HAVE_GLPK_H
#include <glpk.h>
#endif //STARPU_HAVE_GLPK_H
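
/* Compute how many workers of each type every scheduling context should get,
   according to the allocation produced by sc_hypervisor_lp_simulate_distrib_flops()
   (only available when GLPK is present); the per-context allocation is returned in res.
   The return value is the inverse of the value computed by that function and is used
   as a time estimate by sc_hypervisor_lp_get_tmax() below; without GLPK it is 0.0. */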
double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers],
					     int total_nw[ntypes_of_workers], struct types_of_workers *tw)
{
	unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];

	int nw = tw->nw;
	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		int w;
		for(w = 0; w < nw; w++)
			v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw));

		flops[i] = sc_w->remaining_flops < 0.0 ? 0.0 : sc_w->remaining_flops/1000000000; /* in gflops */
//		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
	}

	double vmax = 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);

	double optimal_v = 0.0;
	for(i = 0; i < nsched_ctxs; i++)
	{
#ifdef STARPU_USE_CUDA
		optimal_v = res[i][0] * v[i][0] + res[i][1] * v[i][1];
#else
		optimal_v = res[i][0] * v[i][0];
#endif //STARPU_USE_CUDA
//		printf("%d: set opt %lf\n", i, optimal_v[i]);
		if(optimal_v != 0.0)
			_set_optimal_v(i, optimal_v);
	}

	return vmax;
#else //STARPU_HAVE_GLPK_H
	return 0.0;
#endif //STARPU_HAVE_GLPK_H
}
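
/* Estimate the tmax term used by the LP-based policies: run the allocation above
   over all worker types of the given workers and scale the result by 1000.0
   (presumably a conversion from seconds to milliseconds). */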
double sc_hypervisor_lp_get_tmax(int nworkers, int *workers)
{
	struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers);
	int nw = tw->nw;

	int total_nw[nw];
	sc_hypervisor_group_workers_by_type(tw, total_nw);

	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();

	double res[nsched_ctxs][nw];
	return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw) * 1000.0;
}
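
/* Round the fractional worker counts in res to the integers in res_rounded,
   carrying the rounding error over from one context to the next (per worker type)
   so that the rounded totals stay close to the LP solution. */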
void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
{
	int s, w;
	double left_res[nw];
	for(w = 0; w < nw; w++)
		left_res[w] = 0.0;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			int x = floor(res[s][w]);
			double x_double = (double)x;
			double diff = res[s][w] - x_double;

			if(diff != 0.0)
			{
				if(diff > 0.5)
				{
					if(left_res[w] != 0.0)
					{
						if((diff + left_res[w]) > 0.5)
						{
							res_rounded[s][w] = x + 1;
							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
						}
						else
						{
							res_rounded[s][w] = x;
							left_res[w] = (-1.0) * (diff + left_res[w]);
						}
					}
					else
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
					}
				}
				else
				{
					if((diff + left_res[w]) > 0.5)
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
					}
					else
					{
						res_rounded[s][w] = x;
						left_res[w] = diff;
					}
				}
			}
			else
				res_rounded[s][w] = x;
		}
	}
}
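
/* Collect in tmp_workers_move the workers that context sched_ctx holds beyond its
   LP allocation and can therefore give away.  For non-CPU workers the fractional
   part of the excess decides the fate of the last candidate: above 0.8 it is moved,
   between 0.3 and 0.8 it is only proposed for sharing through tmp_workers_add. */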
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	double target_res = 0.0;
	for(w = 0; w < nw; w++)
		target_res += res[sched_ctx_idx][w];

	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);

		if(arch == STARPU_CPU_WORKER)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				if(target_res == 0.0 && nworkers_to_move > 0)
					nworkers_to_move--;

				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				for(i = 0; i < nworkers_to_move; i++)
					tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];

				free(workers_to_move);
			}
		}
		else
		{
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;

				if(diff == 0.0)
				{
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					x += 1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];

						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else if(diff > 0.3)
							tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
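
/* From the workers other contexts offered (tmp_workers_move / tmp_workers_add),
   pick those that context sched_ctx needs to reach its LP allocation and append
   them to workers_move (workers to migrate) and workers_add (workers added to the
   context without being taken away from the donor). */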
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw)
{
	int w;
	int j = 0, k = 0;
	for(w = 0; w < nw; w++)
	{
		enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);
		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;

		if(nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			tmp_nw_move[w] -= *nw_move;
		}

		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;

		if(diff > 0.3 && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
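
/* Gather the workers that were offered for moving but accepted by no context, so
   that the caller can simply remove them from the context that gave them away. */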
void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS])
{
	int w;
	int j = 0; /* position in workers_move, shared across worker types */
	for(w = 0; w < nw; w++)
	{
		if(tmp_nw_move[w] > 0)
		{
			*nw_move += tmp_nw_move[w];
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
		}
	}
}
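
/* Apply an LP solution to contexts that already have workers: each context gives
   away its surplus, every other context accepts what it needs, and whatever is
   accepted by nobody is removed from the donor context. */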
void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw)
{
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];
		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];

		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded, res, tw);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s;
				   the rest of them will probably be accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;

				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res, tw);

				if(nw_move > 0)
				{
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone, but ctx s wants
		   to get rid of them, just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);

		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
	}
}
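
/* Apply an LP solution to contexts that do not have workers yet: take the idlest
   workers of each type from the given list, add them to each context and re-enable
   resizing for that context. */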
void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw)
{
	int s, w;
	int start[nw];
	for(w = 0; w < nw; w++)
		start[w] = 0;

	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;
		double target_res = 0.0;
		for(w = 0; w < nw; w++)
			target_res += res[s][w];

		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw);

			if(arch == STARPU_CPU_WORKER)
			{
				int nworkers_to_add = res_rounded[s][w];
				if(target_res == 0.0)
				{
					nworkers_to_add = 1;
					start[w]--;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
			else
			{
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;

				if(diff == 0.0)
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					x += 1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
		if(nw_add > 0)
		{
			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
			sc_hypervisor_start_resize(sched_ctxs[s]);
		}
//		sc_hypervisor_stop_resize(current_sched_ctxs[s]);
	}
}
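
/* Convert a per-(context, worker) LP solution w_in_s into per-(context, worker type)
   counts (a CUDA worker is counted from a 0.3 fraction, any other worker above 0.5),
   then redistribute the workers of the existing contexts, or distribute them from
   scratch when do_size is set and no context owns a worker yet. */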
/* nw = all the workers (either in a given list or on the whole machine) */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw)
{
	int w, s;
	int ntypes_of_workers = tw->nw;
	double nworkers[ns][ntypes_of_workers];
	int nworkers_rounded[ns][ntypes_of_workers];
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < ntypes_of_workers; w++)
		{
			nworkers[s][w] = 0.0;
			nworkers_rounded[s][w] = 0;
		}
	}

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_worker_archtype arch = starpu_worker_get_type(w);
			int idx = sc_hypervisor_get_index_for_arch(arch, tw);
			nworkers[s][idx] += w_in_s[s][w];

			if(arch == STARPU_CUDA_WORKER)
			{
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][idx]++;
			}
			else
			{
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][idx]++;
			}
		}
	}

	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw);
	else
	{
		unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s],
									 STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw);
	}
	return;
}
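
/* Return the middle point between two candidate values of tmax, presumably used to
   bisect towards a feasible one. */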
double sc_hypervisor_lp_find_tmax(double t1, double t2)
{
	return t1 + ((t2 - t1)/2);
}