/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
  16. #include <math.h>
  17. #include "sc_hypervisor_lp.h"
  18. #include "sc_hypervisor_policy.h"
  19. #include <starpu_config.h>
  20. #ifdef STARPU_HAVE_GLPK_H
  21. #endif //STARPU_HAVE_GLPK_H
/* Fill res[ctx][type] with the number of workers of each type the LP solver
 * assigns to each scheduling context, and return 1/tmax (the inverse of the
 * simulated makespan), or 0.0 when GLPK is not available.
 *
 * nsched_ctxs        - number of scheduling contexts considered
 * ntypes_of_workers  - number of worker-type columns in res/v/total_nw
 * res                - OUT: fractional workers per (context, type)
 * total_nw           - IN: total available workers per type
 */
double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers])
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	/* v[ctx][type]: observed execution velocity of each worker type in each ctx */
	double v[nsched_ctxs][ntypes_of_workers];
	/* flops[ctx]: work still to be executed, in gflops */
	double flops[nsched_ctxs];

	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
#ifdef STARPU_USE_CUDA
		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
		if(ncuda != 0)
		{
			/* column 0 = CUDA velocity, column 1 = CPU velocity */
			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
			v[i][1] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
		}
		else
			/* NOTE(review): when ncuda == 0 but ntypes_of_workers == 2,
			   v[i][1] is left uninitialized — presumably callers pass
			   ntypes_of_workers == 1 in that case; confirm. */
			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
#else
		v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
#endif // STARPU_USE_CUDA
		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
		// printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
	}

	/* the LP returns tmax; callers expect its inverse (a "speed" figure) */
	return 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
#else//STARPU_HAVE_GLPK_H
	/* no LP solver available: signal "no solution" */
	return 0.0;
#endif//STARPU_HAVE_GLPK_H
}
  53. double sc_hypervisor_lp_get_tmax(int nw, int *workers)
  54. {
  55. int ntypes_of_workers = 2;
  56. int total_nw[ntypes_of_workers];
  57. sc_hypervisor_group_workers_by_type(workers, nw, 2, total_nw);
  58. int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
  59. double res[nsched_ctxs][ntypes_of_workers];
  60. return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000;
  61. }
  62. void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
  63. {
  64. int s, w;
  65. double left_res[nw];
  66. for(w = 0; w < nw; w++)
  67. left_res[nw] = 0.0;
  68. for(s = 0; s < ns; s++)
  69. {
  70. for(w = 0; w < nw; w++)
  71. {
  72. int x = floor(res[s][w]);
  73. double x_double = (double)x;
  74. double diff = res[s][w] - x_double;
  75. if(diff != 0.0)
  76. {
  77. if(diff > 0.5)
  78. {
  79. if(left_res[w] != 0.0)
  80. {
  81. if((diff + left_res[w]) > 0.5)
  82. {
  83. res_rounded[s][w] = x + 1;
  84. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  85. }
  86. else
  87. {
  88. res_rounded[s][w] = x;
  89. left_res[w] = (-1.0) * (diff + left_res[w]);
  90. }
  91. }
  92. else
  93. {
  94. res_rounded[s][w] = x + 1;
  95. left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
  96. }
  97. }
  98. else
  99. {
  100. if((diff + left_res[w]) > 0.5)
  101. {
  102. res_rounded[s][w] = x + 1;
  103. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  104. }
  105. else
  106. {
  107. res_rounded[s][w] = x;
  108. left_res[w] = diff;
  109. }
  110. }
  111. }
  112. else
  113. res_rounded[s][w] = x;
  114. }
  115. }
  116. }
/* Determine which workers context 'sched_ctx' should give away so it shrinks
 * to its LP allocation. Whole surplus workers go into tmp_workers_move;
 * a worker covering a "middling" fractional surplus (0.3 < diff <= 0.8) is
 * instead put in tmp_workers_add, i.e. shared rather than fully moved.
 *
 * nw / ns              - number of worker types / contexts
 * sched_ctx            - context id to shrink; sched_ctx_idx its row in res
 * tmp_nw_move/_add     - IN/OUT: per-type counters into the arrays below
 * tmp_workers_move/_add- IN/OUT: candidate worker ids per type
 * res_rounded / res    - integer and fractional LP allocations
 */
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	for(w = 0; w < nw; w++)
	{
		/* column convention: 0 = CUDA, 1 = CPU */
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;

		if(w == 1)
		{
			/* CPU column: use the integer (rounded) allocation */
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				for(i = 0; i < nworkers_to_move; i++)
					tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				free(workers_to_move);
			}
		}
		else
		{
			/* CUDA column: work on the fractional allocation */
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;  /* fractional surplus */
				if(diff == 0.0)
				{
					/* exact surplus: move x idlest workers */
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					/* ask for one extra candidate to cover the fraction */
					x+=1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					/* NOTE(review): x is passed by pointer, so the call may
					   lower it to the number actually found */
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						if(diff > 0.8)
							/* almost a whole worker: move it too */
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							if(diff > 0.3)
								/* partial surplus: offer it for sharing instead */
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
/* Determine which of the workers another context offered (tmp_workers_move /
 * tmp_workers_add) context 'sched_ctx' should accept, based on how far it is
 * below its LP allocation. Accepted ids are appended to workers_move (full
 * transfer) and workers_add (shared), with counts in *nw_move / *nw_add.
 *
 * nw / ns              - number of worker types / contexts
 * sched_ctx            - receiving context; sched_ctx_idx its row in res
 * tmp_nw_move/_add     - IN/OUT: remaining offers per type (consumed here)
 * tmp_workers_move/_add- IN/OUT: offered worker ids; taken slots set to -1
 * nw_move/workers_move - OUT: workers to fully move into sched_ctx
 * nw_add/workers_add   - OUT: workers to additionally add into sched_ctx
 * res_rounded / res    - integer and fractional LP allocations
 */
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	/* j/k index workers_move/workers_add and persist across worker types */
	int j = 0, k = 0;
	for(w = 0; w < nw; w++)
	{
		/* column convention: 0 = CUDA, 1 = CPU */
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;

		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;

		if( nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			/* take at most what was offered */
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;  /* mark slot consumed */
					if(j == *nw_move)
						break;
				}
			}
			/* NOTE(review): *nw_move is the cumulative total across worker
			   types, so this can subtract more than this type contributed
			   when both columns accept workers — confirm intended. */
			tmp_nw_move[w] -= *nw_move;
		}

		/* fractional shortage: accept shared workers when we are more than
		   0.3 of a worker short */
		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if(diff > 0.3 && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;  /* mark slot consumed */
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
  236. void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
  237. int *nw_move, int workers_move[STARPU_NMAXWORKERS])
  238. {
  239. int w;
  240. for(w = 0; w < nw; w++)
  241. {
  242. if(tmp_nw_move[w] > 0)
  243. {
  244. *nw_move += tmp_nw_move[w];
  245. int i = 0, j = 0;
  246. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  247. {
  248. if(tmp_workers_move[w][i] != -1)
  249. {
  250. workers_move[j++] = tmp_workers_move[w][i];
  251. tmp_workers_move[w][i] = -1;
  252. if(j == *nw_move)
  253. break;
  254. }
  255. }
  256. }
  257. }
  258. }
/* Re-balance workers between the existing contexts so each ends up with its
 * LP allocation: for every context s, compute the workers it must give away,
 * let every other context s2 take what it needs, and finally remove from s
 * whatever nobody accepted.
 *
 * ns / nw     - number of contexts / worker types
 * res_rounded - integer allocation per (context, type)
 * res         - fractional allocation per (context, type)
 */
void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		/* per-type scratch lists of workers ctx s offers: 'move' = full
		   transfer candidates, 'add' = sharing candidates; -1 = empty slot */
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];

		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];

		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded, res);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s
				   the rest of it will probably accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;

				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res);

				if(nw_move > 0)
				{
					/* transfer ownership s -> s2 */
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					/* shared workers: added to s2 without leaving s */
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone but ctx s wants
		   to get rid of them just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;

		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);

		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
	}
}
  322. void sc_hypervisor_lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
  323. {
  324. unsigned current_nworkers = workers == NULL ? starpu_worker_get_count() : (unsigned)nworkers;
  325. int s, w;
  326. int start[nw];
  327. for(w = 0; w < nw; w++)
  328. start[w] = 0;
  329. for(s = 0; s < ns; s++)
  330. {
  331. int workers_add[STARPU_NMAXWORKERS];
  332. int nw_add = 0;
  333. for(w = 0; w < nw; w++)
  334. {
  335. enum starpu_archtype arch;
  336. #ifdef STARPU_USE_CUDA
  337. int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
  338. if(ncuda != 0)
  339. {
  340. if(w == 0) arch = STARPU_CUDA_WORKER;
  341. if(w == 1) arch = STARPU_CPU_WORKER;
  342. }
  343. else
  344. if(w == 0) arch = STARPU_CPU_WORKER;
  345. #else
  346. if(w == 0) arch = STARPU_CPU_WORKER;
  347. #endif //STARPU_USE_CUDA
  348. if(w == 1)
  349. {
  350. int nworkers_to_add = res_rounded[s][w];
  351. int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &nworkers_to_add, arch);
  352. int i;
  353. for(i = 0; i < nworkers_to_add; i++)
  354. workers_add[nw_add++] = workers_to_add[i];
  355. free(workers_to_add);
  356. }
  357. else
  358. {
  359. double nworkers_to_add = res[s][w];
  360. int x = floor(nworkers_to_add);
  361. double x_double = (double)x;
  362. double diff = nworkers_to_add - x_double;
  363. if(diff == 0.0)
  364. {
  365. int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
  366. int i;
  367. for(i = 0; i < x; i++)
  368. workers_add[nw_add++] = workers_to_add[i];
  369. free(workers_to_add);
  370. }
  371. else
  372. {
  373. x+=1;
  374. int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
  375. int i;
  376. if(diff >= 0.3)
  377. for(i = 0; i < x; i++)
  378. workers_add[nw_add++] = workers_to_add[i];
  379. else
  380. for(i = 0; i < x-1; i++)
  381. workers_add[nw_add++] = workers_to_add[i];
  382. free(workers_to_add);
  383. }
  384. }
  385. }
  386. if(nw_add > 0)
  387. {
  388. sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
  389. sc_hypervisor_start_resize(sched_ctxs[s]);
  390. }
  391. // sc_hypervisor_stop_resize(current_sched_ctxs[s]);
  392. }
  393. }
/* nw = all the workers (either in a list or on all machine) */
/* Turn the per-worker LP solution w_in_s[ctx][worker] (fraction of each
 * worker assigned to each context) into per-type counts, then either
 * redistribute workers between existing contexts or, when do_size is set and
 * no context has workers yet, distribute them from scratch.
 *
 * ns / nw           - number of contexts / number of individual workers
 * w_in_s            - fraction of worker w assigned to context s
 * sched_ctxs_input  - contexts to size (NULL = all hypervisor contexts)
 * workers_input     - worker pool for initial distribution (NULL = all)
 * do_size           - nonzero = initial sizing mode
 */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], int *sched_ctxs_input, int *workers_input, unsigned do_size)
{
	int w, s;
	/* aggregate per (context, type): column 0 = CUDA, column 1 = CPU */
	double nworkers[ns][2];
	int nworkers_rounded[ns][2];
	for(s = 0; s < ns; s++)
	{
		nworkers[s][0] = 0.0;
		nworkers[s][1] = 0.0;
		nworkers_rounded[s][0] = 0;
		nworkers_rounded[s][1] = 0;
	}

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch = starpu_worker_get_type(w);

			if(arch == STARPU_CUDA_WORKER)
			{
				nworkers[s][0] += w_in_s[s][w];
				/* NOTE(review): CUDA uses a 0.3 threshold vs 0.5 for CPUs,
				   presumably to favor keeping GPUs — confirm intended */
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][0]++;
			}
			else
			{
				nworkers[s][1] += w_in_s[s][w];
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][1]++;
			}
		}
	}

	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
	else
	{
		int *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		/* initial distribution only makes sense while every ctx is empty */
		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s],
									 STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, 2, nworkers_rounded, nworkers, workers_input, nw);
	}
	return;
}
  449. double sc_hypervisor_lp_find_tmax(double t1, double t2)
  450. {
  451. return t1 + ((t2 - t1)/2);
  452. }