lp_tools.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <math.h>
#include "lp_tools.h"

#ifdef HAVE_GLPK_H
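/* Solve, with GLPK's simplex method, the linear program

	maximize 1/tmax
	s.t.	sum_w v[s][w] * nworkers[s][w] >= flops[s] * (1/tmax)	for each context s
		sum_s nworkers[s][w] = total_nw[w]			for each worker type w
		nworkers[s][w] >= 0.3,	0 <= 1/tmax <= 1

   i.e. find the (possibly fractional) number of workers of each type to give
   to each context so that all contexts finish their remaining flops in the
   same, minimal time tmax. The solution is stored in res[s][w]; the return
   value is the objective, 1/tmax. */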
static double _glp_get_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int total_nw[nw])
{
	int s, w;
	glp_prob *lp;

	/* the constraint matrix has ns*nw+1 columns and ns+nw rows; the extra
	   +1 element accounts for GLPK's arrays being 1-based */
	int ne = (ns*nw+1)*(ns+nw)
		+ 1; /* glp dumbness */
	int n = 1;
	int ia[ne], ja[ne];
	double ar[ne];

	lp = glp_create_prob();
	glp_set_prob_name(lp, "sample");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "max speed");

	/* we add nw*ns columns, one for each type of worker in each context,
	   and another column corresponding to the 1/tmax bound (because 1/tmax
	   is a variable too) */
	glp_add_cols(lp, nw*ns+1);

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			char name[32];
			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
			glp_set_col_name(lp, n, name);
			glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0);
			n++;
		}
	}

	/* 1/tmax should belong to the interval [0.0;1.0] */
	glp_set_col_name(lp, n, "vmax");
	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);

	/* Z = 1/tmax -> 1/tmax is a structural variable; the numbers of CPUs &
	   GPUs in each ctx are auxiliary variables */
	glp_set_obj_coef(lp, n, 1.0);

	n = 1;
	/* one row corresponds to one ctx */
	glp_add_rows(lp, ns);

	for(s = 0; s < ns; s++)
	{
		char name[32];
		snprintf(name, sizeof(name), "ctx%d", s);
		glp_set_row_name(lp, s+1, name);
		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);

		for(w = 0; w < nw; w++)
		{
			int s2;
			for(s2 = 0; s2 < ns; s2++)
			{
				if(s2 == s)
				{
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = v[s][w];
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				else
				{
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = 0.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				n++;
			}
		}

		/* 1/tmax */
		ia[n] = s+1;
		ja[n] = ns*nw+1;
		ar[n] = (-1) * flops[s];
//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
		n++;
	}

	/* we add nw more rows, one per worker type: the number of workers of
	   each type summed over all contexts must equal the total available,
	   i.e. sum(all gpus) = total_nw[0] and sum(all cpus) = total_nw[1] */
	glp_add_rows(lp, nw);

	for(w = 0; w < nw; w++)
	{
		char name[32];
		snprintf(name, sizeof(name), "w%d", w);
		glp_set_row_name(lp, ns+w+1, name);
		for(s = 0; s < ns; s++)
		{
			int w2;
			for(w2 = 0; w2 < nw; w2++)
			{
				if(w2 == w)
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 1.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				else
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 0.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				n++;
			}
		}

		/* 1/tmax gets a zero coefficient in these rows */
		ia[n] = ns+w+1;
		ja[n] = ns*nw+1;
		ar[n] = 0.0;
//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
		n++;

		/* sum(all gpus) = total_nw[0] */
		if(w == 0)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
		/* sum(all cpus) = total_nw[1] */
		if(w == 1)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
	}

	STARPU_ASSERT(n == ne);

	glp_load_matrix(lp, ne-1, ia, ja, ar);

	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	glp_simplex(lp, &parm);

	double vmax = glp_get_obj_val(lp);

	n = 1;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			res[s][w] = glp_get_col_prim(lp, n);
			n++;
		}
	}

	glp_delete_prob(lp);
	return vmax;
}
#endif //HAVE_GLPK_H
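
/* Compute the ideal fractional number of workers of each type for each
   scheduling context. The velocities fed to the LP are hardcoded placeholders
   (200 GFlop/s per GPU, 20 GFlop/s per CPU); the commented-out
   _get_velocity_per_worker_type calls suggest they were meant to be measured.
   Returns tmax = 1/vmax, or 0.0 when GLPK is not available. */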
double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
#ifdef HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];
#endif
	int i = 0;
	struct sched_ctx_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
#ifdef HAVE_GLPK_H
		v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
		v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops */
//		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
#endif
	}

#ifdef HAVE_GLPK_H
	/* the LP returns 1/tmax, so its inverse is the predicted makespan */
	return 1/_glp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
#else
	return 0.0;
#endif
}
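
/* Predicted makespan for the current contexts when the given workers (assumed
   to fall into 2 types, CUDA and CPU) are distributed optimally. The factor
   1000 presumably converts the LP result from seconds to milliseconds. */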
double _lp_get_tmax(int nw, int *workers)
{
	int ntypes_of_workers = 2;
	int total_nw[ntypes_of_workers];
	_get_total_nw(workers, nw, 2, total_nw);

	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();

	double res[nsched_ctxs][ntypes_of_workers];
	return _lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000;
}
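
/* Round the fractional worker counts computed by the LP to integers, one
   worker type at a time. The fractional amount gained or lost by rounding a
   context is accumulated in left_res[w] and folded into the rounding decision
   of the following contexts, so that the per-type totals stay close to the LP
   solution. */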
void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
{
	int s, w;
	double left_res[nw];
	for(w = 0; w < nw; w++)
		left_res[w] = 0.0;

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			int x = floor(res[s][w]);
			double x_double = (double)x;
			double diff = res[s][w] - x_double;

			if(diff != 0.0)
			{
				if(diff > 0.5)
				{
					if(left_res[w] != 0.0)
					{
						if((diff + left_res[w]) > 0.5)
						{
							res_rounded[s][w] = x + 1;
							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
						}
						else
						{
							res_rounded[s][w] = x;
							left_res[w] = (-1.0) * (diff + left_res[w]);
						}
					}
					else
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
					}
				}
				else
				{
					if((diff + left_res[w]) > 0.5)
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
					}
					else
					{
						res_rounded[s][w] = x;
						left_res[w] = diff;
					}
				}
			}
			else
				/* already integral; make sure the entry is initialized */
				res_rounded[s][w] = x;
		}
	}
}
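
/* Move workers between running contexts so that each context ends up with
   what the LP assigned to it. CPU workers (w == 1) follow the rounded
   solution; for the other type the fractional part of the solution decides
   whether the last candidate worker is moved outright (diff > 0.8), also
   added to the receiving context (diff > 0.3), or kept. Excess workers that
   no other context can absorb are simply removed from their context. */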
void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
	int s, s2, w;

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch;
			if(w == 0) arch = STARPU_CUDA_WORKER;
			if(w == 1) arch = STARPU_CPU_WORKER;

			int workers_move[STARPU_NMAXWORKERS];
			int nw_move = 0;

			int workers_add[STARPU_NMAXWORKERS];
			int nw_add = 0;

			if(w == 1)
			{
				/* CPUs: follow the rounded solution */
				int nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch);
				if(nworkers_ctx > res_rounded[s][w])
				{
					int nworkers_to_move = nworkers_ctx - res_rounded[s][w];
					int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
					int i;
					for(i = 0; i < nworkers_to_move; i++)
						workers_move[nw_move++] = workers_to_move[i];
					free(workers_to_move);
				}
			}
			else
			{
				/* GPUs: use the fractional solution */
				double nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch) * 1.0;
				if(nworkers_ctx > res[s][w])
				{
					double nworkers_to_move = nworkers_ctx - res[s][w];
					int x = floor(nworkers_to_move);
					double x_double = (double)x;
					double diff = nworkers_to_move - x_double;
					if(diff == 0.0)
					{
						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
						if(x > 0)
						{
							int i;
							for(i = 0; i < x; i++)
								workers_move[nw_move++] = workers_to_move[i];
						}
						free(workers_to_move);
					}
					else
					{
						x+=1;
						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
						if(x > 0)
						{
							int i;
							for(i = 0; i < x-1; i++)
								workers_move[nw_move++] = workers_to_move[i];

							/* the last worker is moved outright only if the
							   fractional excess is large enough; a moderate
							   excess only shares it with the other context */
							if(diff > 0.8)
								workers_move[nw_move++] = workers_to_move[x-1];
							else
								if(diff > 0.3)
									workers_add[nw_add++] = workers_to_move[x-1];
						}
						free(workers_to_move);
					}
				}
			}

			for(s2 = 0; s2 < ns; s2++)
			{
				if(sched_ctxs[s2] != sched_ctxs[s])
				{
					double nworkers_ctx2 = get_nworkers_ctx(sched_ctxs[s2], arch) * 1.0;
					if((res[s2][w] - nworkers_ctx2) >= 0.0 && nw_move > 0)
					{
						sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
						nw_move = 0;
						break;
					}
					if((res[s2][w] - nworkers_ctx2) >= 0.0 && (res[s2][w] - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
					{
						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
						nw_add = 0;
						break;
					}
				}
			}

			if(nw_move > 0)
				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
		}
	}
}
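
/* Initial distribution: hand out the given pool of workers to the contexts
   according to the LP solution, rather than moving workers between contexts.
   CPU workers (w == 1) are added by the rounded counts; for the other type a
   fractional part of at least 0.3 is enough to grant the extra worker. */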
void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
{
	int current_nworkers = workers == NULL ? starpu_worker_get_count() : nworkers;
	int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs;

	int s, w;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch;
			if(w == 0) arch = STARPU_CUDA_WORKER;
			if(w == 1) arch = STARPU_CPU_WORKER;

			if(w == 1)
			{
				/* CPUs: add the rounded number of workers */
				int nworkers_to_add = res_rounded[s][w];
				int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &nworkers_to_add, arch);

				if(nworkers_to_add > 0)
				{
					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, current_sched_ctxs[s]);
					sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					struct policy_config *new_config = sched_ctx_hypervisor_get_config(current_sched_ctxs[s]);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						/* workers whose max_idle is still the default get
						   the max_idle configured for newly added workers */
						new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;
				}
				free(workers_to_add);
			}
			else
			{
				/* GPUs: a fractional part of at least 0.3 grants the extra worker */
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
					if(x > 0)
					{
						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					}
					free(workers_to_add);
				}
				else
				{
					x+=1;
					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
					if(x > 0)
					{
						if(diff >= 0.3)
							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
						else
							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x-1, current_sched_ctxs[s]);
						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					}
					free(workers_to_add);
				}
			}
		}
		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
	}
}
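
#if 0
/* Hypothetical usage sketch (not part of the original file): how a resizing
 * policy might chain these helpers. Whether _get_total_nw accepts a NULL
 * worker list (meaning "all workers"), as _lp_distribute_resources_in_ctxs
 * does, is an assumption here. */
static void resize_according_to_lp(void)
{
	int ntypes = 2; /* 0 = CUDA, 1 = CPU, as assumed throughout this file */
	int nctxs = sched_ctx_hypervisor_get_nsched_ctxs();
	int total_nw[ntypes];
	_get_total_nw(NULL, starpu_worker_get_count(), ntypes, total_nw);

	double res[nctxs][ntypes];
	double tmax = _lp_get_nworkers_per_ctx(nctxs, ntypes, res, total_nw);
	if(tmax > 0.0) /* 0.0 means GLPK was not available */
	{
		int res_rounded[nctxs][ntypes];
		_lp_round_double_to_int(nctxs, ntypes, res, res_rounded);
		_lp_redistribute_resources_in_ctxs(nctxs, ntypes, res_rounded, res);
	}
}
#endif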