lp_tools.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <math.h>
  17. #include "lp_tools.h"
  18. #include <starpu_config.h>
#ifdef STARPU_HAVE_GLPK_H
/* Solve the worker-allocation problem as a linear program with GLPK.
 *
 * Columns (structural variables): one per (context, worker type) pair —
 * the number of workers of type w assigned to context s — plus one extra
 * column for 1/tmax. Objective: maximize 1/tmax (i.e. minimize tmax).
 * Rows (constraints): one per context
 *     v[s][w] * nworkers[s][w] - flops[s] * (1/tmax) >= 0
 * (the context can finish its flops within tmax), and one per worker type
 * fixing the sum over contexts to total_nw[w].
 *
 * v[s][w]     : velocity of one worker of type w in context s
 * flops[s]    : remaining work of context s (GFlops, see caller)
 * res[s][w]   : output — fractional number of workers per context/type
 * total_nw[w] : total number of workers of each type on the machine
 * Returns the objective value vmax = 1/tmax. */
static double _glp_get_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int total_nw[nw])
{
	int s, w;
	glp_prob *lp;

	/* Number of stored matrix entries (zeros included): (ns*nw + 1)
	 * columns times (ns + nw) rows, plus one because GLPK arrays are
	 * 1-based. */
	int ne =
		(ns*nw+1)*(ns+nw)
		+ 1; /* glp dumbness */
	int n = 1;
	int ia[ne], ja[ne];
	double ar[ne];

	lp = glp_create_prob();
	glp_set_prob_name(lp, "sample");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "max speed");

	/* we add nw*ns columns one for each type of worker in each context
	   and another column corresponding to the 1/tmax bound (bc 1/tmax is
	   a variable too) */
	glp_add_cols(lp, nw*ns+1);

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			char name[32];
			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
			glp_set_col_name(lp, n, name);
			/* Lower bound 0.3: every context keeps at least a
			 * fraction of each worker type, so none is fully
			 * starved. */
			glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0);
			n++;
		}
	}

	/* 1/tmax should belong to the interval [0.0;1.0] */
	glp_set_col_name(lp, n, "vmax");
	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are
	 * auxiliar variables */
	glp_set_obj_coef(lp, n, 1.0);

	/* Fill the constraint matrix, entry index n restarting at 1. */
	n = 1;
	/* one row corresponds to one ctx */
	glp_add_rows(lp, ns);

	for(s = 0; s < ns; s++)
	{
		char name[32];
		snprintf(name, sizeof(name), "ctx%d", s);
		glp_set_row_name(lp, s+1, name);
		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);

		for(w = 0; w < nw; w++)
		{
			int s2;
			for(s2 = 0; s2 < ns; s2++)
			{
				if(s2 == s)
				{
					/* this context's own workers contribute
					 * at their velocity */
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = v[s][w];
				}
				else
				{
					/* other contexts' columns: explicit
					 * zero entry */
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = 0.0;
				}
				n++;
			}
		}
		/* 1/tmax column: coefficient -flops[s] */
		ia[n] = s+1;
		ja[n] = ns*nw+1;
		ar[n] = (-1) * flops[s];
		n++;
	}

	/* we add another linear constraint : sum(all cpus) = 9 and
	 * sum(all gpus) = 3 (i.e. per-type totals fixed to total_nw[]) */
	glp_add_rows(lp, nw);

	for(w = 0; w < nw; w++)
	{
		char name[32];
		snprintf(name, sizeof(name), "w%d", w);
		glp_set_row_name(lp, ns+w+1, name);
		for(s = 0; s < ns; s++)
		{
			int w2;
			for(w2 = 0; w2 < nw; w2++)
			{
				if(w2 == w)
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 1.0;
				}
				else
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 0.0;
				}
				n++;
			}
		}
		/* 1/tmax column does not take part in the per-type totals */
		ia[n] = ns+w+1;
		ja[n] = ns*nw+1;
		ar[n] = 0.0;
		n++;

		/* sum(all gpus) fixed to total_nw[0] (w == 0 is CUDA, see
		 * callers) */
		if(w == 0)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
		/* sum(all cpus) fixed to total_nw[1] */
		if(w == 1)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
	}

	/* n was post-incremented past the last entry, so it must equal ne */
	STARPU_ASSERT(n == ne);

	glp_load_matrix(lp, ne-1, ia, ja, ar);

	/* Run the simplex solver, suppressing terminal output. */
	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	glp_simplex(lp, &parm);

	double vmax = glp_get_obj_val(lp);

	/* Copy the optimal (fractional) allocation out to res[][]. */
	n = 1;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			res[s][w] = glp_get_col_prim(lp, n);
			n++;
		}
	}

	glp_delete_prob(lp);
	return vmax;
}
#endif //STARPU_HAVE_GLPK_H
/* Compute, through the GLPK linear program, how many workers of each type
 * each scheduling context should get.
 *
 * res[s][w]   : output — fractional number of workers of type w for ctx s
 * total_nw[w] : total number of workers of each type available
 * Returns tmax (the inverse of the LP's objective vmax = 1/tmax), or 0.0
 * when StarPU was built without GLPK support. */
double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];
#endif
	int i = 0;
	struct sched_ctx_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
#ifdef STARPU_HAVE_GLPK_H
		/* Hard-coded velocities: index 0 = CUDA, index 1 = CPU
		 * (the measured-velocity calls are left commented out). */
		v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER);
		v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER);
		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
#endif
	}
#ifdef STARPU_HAVE_GLPK_H
	/* The LP maximizes vmax = 1/tmax, so invert to obtain tmax.
	 * NOTE(review): if the solver yields vmax == 0 this divides by
	 * zero — confirm the LP is always feasible with vmax > 0. */
	return 1/_glp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
#else
	return 0.0;
#endif
}
  178. double _lp_get_tmax(int nw, int *workers)
  179. {
  180. int ntypes_of_workers = 2;
  181. int total_nw[ntypes_of_workers];
  182. _get_total_nw(workers, nw, 2, total_nw);
  183. int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
  184. double res[nsched_ctxs][ntypes_of_workers];
  185. return _lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000;
  186. }
  187. void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
  188. {
  189. int s, w;
  190. double left_res[nw];
  191. for(w = 0; w < nw; w++)
  192. left_res[nw] = 0.0;
  193. for(s = 0; s < ns; s++)
  194. {
  195. for(w = 0; w < nw; w++)
  196. {
  197. int x = floor(res[s][w]);
  198. double x_double = (double)x;
  199. double diff = res[s][w] - x_double;
  200. if(diff != 0.0)
  201. {
  202. if(diff > 0.5)
  203. {
  204. if(left_res[w] != 0.0)
  205. {
  206. if((diff + left_res[w]) > 0.5)
  207. {
  208. res_rounded[s][w] = x + 1;
  209. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  210. }
  211. else
  212. {
  213. res_rounded[s][w] = x;
  214. left_res[w] = (-1.0) * (diff + left_res[w]);
  215. }
  216. }
  217. else
  218. {
  219. res_rounded[s][w] = x + 1;
  220. left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
  221. }
  222. }
  223. else
  224. {
  225. if((diff + left_res[w]) > 0.5)
  226. {
  227. res_rounded[s][w] = x + 1;
  228. left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
  229. }
  230. else
  231. {
  232. res_rounded[s][w] = x;
  233. left_res[w] = diff;
  234. }
  235. }
  236. }
  237. }
  238. }
  239. }
/* Move workers between the existing scheduling contexts so each context
 * converges to the allocation computed by the linear program.
 *
 * res_rounded[s][w] : integer target number of workers of type w in ctx s
 * res[s][w]         : fractional target (used for the w == 0 / CUDA case,
 *                     where the fractional part decides between moving and
 *                     merely sharing the borderline worker)
 * Worker-type convention (as elsewhere in this file): w == 0 -> CUDA,
 * w == 1 -> CPU. */
void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch;
			/* NOTE(review): arch stays uninitialized if nw > 2 —
			 * callers only ever use nw == 2. */
			if(w == 0) arch = STARPU_CUDA_WORKER;
			if(w == 1) arch = STARPU_CPU_WORKER;

			/* surplus workers this context should give away ... */
			int workers_move[STARPU_NMAXWORKERS];
			int nw_move = 0;
			/* ... or share with (add to) another context */
			int workers_add[STARPU_NMAXWORKERS];
			int nw_add = 0;

			if(w == 1)
			{
				/* CPUs: compare against the rounded integer target */
				int nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch);
				if(nworkers_ctx > res_rounded[s][w])
				{
					int nworkers_to_move = nworkers_ctx - res_rounded[s][w];
					int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch);
					int i;
					for(i = 0; i < nworkers_to_move; i++)
						workers_move[nw_move++] = workers_to_move[i];
					free(workers_to_move);
				}
			}
			else
			{
				/* CUDA: compare against the fractional target; the
				 * fractional part of the surplus decides the fate of
				 * the borderline worker: moved when diff > 0.8,
				 * shared when 0.3 < diff <= 0.8, kept otherwise. */
				double nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch) * 1.0;
				if(nworkers_ctx > res[s][w])
				{
					double nworkers_to_move = nworkers_ctx - res[s][w];
					int x = floor(nworkers_to_move);
					double x_double = (double)x;
					double diff = nworkers_to_move - x_double;
					if(diff == 0.0)
					{
						/* whole number of surplus workers */
						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
						if(x > 0)
						{
							int i;
							for(i = 0; i < x; i++)
								workers_move[nw_move++] = workers_to_move[i];
						}
						free(workers_to_move);
					}
					else
					{
						x+=1;
						int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch);
						if(x > 0)
						{
							int i;
							for(i = 0; i < x-1; i++)
								workers_move[nw_move++] = workers_to_move[i];
							if(diff > 0.8)
								workers_move[nw_move++] = workers_to_move[x-1];
							else
								if(diff > 0.3)
									workers_add[nw_add++] = workers_to_move[x-1];
						}
						free(workers_to_move);
					}
				}
			}

			/* Find a context still under its LP target and hand the
			 * surplus workers over (first match wins). */
			for(s2 = 0; s2 < ns; s2++)
			{
				if(sched_ctxs[s2] != sched_ctxs[s])
				{
					double nworkers_ctx2 = get_nworkers_ctx(sched_ctxs[s2], arch) * 1.0;
					if((res[s2][w] - nworkers_ctx2) >= 0.0 && nw_move > 0)
					{
						sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
						nw_move = 0;
						break;
					}
					if((res[s2][w] - nworkers_ctx2) >= 0.0 && (res[s2][w] - nworkers_ctx2) <= (double)nw_add && nw_add > 0)
					{
						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
						nw_add = 0;
						break;
					}
				}
			}
			/* No context wanted them: just drop the surplus workers
			 * from this context. */
			if(nw_move > 0)
				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
		}
	}
}
/* Initial distribution of workers into scheduling contexts according to the
 * LP result (add-only counterpart of _lp_redistribute_resources_in_ctxs).
 *
 * sched_ctxs : contexts to fill, or NULL for all registered contexts
 * res_rounded[s][w] : integer target for CPUs (w == 1)
 * res[s][w]         : fractional target for CUDA (w == 0)
 * workers / nworkers : candidate worker pool, or NULL for all workers. */
void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
{
	/* NULL means "every worker" / "every registered context" */
	int current_nworkers = workers == NULL ? starpu_worker_get_count() : nworkers;
	int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs;

	int s, w;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch;
			/* NOTE(review): arch stays uninitialized if nw > 2 —
			 * callers only ever use nw == 2. */
			if(w == 0) arch = STARPU_CUDA_WORKER;
			if(w == 1) arch = STARPU_CPU_WORKER;

			if(w == 1)
			{
				/* CPUs: add the rounded (integer) number of workers */
				int nworkers_to_add = res_rounded[s][w];
				int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &nworkers_to_add, arch);

				if(nworkers_to_add > 0)
				{
					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, current_sched_ctxs[s]);
					sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					/* freshly added workers get the "new worker"
					 * idle threshold unless one is already set */
					struct policy_config *new_config = sched_ctx_hypervisor_get_config(current_sched_ctxs[s]);
					int i;
					for(i = 0; i < nworkers_to_add; i++)
						new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;
				}
				free(workers_to_add);
			}
			else
			{
				/* CUDA: fractional target; round up and keep the
				 * extra worker only when the fraction is >= 0.3 */
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					/* whole number of workers requested */
					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
					if(x > 0)
					{
						sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					}
					free(workers_to_add);
				}
				else
				{
					x+=1;
					int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch);
					if(x > 0)
					{
						if(diff >= 0.3)
							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]);
						else
							sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x-1, current_sched_ctxs[s]);
						sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]);
					}
					free(workers_to_add);
				}
			}
		}
		/* context fully populated: freeze resizing until the
		 * hypervisor decides otherwise */
		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
	}
}