ispeed_lp_policy.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011, 2012  INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <starpu_config.h>
#include "sc_hypervisor_lp.h"
#include "sc_hypervisor_policy.h"
#include <math.h>
#include <sys/time.h>
#include <errno.h>

static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns],
			   double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw],
			   int *workers, unsigned integer);
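
/* In outline, the linear program that _glp_resolve builds below is
 * (this is only a recap of the constraints constructed in that function):
 *
 *   variables:    nflops[s][w] >= 0      flops given to worker w in ctx s
 *                 x[s][w] in [0,1]       "worker w belongs to ctx s"
 *                                        (binary in the integer variant)
 *   maximize      sum_{s,w} x[s][w]
 *   subject to    nflops[s][w] / velocity[s][w] <= tmax * x[s][w]   for all s,w
 *                 sum_w nflops[s][w]  = flops[s]                    for all s
 *                 sum_s x[s][w]       = 1                           for all w
 *                 sum_s nflops[s][w] >= 0.1                         for all w
 *
 * tmax is not an unknown of the program: the caller fixes it and searches over
 * its value by dichotomy (see _compute_flops_distribution_over_ctxs). */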
static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double flops_on_w[ns][nw], int *in_sched_ctxs, int *workers)
{
	double draft_w_in_s[ns][nw];
	double draft_flops_on_w[ns][nw];
	double flops[ns];
	double velocity[ns][nw];

	int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;

	int w, s;
	struct sc_hypervisor_wrapper* sc_w = NULL;
	double total_flops = 0.0;

	for(s = 0; s < ns; s++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
		for(w = 0; w < nw; w++)
		{
			w_in_s[s][w] = 0.0;
			draft_w_in_s[s][w] = 0.0;
			flops_on_w[s][w] = 0.0;
			draft_flops_on_w[s][w] = 0.0;

			int worker = workers == NULL ? w : workers[w];

			velocity[s][w] = _get_velocity_per_worker(sc_w, worker);
			if(velocity[s][w] == -1.0)
			{
				enum starpu_archtype arch = starpu_worker_get_type(worker);
				velocity[s][w] = sc_hypervisor_get_velocity(sc_w, arch);
				if(arch == STARPU_CUDA_WORKER)
				{
					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx);
					if(!worker_in_ctx)
					{
						/* the CUDA worker is not in this ctx yet: combine its compute
						   velocity v with the RAM->CUDA transfer velocity t as
						   v*t/(v+t), i.e. 1/(1/v + 1/t), so the velocity is
						   discounted by the cost of moving the data to the GPU */
						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker) / 1000;
						velocity[s][w] = (velocity[s][w] * transfer_velocity) / (velocity[s][w] + transfer_velocity);
					}
				}
			}
//			printf("v[w%d][s%d] = %lf\n", w, s, velocity[s][w]);
		}

		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
		flops[s] = config->ispeed_ctx_sample / 1000000000; /* in gflops */
	}
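
	/* What follows is a dichotomic search over tmax: for a fixed tmax the
	   problem is linear, so we repeatedly ask _glp_resolve whether all flops[s]
	   fit within tmax, shrink tmax while the answer is yes and raise tmin while
	   it is no. sc_hypervisor_lp_find_tmax (defined elsewhere) is assumed to
	   return a candidate between tmin and tmax, presumably their midpoint; the
	   search stops when the bracket gets narrower than 0.5 or when tmin and
	   tmax meet. */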
	/* take the execution time of the slowest ctx as a starting point,
	   then try to minimize it while increasing it a little for the
	   faster ctxs */
	double tmax = _get_slowest_ctx_exec_time();
	double smallest_tmax = _get_fastest_ctx_exec_time(); //tmax - 0.5*tmax;
//	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);

	double res = 1.0;
	unsigned has_sol = 0;
	double tmin = 0.0;
	double old_tmax = 0.0;
	unsigned found_sol = 0;

	struct timeval start_time;
	struct timeval end_time;
	int nd = 0;
	gettimeofday(&start_time, NULL);

	/* tmax is fixed, we do not treat it as an unknown:
	   we just vary its value by dichotomy */
	while(tmax > 1.0)
	{
		/* find a solution and save the values in the draft tables;
		   only if the system has a solution do we copy them into
		   the tables handed back to the caller */
		res = _glp_resolve(ns, nw, velocity, flops, tmax, draft_flops_on_w, draft_w_in_s, workers, 1);
		if(res != 0.0)
		{
			for(s = 0; s < ns; s++)
				for(w = 0; w < nw; w++)
				{
					w_in_s[s][w] = draft_w_in_s[s][w];
					flops_on_w[s][w] = draft_flops_on_w[s][w];
				}
			has_sol = 1;
			found_sol = 1;
		}
		else
			has_sol = 0;

		/* if we have a solution with this tmax, try a smaller value,
		   still bigger than the old minimum */
		if(has_sol)
		{
			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
				break;
			old_tmax = tmax;
		}
		else /* otherwise try a bigger one, still smaller than the old tmax */
		{
			tmin = tmax;
			if(old_tmax != 0.0)
				tmax = old_tmax;
		}
		if(tmin == tmax) break;
		tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);

		if(tmax < smallest_tmax)
		{
			tmax = old_tmax;
			tmin = smallest_tmax;
			tmax = sc_hypervisor_lp_find_tmax(tmin, tmax);
		}
		nd++;
	}
	gettimeofday(&end_time, NULL);

	long diff_s = end_time.tv_sec - start_time.tv_sec;
	long diff_us = end_time.tv_usec - start_time.tv_usec;
	float timing = (float)(diff_s*1000000 + diff_us) / 1000;
//	fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);

	return found_sol;
}

/*
 * GNU Linear Programming Kit backend
 */
#ifdef STARPU_HAVE_GLPK_H
#include <glpk.h>

static double _glp_resolve(int ns, int nw, double velocity[ns][nw], double flops[ns],
			   double tmax, double flops_on_w[ns][nw], double w_in_s[ns][nw],
			   int *workers, unsigned integer)
{
	int w, s;
	glp_prob *lp;

//	printf("try with tmax %lf\n", tmax);
	lp = glp_create_prob();
	glp_set_prob_name(lp, "StarPU theoretical bound");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "total execution time");

	{
		int ne = 5 * ns * nw /* 2 entries per execution-time row, plus 1 per (s,w)
					in each of the 3 other row blocks */
			+ 1; /* glp dumbness: GLPK arrays are 1-based, entry 0 is unused */
		int n = 1;
		int ia[ne], ja[ne];
		double ar[ne];

		/* Variables: number of flops assigned to worker w in context s, and
		   the acknowledgement that the worker w belongs to the context s */
		glp_add_cols(lp, 2*nw*ns);
#define colnum(w, s) ((s)*nw+(w)+1)
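		/* Column layout (a recap of the calls below): columns 1..nw*ns hold
		   nflops[s][w], addressed as colnum(w,s) = s*nw + w + 1; columns
		   nw*ns+1..2*nw*ns hold the matching x[s][w] "worker w in ctx s"
		   variables, addressed as nw*ns + colnum(w,s). Only the x columns
		   carry an objective coefficient. */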
		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
				glp_set_obj_coef(lp, nw*ns+colnum(w,s), 1.);

		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
			{
				char name[32];
				snprintf(name, sizeof(name), "flopsw%ds%dn", w, s);
				glp_set_col_name(lp, colnum(w,s), name);
				glp_set_col_bnds(lp, colnum(w,s), GLP_LO, 0., 0.);

				snprintf(name, sizeof(name), "w%ds%dn", w, s);
				glp_set_col_name(lp, nw*ns+colnum(w,s), name);
				if (integer)
				{
					glp_set_col_kind(lp, nw*ns+colnum(w, s), GLP_IV);
					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0, 1);
				}
				else
					glp_set_col_bnds(lp, nw*ns+colnum(w,s), GLP_DB, 0.0, 1.0);
			}

		int curr_row_idx = 0;
		/* Total worker execution time */
		glp_add_rows(lp, nw*ns);

		/* nflops[s][w]/v[s][w] < x[s][w]*tmax */
		for(s = 0; s < ns; s++)
		{
			for (w = 0; w < nw; w++)
			{
				char name[32], title[64];
				starpu_worker_get_name(w, name, sizeof(name));
				snprintf(title, sizeof(title), "worker %s", name);
				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);

				/* nflops[s][w] */
				ia[n] = curr_row_idx+s*nw+w+1;
				ja[n] = colnum(w, s);
				ar[n] = 1 / velocity[s][w];
				n++;

				/* x[s][w] = 1 | 0 */
				ia[n] = curr_row_idx+s*nw+w+1;
				ja[n] = nw*ns+colnum(w,s);
				ar[n] = (-1) * tmax;
				n++;

				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
			}
		}
		curr_row_idx += nw*ns;
		/* sum(flops[s][w]) = flops[s] */
		glp_add_rows(lp, ns);
		for (s = 0; s < ns; s++)
		{
			char title[64];
			snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s);
			glp_set_row_name(lp, curr_row_idx+s+1, title);
			for (w = 0; w < nw; w++)
			{
				ia[n] = curr_row_idx+s+1;
				ja[n] = colnum(w, s);
				ar[n] = 1;
				n++;
			}
			glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]);
		}
		curr_row_idx += ns;
		/* sum(x[s][w]) = 1 */
		glp_add_rows(lp, nw);
		for (w = 0; w < nw; w++)
		{
			char name[32], title[64];
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "w%x", w);
			glp_set_row_name(lp, curr_row_idx+w+1, title);
			for(s = 0; s < ns; s++)
			{
				ia[n] = curr_row_idx+w+1;
				ja[n] = nw*ns+colnum(w,s);
				ar[n] = 1;
				n++;
			}
			if(integer)
				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
			else
				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
		}
		curr_row_idx += nw;

		/* sum(nflops[s][w]) >= 0.1: every worker must be given at least some work */
		glp_add_rows(lp, nw);
		for (w = 0; w < nw; w++)
		{
			char name[32], title[64];
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "flopsw%x", w);
			glp_set_row_name(lp, curr_row_idx+w+1, title);
			for(s = 0; s < ns; s++)
			{
				ia[n] = curr_row_idx+w+1;
				ja[n] = colnum(w,s);
				ar[n] = 1;
				n++;
			}
			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.);
		}

		if(n != ne)
			printf("ns = %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
		STARPU_ASSERT(n == ne);

		glp_load_matrix(lp, ne-1, ia, ja, ar);
	}
	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	int ret = glp_simplex(lp, &parm);
	if (ret)
	{
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}

	if (integer)
	{
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
		glp_intopt(lp, &iocp);
		int stat = glp_mip_status(lp);
		/* if we don't have a solution return */
		if(stat == GLP_NOFEAS)
		{
			glp_delete_prob(lp);
			lp = NULL;
			return 0.0;
		}
	}

	int stat = glp_get_prim_stat(lp);
	/* if we don't have a solution return */
	if(stat == GLP_NOFEAS)
	{
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}

	double res = glp_get_obj_val(lp);
	for(s = 0; s < ns; s++)
		for(w = 0; w < nw; w++)
		{
			flops_on_w[s][w] = glp_get_col_prim(lp, colnum(w, s));
			if (integer)
				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum(w, s));
			else
				w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum(w,s));
//			printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]);
		}

	glp_delete_prob(lp);
	return res;
}
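
/* Hook called when a task is popped from a context: when the hypervisor lock
   is free and the velocity gap between contexts is deemed large enough, the
   flops distribution is recomputed over all contexts and all workers, and the
   contexts are resized accordingly. */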
static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
{
	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
	_get_velocity_per_worker(sc_w, worker);

	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		if(_velocity_gap_btw_ctxs())
		{
			int ns = sc_hypervisor_get_nsched_ctxs();
			int nw = starpu_worker_get_count(); /* Number of different workers */

			double w_in_s[ns][nw];
			double flops_on_w[ns][nw];

			unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw, w_in_s, flops_on_w, NULL, NULL);
			/* if we found at least one solution, redistribute the resources */
			if(found_sol)
			{
				int w, s;
				double nworkers[ns][2];
				int nworkers_rounded[ns][2];
				for(s = 0; s < ns; s++)
				{
					nworkers[s][0] = 0.0;
					nworkers[s][1] = 0.0;
					nworkers_rounded[s][0] = 0;
					nworkers_rounded[s][1] = 0;
				}
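
				/* Turn the (possibly fractional) x[s][w] values returned by the
				   LP into per-context worker counts: slot 0 counts CUDA workers,
				   slot 1 the remaining (CPU) workers. A CUDA worker is counted
				   once its fraction reaches 0.3, a CPU worker only above 0.5;
				   the lower GPU threshold presumably reflects that even a
				   partially assigned GPU is worth keeping. Both the fractional
				   and the rounded counts are handed to the resizing call below. */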
				for(s = 0; s < ns; s++)
				{
					for(w = 0; w < nw; w++)
					{
						enum starpu_archtype arch = starpu_worker_get_type(w);

						if(arch == STARPU_CUDA_WORKER)
						{
							nworkers[s][0] += w_in_s[s][w];
							if(w_in_s[s][w] >= 0.3)
								nworkers_rounded[s][0]++;
						}
						else
						{
							nworkers[s][1] += w_in_s[s][w];
							if(w_in_s[s][w] > 0.5)
								nworkers_rounded[s][1]++;
						}
					}
				}
/*				for(s = 0; s < ns; s++) */
/*					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
/*					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */

				sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
			}
		}
		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
	}
}
static void ispeed_lp_end_ctx(unsigned sched_ctx)
{
	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
	int worker;
/*	for(worker = 0; worker < 12; worker++) */
/*		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_velocity[worker]); */

	return;
}
struct sc_hypervisor_policy ispeed_lp_policy = {
	.size_ctxs = NULL,
	.handle_poped_task = ispeed_lp_handle_poped_task,
	.handle_pushed_task = NULL,
	.handle_idle_cycle = NULL,
	.handle_idle_end = NULL,
	.handle_post_exec_hook = NULL,
	.handle_submitted_job = NULL,
	.end_ctx = ispeed_lp_end_ctx,
	.custom = 0,
	.name = "ispeed_lp"
};

#endif /* STARPU_HAVE_GLPK_H */