/* throughput_lp_policy.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011,2013-2015 Inria
  4. * Copyright (C) 2013,2016,2017 CNRS
  5. * Copyright (C) 2017 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <starpu_config.h>
  19. #include "sc_hypervisor_lp.h"
  20. #include "sc_hypervisor_policy.h"
  21. #include <math.h>
  22. #include <sys/time.h>
  23. static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer);
  24. static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], unsigned *in_sched_ctxs, int *workers)
  25. {
  26. double speed[ns][nw];
  27. unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
  28. int w,s;
  29. struct sc_hypervisor_wrapper* sc_w = NULL;
  30. for(s = 0; s < ns; s++)
  31. {
  32. sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
  33. for(w = 0; w < nw; w++)
  34. {
  35. w_in_s[s][w] = 0.0;
  36. int worker = workers == NULL ? w : workers[w];
  37. enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
  38. speed[s][w] = sc_hypervisor_get_speed(sc_w, arch);
  39. }
  40. }
  41. struct timeval start_time;
  42. struct timeval end_time;
  43. gettimeofday(&start_time, NULL);
  44. double res = _glp_resolve(ns, nw, speed, w_in_s, 1);
  45. gettimeofday(&end_time, NULL);
  46. long diff_s = end_time.tv_sec - start_time.tv_sec;
  47. long diff_us = end_time.tv_usec - start_time.tv_usec;
  48. __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000;
  49. if(res > 0.0)
  50. return 1;
  51. return 0;
  52. }
  53. /*
  54. * GNU Linear Programming Kit backend
  55. */
  56. #ifdef STARPU_HAVE_GLPK_H
  57. #include <glpk.h>
  58. static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer)
  59. {
  60. int w = 0, s = 0;
  61. glp_prob *lp;
  62. lp = glp_create_prob();
  63. glp_set_prob_name(lp, "StarPU theoretical bound");
  64. glp_set_obj_dir(lp, GLP_MAX);
  65. glp_set_obj_name(lp, "total speed");
  66. {
  67. int ne = 2 * ns * nw /* worker execution time */
  68. + 1
  69. + 1 ; /* glp dumbness */
  70. int n = 1;
  71. int ia[ne], ja[ne];
  72. double ar[ne];
  73. /* Variables: x[s][w]
  74. the acknwoledgment that the worker w belongs to the context s */
  75. glp_add_cols(lp, nw*ns + 1);
  76. for(s = 0; s < ns; s++)
  77. for(w = 0; w < nw; w++)
  78. {
  79. char name[32];
  80. snprintf(name, sizeof(name), "w%ds%dn", w, s);
  81. glp_set_col_name(lp, s*nw+w+1, name);
  82. if (integer)
  83. {
  84. glp_set_col_kind(lp, s*nw+w+1, GLP_IV);
  85. glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0, 1);
  86. }
  87. else
  88. glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0.0, 1.0);
  89. }
  90. /* vmax should be positif */
  91. /* Z = vmax structural variable, x[s][w] are auxiliar variables */
  92. glp_set_col_name(lp, nw*ns+1, "vmax");
  93. glp_set_col_bnds(lp, nw*ns+1, GLP_LO, 0.0, 0.0);
  94. glp_set_obj_coef(lp, nw*ns+1, 1.);
  95. int curr_row_idx = 0;
  96. /* Total worker speed */
  97. glp_add_rows(lp, 1);
  98. /*sum(x[s][w]*speed[s][w]) >= vmax */
  99. char name[32], title[64];
  100. starpu_worker_get_name(w, name, sizeof(name));
  101. snprintf(title, sizeof(title), "worker %s", name);
  102. glp_set_row_name(lp, curr_row_idx + 1, title);
  103. for(s = 0; s < ns; s++)
  104. {
  105. for (w = 0; w < nw; w++)
  106. {
  107. /* x[s][w] */
  108. ia[n] = curr_row_idx + 1;
  109. ja[n] = s*nw+w+1;
  110. ar[n] = speed[s][w];
  111. n++;
  112. }
  113. }
  114. /* vmax */
  115. ia[n] = curr_row_idx + 1;
  116. ja[n] = nw*ns+1;
  117. ar[n] = (-1);
  118. n++;
  119. glp_set_row_bnds(lp, curr_row_idx + 1, GLP_LO, 0.0, 0.0);
  120. curr_row_idx += 1 ;
  121. /* sum(x[s][w]) = 1 */
  122. glp_add_rows(lp, nw);
  123. for (w = 0; w < nw; w++)
  124. {
  125. char name[32], title[64];
  126. starpu_worker_get_name(w, name, sizeof(name));
  127. snprintf(title, sizeof(title), "w%x", w);
  128. glp_set_row_name(lp, curr_row_idx+w+1, title);
  129. for(s = 0; s < ns; s++)
  130. {
  131. ia[n] = curr_row_idx+w+1;
  132. ja[n] = s*nw+w+1;
  133. ar[n] = 1;
  134. n++;
  135. }
  136. if(integer)
  137. glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
  138. else
  139. glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
  140. }
  141. if(n != ne)
  142. printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne);
  143. STARPU_ASSERT(n == ne);
  144. glp_load_matrix(lp, ne-1, ia, ja, ar);
  145. }
  146. glp_smcp parm;
  147. glp_init_smcp(&parm);
  148. parm.msg_lev = GLP_MSG_OFF;
  149. int ret = glp_simplex(lp, &parm);
  150. if (ret)
  151. {
  152. glp_delete_prob(lp);
  153. lp = NULL;
  154. return 0.0;
  155. }
  156. if (integer)
  157. {
  158. glp_iocp iocp;
  159. glp_init_iocp(&iocp);
  160. iocp.msg_lev = GLP_MSG_OFF;
  161. glp_intopt(lp, &iocp);
  162. int stat = glp_mip_status(lp);
  163. /* if we don't have a solution return */
  164. if(stat == GLP_NOFEAS)
  165. {
  166. glp_delete_prob(lp);
  167. lp = NULL;
  168. return 0.0;
  169. }
  170. }
  171. int stat = glp_get_prim_stat(lp);
  172. /* if we don't have a solution return */
  173. if(stat == GLP_NOFEAS)
  174. {
  175. glp_delete_prob(lp);
  176. lp = NULL;
  177. printf("No sol!!!\n");
  178. return 0.0;
  179. }
  180. double res = glp_get_obj_val(lp);
  181. for(s = 0; s < ns; s++)
  182. for(w = 0; w < nw; w++)
  183. {
  184. if (integer)
  185. w_in_s[s][w] = (double)glp_mip_col_val(lp, s*nw+w+1);
  186. else
  187. w_in_s[s][w] = glp_get_col_prim(lp, s*nw+w+1);
  188. }
  189. glp_delete_prob(lp);
  190. return res;
  191. }
  192. static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
  193. {
  194. int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
  195. int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */
  196. sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;
  197. double w_in_s[ns][nw];
  198. unsigned found_sol = _compute_max_speed(ns, nw, w_in_s, sched_ctxs, workers);
  199. /* if we did find at least one solution redistribute the resources */
  200. if(found_sol)
  201. {
  202. struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw);
  203. int w, s;
  204. double nworkers_per_ctx[ns][tw->nw];
  205. int nworkers_per_ctx_rounded[ns][tw->nw];
  206. for(s = 0; s < ns; s++)
  207. {
  208. for(w = 0; w < nw; w++)
  209. {
  210. nworkers_per_ctx[s][w] = 0.0;
  211. nworkers_per_ctx_rounded[s][w] = 0;
  212. }
  213. }
  214. for(s = 0; s < ns; s++)
  215. {
  216. for(w = 0; w < nw; w++)
  217. {
  218. enum starpu_worker_archtype arch = starpu_worker_get_type(w);
  219. int idx = sc_hypervisor_get_index_for_arch(STARPU_CUDA_WORKER, tw);
  220. nworkers_per_ctx[s][idx] += w_in_s[s][w];
  221. if(arch == STARPU_CUDA_WORKER)
  222. {
  223. if(w_in_s[s][w] >= 0.3)
  224. nworkers_per_ctx_rounded[s][idx]++;
  225. }
  226. else
  227. {
  228. int idx = sc_hypervisor_get_index_for_arch(STARPU_CPU_WORKER, tw);
  229. nworkers_per_ctx[s][idx] += w_in_s[s][w];
  230. if(w_in_s[s][w] > 0.5)
  231. nworkers_per_ctx_rounded[s][idx]++;
  232. }
  233. }
  234. }
  235. /* for(s = 0; s < ns; s++) */
  236. /* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
  237. /* nworkers_rounded[s][1], nworkers_rounded[s][0]); */
  238. sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, tw->nw, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs, tw);
  239. free(tw);
  240. }
  241. }
  242. static void throughput_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker,
  243. __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
  244. {
  245. int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
  246. if(ret != EBUSY)
  247. {
  248. unsigned criteria = sc_hypervisor_get_resize_criteria();
  249. if(criteria != SC_NOTHING && criteria == SC_SPEED)
  250. {
  251. if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
  252. {
  253. _try_resizing(NULL, -1, NULL, -1);
  254. }
  255. }
  256. STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
  257. }
  258. }
  259. static void throughput_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
  260. {
  261. int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
  262. if(ret != EBUSY)
  263. {
  264. unsigned criteria = sc_hypervisor_get_resize_criteria();
  265. if(criteria != SC_NOTHING && criteria == SC_IDLE)
  266. {
  267. if(sc_hypervisor_check_idle(sched_ctx, worker))
  268. {
  269. _try_resizing(NULL, -1, NULL, -1);
  270. // sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1);
  271. }
  272. }
  273. STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
  274. }
  275. }
  276. static void throughput_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
  277. {
  278. int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
  279. if(ret != EBUSY)
  280. {
  281. _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
  282. STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
  283. }
  284. }
  285. static void throughput_lp_end_ctx(__attribute__((unused))unsigned sched_ctx)
  286. {
  287. /* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */
  288. /* int worker; */
  289. /* for(worker = 0; worker < 12; worker++) */
  290. /* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */
  291. return;
  292. }
/* Policy descriptor registered with the hypervisor: resizes scheduling
 * contexts by solving a throughput-maximizing linear program (GLPK).
 * NULL entries are hooks this policy does not implement. */
struct sc_hypervisor_policy throughput_lp_policy = {
	.size_ctxs = NULL,
	.resize_ctxs = throughput_lp_resize_ctxs,          /* explicit resize requests */
	.handle_poped_task = throughput_lp_handle_poped_task,   /* speed-gap-driven resize */
	.handle_pushed_task = NULL,
	.handle_idle_cycle = throughput_lp_handle_idle_cycle,   /* idleness-driven resize */
	.handle_idle_end = NULL,
	.handle_post_exec_hook = NULL,
	.handle_submitted_job = NULL,
	.end_ctx = throughput_lp_end_ctx,
	.init_worker = NULL,
	.custom = 0,                                       /* built-in, not user-provided */
	.name = "throughput_lp"
};
  307. #endif /* STARPU_HAVE_GLPK_H */