lp2_policy.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011, 2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <starpu_config.h>
#include "lp_tools.h"
#include <math.h>
/* The headers below are most likely already pulled in via lp_tools.h;
   they are listed explicitly since this file uses them directly
   (gettimeofday, malloc, printf, EBUSY, uint32_t). */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

static struct bound_task_pool *task_pools = NULL;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers);
static double _find_tmax(double t1, double t2);
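
/*
 * Compute a task distribution over the contexts by bisection on tmax:
 * starting from an upper bound of ns * smallest_tmax, repeatedly solve
 * the LP for a fixed tmax and tighten the interval [tmin, tmax] until
 * the two consecutive feasible values are within 0.5 of each other.
 * Returns 1 if at least one feasible solution was found; the last
 * feasible one is kept in tasks[][] and w_in_s[][].
 */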
static unsigned _compute_task_distribution_over_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], int *sched_ctxs, int *workers)
{
	double draft_tasks[nw][nt];
	double draft_w_in_s[ns][nw];
	int w, t, s;

	for(w = 0; w < nw; w++)
		for(t = 0; t < nt; t++)
		{
			tasks[w][t] = 0.0;
			draft_tasks[w][t] = 0.0;
		}

	for(s = 0; s < ns; s++)
		for(w = 0; w < nw; w++)
		{
			w_in_s[s][w] = 0.0;
			draft_w_in_s[s][w] = 0.0;
		}

	/* smallest possible tmax; hard to obtain exactly since we compute
	   the number of flops and not the number of tasks */
	double smallest_tmax = _lp_get_tmax(nw, workers);
	double tmax = smallest_tmax * ns;

	double res = 1.0;
	unsigned has_sol = 0;
	double tmin = 0.0;
	double old_tmax = 0.0;
	unsigned found_sol = 0;

	struct timeval start_time;
	struct timeval end_time;
	int nd = 0;
	gettimeofday(&start_time, NULL);

	/* tmax is fixed rather than treated as an unknown of the system;
	   we vary its value by dichotomy */
	while(tmax > 1.0)
	{
		/* find a solution and save the values in the draft tables;
		   only if the system is feasible do we copy them into the
		   proper tables */
		res = _glp_resolve(ns, nw, nt, draft_tasks, tmax, draft_w_in_s, sched_ctxs, workers);
		if(res != 0.0)
		{
			for(w = 0; w < nw; w++)
				for(t = 0; t < nt; t++)
					tasks[w][t] = draft_tasks[w][t];
			for(s = 0; s < ns; s++)
				for(w = 0; w < nw; w++)
					w_in_s[s][w] = draft_w_in_s[s][w];
			has_sol = 1;
			found_sol = 1;
		}
		else
			has_sol = 0;

		/* if we have a solution with this tmax, try a smaller value,
		   still bigger than the old minimum */
		if(has_sol)
		{
			if(old_tmax != 0.0 && (old_tmax - tmax) < 0.5)
				break;
			old_tmax = tmax;
		}
		else /* otherwise try a bigger one, but smaller than the old tmax */
		{
			tmin = tmax;
			if(old_tmax != 0.0)
				tmax = old_tmax;
		}
		if(tmin == tmax) break;
		tmax = _find_tmax(tmin, tmax);

		if(tmax < smallest_tmax)
		{
			tmax = old_tmax;
			tmin = smallest_tmax;
			tmax = _find_tmax(tmin, tmax);
		}
		nd++;
	}

	gettimeofday(&end_time, NULL);
	long diff_s = end_time.tv_sec - start_time.tv_sec;
	long diff_us = end_time.tv_usec - start_time.tv_usec;
	float timing = (float)(diff_s*1000000 + diff_us)/1000;
	(void) timing; /* only used by the debug printout below */
	// fprintf(stdout, "nd = %d total time: %f ms \n", nd, timing);

	return found_sol;
}
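
/*
 * Apply a computed distribution: for each context, add the workers the
 * LP assigned to it (w_in_s[s][w] above a threshold: 0.5 for CPU
 * workers, 0.3 for the others) and mark the rest for removal. Workers
 * are actually removed only when every one of them has a destination
 * context, so no worker is left without a context.
 */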
static void _redistribute_resources_in_ctxs(int ns, int nw, int nt, double w_in_s[ns][nw], unsigned first_time, int *in_sched_ctxs, int *workers)
{
	int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;
	int s, s2, w;

	for(s = 0; s < ns; s++)
	{
		int workers_to_add[nw], workers_to_remove[nw];
		int destination_ctx[nw][ns];

		for(w = 0; w < nw; w++)
		{
			workers_to_add[w] = -1;
			workers_to_remove[w] = -1;
			for(s2 = 0; s2 < ns; s2++)
				destination_ctx[w][s2] = -1;
		}

		int nadd = 0, nremove = 0;
		for(w = 0; w < nw; w++)
		{
			enum starpu_perf_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
				starpu_worker_get_type(workers[w]);

			if(arch == STARPU_CPU_WORKER)
			{
				if(w_in_s[s][w] >= 0.5)
				{
					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
				}
				else
				{
					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
					for(s2 = 0; s2 < ns; s2++)
						if(s2 != s && w_in_s[s2][w] >= 0.5)
							destination_ctx[w][s2] = 1;
						else
							destination_ctx[w][s2] = 0;
				}
			}
			else
			{
				if(w_in_s[s][w] >= 0.3)
				{
					workers_to_add[nadd++] = workers == NULL ? w : workers[w];
				}
				else
				{
					workers_to_remove[nremove++] = workers == NULL ? w : workers[w];
					for(s2 = 0; s2 < ns; s2++)
						if(s2 != s && w_in_s[s2][w] >= 0.3)
							destination_ctx[w][s2] = 1;
						else
							destination_ctx[w][s2] = 0;
				}
			}
		}

		sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nadd, sched_ctxs[s]);
		struct policy_config *new_config = sched_ctx_hypervisor_get_config(sched_ctxs[s]);
		int i;
		for(i = 0; i < nadd; i++)
			new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;

		if(!first_time)
		{
			/* do not remove workers if they cannot go anywhere */
			int w2;
			/* signed on purpose: -1 marks a worker we are not interested in */
			int found_one_dest[nremove];
			unsigned all_have_dest = 1;
			for(w2 = 0; w2 < nremove; w2++)
				found_one_dest[w2] = 0;

			for(w2 = 0; w2 < nremove; w2++)
				for(s2 = 0; s2 < ns; s2++)
				{
					/* if the worker has to be removed, we need to find it a
					   destination; otherwise we are not interested in it */
					if(destination_ctx[w2][s2] == -1)
						found_one_dest[w2] = -1;
					if(destination_ctx[w2][s2] == 1)// && sched_ctx_hypervisor_can_resize(sched_ctxs[s2]))
					{
						found_one_dest[w2] = 1;
						break;
					}
				}
			for(w2 = 0; w2 < nremove; w2++)
			{
				if(found_one_dest[w2] == 0)
				{
					all_have_dest = 0;
					break;
				}
			}
			if(all_have_dest)
				sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_to_remove, nremove, sched_ctxs[s], 0);
		}
	}
}
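
/*
 * Size the contexts once: compute a distribution over all known task
 * kinds and, if the LP found a solution, apply it with first_time = 1,
 * so workers are only added to contexts, never removed.
 */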
static void _size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	int ns = sched_ctxs == NULL ? sched_ctx_hypervisor_get_nsched_ctxs() : nsched_ctxs;
	int nw = workers == NULL ? starpu_worker_get_count() : nworkers; /* Number of different workers */
	int nt = 0; /* Number of different kinds of tasks */
	struct bound_task_pool *tp;
	for (tp = task_pools; tp; tp = tp->next)
		nt++;

	double w_in_s[ns][nw];
	/* scratch array instead of NULL: the callee unconditionally writes
	   into tasks[w][t], so passing NULL would dereference a null pointer */
	double tasks[nw][nt];

	unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, tasks, sched_ctxs, workers);
	/* if we did find at least one solution, redistribute the resources */
	if(found_sol)
		_redistribute_resources_in_ctxs(ns, nw, nt, w_in_s, 1, sched_ctxs, workers);
}
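
/*
 * Check whether a sizing request was posted (see lp2_size_ctxs below)
 * and honour it once every context has submitted all of its flops.
 */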
static void size_if_required(void)
{
	int nsched_ctxs, nworkers;
	int *sched_ctxs, *workers;
	unsigned has_req = sched_ctx_hypervisor_get_size_req(&sched_ctxs, &nsched_ctxs, &workers, &nworkers);

	if(has_req)
	{
		struct sched_ctx_wrapper *sc_w = NULL;
		unsigned ready_to_size = 1;
		int s;
		pthread_mutex_lock(&act_hypervisor_mutex);
		for(s = 0; s < nsched_ctxs; s++)
		{
			sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[s]);
			if(sc_w->submitted_flops < sc_w->total_flops)
				ready_to_size = 0;
		}
		if(ready_to_size)
			_size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
		pthread_mutex_unlock(&act_hypervisor_mutex);
	}
}
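
/*
 * Hypervisor callback invoked for every submitted task: tasks are
 * grouped into pools keyed by (codelet, footprint, context), and the
 * per-pool counters provide the task totals used by the LP below.
 */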
static void lp2_handle_submitted_job(struct starpu_task *task, uint32_t footprint)
{
	/* count the tasks of the same type */
	pthread_mutex_lock(&mutex);
	struct bound_task_pool *tp = NULL;

	for (tp = task_pools; tp; tp = tp->next)
	{
		if (tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx)
			break;
	}

	if (!tp)
	{
		tp = (struct bound_task_pool *) malloc(sizeof(struct bound_task_pool));
		tp->cl = task->cl;
		tp->footprint = footprint;
		tp->sched_ctx_id = task->sched_ctx;
		tp->n = 0;
		tp->next = task_pools;
		task_pools = tp;
	}

	/* One more task of this kind */
	tp->n++;
	pthread_mutex_unlock(&mutex);

	size_if_required();
}
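
/*
 * Fill times[w][t] with the expected duration of one task of kind t on
 * worker w according to the history-based performance model, converted
 * to milliseconds; NAN when no calibrated model exists for that pair.
 */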
static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers)
{
	struct bound_task_pool *tp;
	int w, t;
	for (w = 0; w < nw; w++)
	{
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			enum starpu_perf_archtype arch = workers == NULL ? starpu_worker_get_perf_archtype(w) :
				starpu_worker_get_perf_archtype(workers[w]);
			double length = starpu_history_based_job_expected_perf(tp->cl->model, arch, tp->footprint);
			if (isnan(length))
				times[w][t] = NAN;
			else
				times[w][t] = length / 1000.;
		}
	}
}
/*
 * GNU Linear Programming Kit backend
 */
#ifdef STARPU_HAVE_GLPK_H
#include <glpk.h>
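
/*
 * Build and solve the linear program for a fixed tmax.
 *
 * Variables:  n[w][t] >= 0       number of tasks of kind t on worker w
 *             x[s][w] in [0,1]   share of worker w given to context s
 * Constraints:
 *   for each (s,w):  sum_t times[w][t]*n[w][t] - tmax*x[s][w] <= 0
 *                    (each worker finishes its share within tmax)
 *   for each t:      sum_w n[w][t] = total number of tasks of kind t
 *   for each w:      sum_s x[s][w] = 1
 * Objective: maximize sum_{s,w} x[s][w].
 *
 * Returns the objective value, or 0.0 if the system is infeasible or
 * the solver fails.
 */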
static double _glp_resolve(int ns, int nw, int nt, double tasks[nw][nt], double tmax, double w_in_s[ns][nw], int *in_sched_ctxs, int *workers)
{
	struct bound_task_pool *tp;
	int t, w, s;
	glp_prob *lp;

	lp = glp_create_prob();
	glp_set_prob_name(lp, "StarPU theoretical bound");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "total execution time");

	{
		double times[nw][nt];
		int ne = nt * nw /* task completion rows */
			+ nw * ns /* one-context-per-worker rows */
			+ nw * (nt + ns) /* worker execution time rows */
			+ 1; /* glp dumbness: GLPK arrays are 1-based */
		int n = 1;
		int ia[ne], ja[ne];
		double ar[ne];

		_starpu_get_tasks_times(nw, nt, times, workers);

		/* Variables: number of tasks of each kind assigned to each worker,
		   plus the share x[s][w] of each worker in each context (tmax
		   itself is fixed here, not an unknown of the system) */
		glp_add_cols(lp, nw*nt+ns*nw);
#define colnum(w, t) ((t)*nw+(w)+1)
		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);

		for (w = 0; w < nw; w++)
			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			{
				char name[32];
				snprintf(name, sizeof(name), "w%dt%dn", w, t);
				glp_set_col_name(lp, colnum(w, t), name);
				glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
			}
		for(s = 0; s < ns; s++)
			for(w = 0; w < nw; w++)
			{
				char name[32];
				snprintf(name, sizeof(name), "w%ds%dn", w, s);
				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
				glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
			}

		int *sched_ctxs = in_sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : in_sched_ctxs;

		int curr_row_idx = 0;
		/* Total worker execution time */
		glp_add_rows(lp, nw*ns);
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			int someone = 0;
			for (w = 0; w < nw; w++)
				if (!isnan(times[w][t]))
					someone = 1;
			if (!someone)
			{
				/* This task does not have any performance model at all, abort */
				glp_delete_prob(lp);
				return 0.0;
			}
		}
		/* sum(times[w][t] * n[w][t]) <= x[s][w] * tmax */
		for(s = 0; s < ns; s++)
		{
			for (w = 0; w < nw; w++)
			{
				char name[32], title[64];
				starpu_worker_get_name(w, name, sizeof(name));
				snprintf(title, sizeof(title), "worker %s", name);
				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
				{
					if(tp->sched_ctx_id == sched_ctxs[s])
					{
						ia[n] = curr_row_idx+s*nw+w+1;
						ja[n] = colnum(w, t);
						if (isnan(times[w][t]))
							ar[n] = 1000000000.; /* prohibitive cost: no model for this pair */
						else
							ar[n] = times[w][t];
						n++;
					}
				}
				/* - tmax * x[s][w]; x[s][w] is the relaxed 0|1 membership of
				   worker w in context s */
				ia[n] = curr_row_idx+s*nw+w+1;
				ja[n] = nw*nt+s*nw+w+1;
				ar[n] = (-1) * tmax;
				n++;
				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
			}
		}
		curr_row_idx += nw*ns;

		/* Total task completion */
		glp_add_rows(lp, nt);
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			char title[64];
			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
			glp_set_row_name(lp, curr_row_idx+t+1, title);
			for (w = 0; w < nw; w++)
			{
				ia[n] = curr_row_idx+t+1;
				ja[n] = colnum(w, t);
				ar[n] = 1;
				n++;
			}
			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
		}
		curr_row_idx += nt;

		/* sum(x[s][w]) = 1 */
		glp_add_rows(lp, nw);
		for (w = 0; w < nw; w++)
		{
			char name[32], title[64];
			starpu_worker_get_name(w, name, sizeof(name));
			snprintf(title, sizeof(title), "w%x", w);
			glp_set_row_name(lp, curr_row_idx+w+1, title);
			for(s = 0; s < ns; s++)
			{
				ia[n] = curr_row_idx+w+1;
				ja[n] = nw*nt+s*nw+w+1;
				ar[n] = 1;
				n++;
			}
			glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
		}

		if(n != ne)
			printf("ns = %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
		STARPU_ASSERT(n == ne);

		glp_load_matrix(lp, ne-1, ia, ja, ar);
	}

	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	int ret = glp_simplex(lp, &parm);
	if (ret)
	{
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}

	int stat = glp_get_prim_stat(lp);
	/* if we do not have a solution, return */
	if(stat == GLP_NOFEAS)
	{
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}

	double res = glp_get_obj_val(lp);

	for (w = 0; w < nw; w++)
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));

	for(s = 0; s < ns; s++)
		for(w = 0; w < nw; w++)
			w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);

	glp_delete_prob(lp);
	return res;
}
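
/* midpoint of the current bisection interval */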
static double _find_tmax(double t1, double t2)
{
	return t1 + ((t2 - t1)/2);
}
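
/*
 * Hypervisor callback invoked when a task is popped for execution: once
 * the context has submitted all of its flops and the velocities of the
 * contexts have drifted apart, recompute the distribution and round the
 * x[s][w] shares into per-context GPU/CPU worker counts.
 */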
static void lp2_handle_poped_task(unsigned sched_ctx, int worker)
{
	struct sched_ctx_wrapper *sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctx);

	int ret = pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		if(sc_w->submitted_flops < sc_w->total_flops)
		{
			pthread_mutex_unlock(&act_hypervisor_mutex);
			return;
		}

		if(_velocity_gap_btw_ctxs())
		{
			int ns = sched_ctx_hypervisor_get_nsched_ctxs();
			int nw = starpu_worker_get_count(); /* Number of different workers */
			int nt = 0; /* Number of different kinds of tasks */
			struct bound_task_pool *tp;
			for (tp = task_pools; tp; tp = tp->next)
				nt++;

			double w_in_s[ns][nw];
			double tasks_per_worker[nw][nt];

			unsigned found_sol = _compute_task_distribution_over_ctxs(ns, nw, nt, w_in_s, tasks_per_worker, NULL, NULL);
			/* if we did find at least one solution, redistribute the resources */
			if(found_sol)
			{
				int w, s;
				double nworkers[ns][2];
				int nworkers_rounded[ns][2];
				for(s = 0; s < ns; s++)
				{
					nworkers[s][0] = 0.0;
					nworkers[s][1] = 0.0;
					nworkers_rounded[s][0] = 0;
					nworkers_rounded[s][1] = 0;
				}

				for(s = 0; s < ns; s++)
				{
					for(w = 0; w < nw; w++)
					{
						enum starpu_perf_archtype arch = starpu_worker_get_type(w);

						if(arch == STARPU_CUDA_WORKER)
						{
							nworkers[s][0] += w_in_s[s][w];
							if(w_in_s[s][w] >= 0.3)
								nworkers_rounded[s][0]++;
						}
						else
						{
							nworkers[s][1] += w_in_s[s][w];
							if(w_in_s[s][w] > 0.3)
								nworkers_rounded[s][1]++;
						}
					}
				}
/*				for(s = 0; s < ns; s++) */
/*					printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
/*					       nworkers_rounded[s][1], nworkers_rounded[s][0]); */

				_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
			}
		}
		pthread_mutex_unlock(&act_hypervisor_mutex);
	}
}
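
/*
 * Sizing requests are only recorded here; they are honoured later by
 * size_if_required(), once every context has submitted all its flops.
 */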
static void lp2_size_ctxs(int *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	sched_ctx_hypervisor_save_size_req(sched_ctxs, nsched_ctxs, workers, nworkers);
}
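
/*
 * Policy descriptor; the hypervisor presumably selects this policy by
 * its name, "lp2". Only the sizing, popped-task and submitted-job
 * hooks are implemented.
 */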
struct hypervisor_policy lp2_policy = {
	.size_ctxs = lp2_size_ctxs,
	.handle_poped_task = lp2_handle_poped_task,
	.handle_pushed_task = NULL,
	.handle_idle_cycle = NULL,
	.handle_idle_end = NULL,
	.handle_post_exec_hook = NULL,
	.handle_submitted_job = lp2_handle_submitted_job,
	.custom = 0,
	.name = "lp2"
};

#endif /* STARPU_HAVE_GLPK_H */