lp_tools.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <math.h>
#include "lp_tools.h"
#include <starpu_config.h>

#ifdef STARPU_HAVE_GLPK_H
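/* A sketch of the linear program built below, as read from the constraint
 * construction in _lp_compute_nworkers_per_ctx() (the callers in this file
 * treat worker type 0 as CUDA and type 1 as CPU):
 *
 *   maximize    vmax                                          (vmax stands for 1/tmax)
 *   subject to  sum_w v[s][w] * n[s][w] >= flops[s] * vmax    for each context s
 *               sum_s n[s][w] = total_nw[w]                   for each worker type w
 *               n[s][w] >= 0   (integer when the 'integer' flag is set)
 *               0 <= vmax <= 1
 *
 * where n[s][w] is the number of workers of type w assigned to context s and
 * v[s][w] is the velocity of that worker type in that context.
 */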
double _lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int total_nw[nw])
{
	int integer = 1;
	int s, w;
	glp_prob *lp;

	int ne =
		(ns*nw+1)*(ns+nw)
		+ 1; /* glp dumbness */
	int n = 1;
	int ia[ne], ja[ne];
	double ar[ne];

	lp = glp_create_prob();

	glp_set_prob_name(lp, "sample");
	glp_set_obj_dir(lp, GLP_MAX);
	glp_set_obj_name(lp, "max speed");

	/* we add nw*ns columns, one for each type of worker in each context,
	   and one more column corresponding to the 1/tmax bound (because 1/tmax is a variable too) */
	glp_add_cols(lp, nw*ns+1);
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			char name[32];
			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
			glp_set_col_name(lp, n, name);
			if (integer)
			{
				glp_set_col_kind(lp, n, GLP_IV);
				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
			}
			else
				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
			n++;
		}
	}
	/* 1/tmax should belong to the interval [0.0;1.0] */
	glp_set_col_name(lp, n, "vmax");
	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
	/* Z = 1/tmax -> 1/tmax is a structural variable, nCPUs & nGPUs in each ctx are auxiliary variables */
	glp_set_obj_coef(lp, n, 1.0);
	n = 1;
	/* one row corresponds to one ctx */
	glp_add_rows(lp, ns);

	for(s = 0; s < ns; s++)
	{
		char name[32];
		snprintf(name, sizeof(name), "ctx%d", s);
		glp_set_row_name(lp, s+1, name);
		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);

		for(w = 0; w < nw; w++)
		{
			int s2;
			for(s2 = 0; s2 < ns; s2++)
			{
				if(s2 == s)
				{
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = v[s][w];
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				else
				{
					ia[n] = s+1;
					ja[n] = w + nw*s2 + 1;
					ar[n] = 0.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				n++;
			}
		}

		/* 1/tmax */
		ia[n] = s+1;
		ja[n] = ns*nw+1;
		ar[n] = (-1) * flops[s];
//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
		n++;
	}
	/* we add another set of linear constraints: for each worker type,
	   the workers assigned over all contexts must sum up to the total
	   number of workers of that type, i.e. total_nw[w] */
	glp_add_rows(lp, nw);
	for(w = 0; w < nw; w++)
	{
		char name[32];
		snprintf(name, sizeof(name), "w%d", w);
		glp_set_row_name(lp, ns+w+1, name);
		for(s = 0; s < ns; s++)
		{
			int w2;
			for(w2 = 0; w2 < nw; w2++)
			{
				if(w2 == w)
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 1.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				else
				{
					ia[n] = ns+w+1;
					ja[n] = w2+s*nw + 1;
					ar[n] = 0.0;
//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
				}
				n++;
			}
		}

		/* the 1/tmax column has no weight in these rows */
		ia[n] = ns+w+1;
		ja[n] = ns*nw+1;
		ar[n] = 0.0;
//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
		n++;

		/* sum(all gpus) = total_nw[0] */
		if(w == 0)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
		/* sum(all cpus) = total_nw[1] */
		if(w == 1)
			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
	}
	STARPU_ASSERT(n == ne);

	glp_load_matrix(lp, ne-1, ia, ja, ar);

	glp_smcp parm;
	glp_init_smcp(&parm);
	parm.msg_lev = GLP_MSG_OFF;
	int ret = glp_simplex(lp, &parm);
	if (ret)
	{
		printf("error in simplex\n");
		glp_delete_prob(lp);
		lp = NULL;
		return 0.0;
	}
	int stat = glp_get_prim_stat(lp);
	/* if we don't have a solution return */
	if(stat == GLP_NOFEAS)
	{
		glp_delete_prob(lp);
//		printf("no_sol in tmax = %lf\n", tmax);
		lp = NULL;
		return 0.0;
	}

	if (integer)
	{
		glp_iocp iocp;
		glp_init_iocp(&iocp);
		iocp.msg_lev = GLP_MSG_OFF;
		glp_intopt(lp, &iocp);
		int stat = glp_mip_status(lp);
		/* if we don't have a solution return */
		if(stat == GLP_NOFEAS)
		{
//			printf("no int sol in tmax = %lf\n", tmax);
			glp_delete_prob(lp);
			lp = NULL;
			return 0.0;
		}
	}
	double vmax = glp_get_obj_val(lp);

	n = 1;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			if (integer)
				res[s][w] = (double)glp_mip_col_val(lp, n);
			else
				res[s][w] = glp_get_col_prim(lp, n);
//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w, s, res[s][w], flops[s], v[s][w]);
			n++;
		}
	}

	glp_delete_prob(lp);
	return vmax;
}
#endif //STARPU_HAVE_GLPK_H
double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];

	int i = 0;
	struct sched_ctx_hypervisor_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]);
		v[i][0] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
		v[i][1] = sched_ctx_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops */
//		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
	}

	return 1/_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
#else
	return 0.0;
#endif
}
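/* Note on the value returned above: _lp_compute_nworkers_per_ctx() maximizes
 * vmax = 1/tmax, so the reciprocal taken here is tmax, the estimated time
 * needed to execute the remaining flops of all contexts with the computed
 * worker distribution. With flops expressed in Gflops as above, tmax is
 * presumably in seconds if the velocities are in Gflop/s (an assumption, the
 * units are not checked here); _lp_get_tmax() below then scales it by 1000.
 * If the solver found no solution, 0.0 is returned and the reciprocal
 * degenerates.
 */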
double _lp_get_tmax(int nw, int *workers)
{
	int ntypes_of_workers = 2;
	int total_nw[ntypes_of_workers];
	_get_total_nw(workers, nw, 2, total_nw);

	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();

	double res[nsched_ctxs][ntypes_of_workers];
	return _lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000;
}
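/* The helper below converts the fractional worker counts produced by the LP
 * into integers. As read from the code: each fractional part is rounded up or
 * down, and the rounding error is carried over in left_res[w] so that later
 * contexts compensate for earlier ones (for example, 1.6 and 2.4 workers of
 * the same type round to 2 and 2, keeping the per-type total at 4).
 */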
void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
{
	int s, w;
	double left_res[nw];
	for(w = 0; w < nw; w++)
		left_res[w] = 0.0;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			int x = floor(res[s][w]);
			double x_double = (double)x;
			double diff = res[s][w] - x_double;

			if(diff != 0.0)
			{
				if(diff > 0.5)
				{
					if(left_res[w] != 0.0)
					{
						if((diff + left_res[w]) > 0.5)
						{
							res_rounded[s][w] = x + 1;
							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
						}
						else
						{
							res_rounded[s][w] = x;
							left_res[w] = (-1.0) * (diff + left_res[w]);
						}
					}
					else
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
					}
				}
				else
				{
					if((diff + left_res[w]) > 0.5)
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
					}
					else
					{
						res_rounded[s][w] = x;
						left_res[w] = diff;
					}
				}
			}
			else
				res_rounded[s][w] = x;
		}
	}
}
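/* A sketch of the logic implemented by the next helper, which computes the
 * workers a context should hand over: CPU workers (w == 1) are compared
 * against the integer allocation res_rounded, while for the other type the
 * fractional allocation res is used; when the surplus has a fractional part
 * above 0.8 the extra worker is moved away as well, between 0.3 and 0.8 it is
 * only recorded in tmp_workers_add (proposed rather than forcibly moved), and
 * below 0.3 it stays put.
 */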
void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				   int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				   int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				   int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	for(w = 0; w < nw; w++)
	{
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;

		if(w == 1)
		{
			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = _get_first_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				for(i = 0; i < nworkers_to_move; i++)
					tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				free(workers_to_move);
			}
		}
		else
		{
			double nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;
				if(diff == 0.0)
				{
					int *workers_to_move = _get_first_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
					}
					free(workers_to_move);
				}
				else
				{
					x += 1;
					int *workers_to_move = _get_first_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];

						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							if(diff > 0.3)
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
					}
					free(workers_to_move);
				}
			}
		}
	}
}
void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx,
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS],
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	int j = 0, k = 0;
	for(w = 0; w < nw; w++)
	{
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;

		int nw_ctx2 = sched_ctx_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;

		if(nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			tmp_nw_move[w] -= *nw_move;
		}

		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if(diff > 0.3 && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -= *nw_add;
		}
	}
}
void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS])
{
	int w;
	for(w = 0; w < nw; w++)
	{
		if(tmp_nw_move[w] > 0)
		{
			*nw_move += tmp_nw_move[w];
			int i = 0, j = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
		}
	}
}
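/* A short summary of the redistribution protocol implemented below, inferred
 * from the helpers above: for each context s the hypervisor first computes
 * the workers s should give away, then offers them to every other context s2
 * (moving what s2 needs and merely adding the "proposed" ones), and finally
 * removes from s whatever no other context accepted.
 */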
void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
	int s, s2, w;

	for(s = 0; s < ns; s++)
	{
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];

		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];

		for(w = 0; w < nw; w++)
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s,
					      tmp_nw_move, tmp_workers_move,
					      tmp_nw_add, tmp_workers_add, res_rounded, res);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s;
				   the rest of them will probably be accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;

				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2,
							   tmp_nw_move, tmp_workers_move,
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move,
							   &nw_add, workers_add,
							   res_rounded, res);

				if(nw_move > 0)
				{
					sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone but that ctx s wants
		   to get rid of, just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move,
					   &nw_move, workers_move);

		if(nw_move > 0)
			sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
	}
}
void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
{
	unsigned current_nworkers = workers == NULL ? starpu_worker_get_count() : (unsigned)nworkers;
	int s, w;
	int start[nw];
	for(w = 0; w < nw; w++)
		start[w] = 0;

	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
		int nw_add = 0;

		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch = STARPU_ANY_WORKER;
			if(w == 0) arch = STARPU_CUDA_WORKER;
			if(w == 1) arch = STARPU_CPU_WORKER;

			if(w == 1)
			{
				int nworkers_to_add = res_rounded[s][w];
				int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &nworkers_to_add, arch);
				int i;
				for(i = 0; i < nworkers_to_add; i++)
					workers_add[nw_add++] = workers_to_add[i];
				free(workers_to_add);
			}
			else
			{
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					x += 1;
					int *workers_to_add = _get_first_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
			}
		}
		if(nw_add > 0)
		{
			sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
			sched_ctx_hypervisor_start_resize(sched_ctxs[s]);
		}

//		sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]);
	}
}
/* nw = the total number of workers (either in the given list or on the whole machine) */
void _lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], int *sched_ctxs_input, int *workers_input, unsigned do_size)
{
	int w, s;
	double nworkers[ns][2];
	int nworkers_rounded[ns][2];
	for(s = 0; s < ns; s++)
	{
		nworkers[s][0] = 0.0;
		nworkers[s][1] = 0.0;
		nworkers_rounded[s][0] = 0;
		nworkers_rounded[s][1] = 0;
	}

	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch = starpu_worker_get_type(w);

			if(arch == STARPU_CUDA_WORKER)
			{
				nworkers[s][0] += w_in_s[s][w];
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][0]++;
			}
			else
			{
				nworkers[s][1] += w_in_s[s][w];
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][1]++;
			}
		}
	}

/*	for(s = 0; s < ns; s++) */
/*		printf("%d: cpus = %d gpus = %d \n", s, nworkers_rounded[s][1], nworkers_rounded[s][0]); */

	if(!do_size)
		_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
	else
	{
		int *current_sched_ctxs = sched_ctxs_input == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sched_ctx_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
		else
			_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, 2, nworkers_rounded, nworkers, workers_input, nw);
	}
	return;
}
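/* Returns the midpoint of the interval [t1, t2]; presumably used by callers
 * outside this file to bisect on tmax (an assumption based on the name, since
 * the callers are not shown here).
 */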
double _find_tmax(double t1, double t2)
{
	return t1 + ((t2 - t1)/2);
}