policy_tools.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2013  INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include "sc_hypervisor_policy.h"
#include "sc_hypervisor_intern.h"
#include <math.h>
#include <errno.h> /* for EBUSY, returned by starpu_pthread_mutex_trylock() */
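
/* sum up the priorities of all the workers currently in the context */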
static int _compute_priority(unsigned sched_ctx)
{
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);

	int total_priority = 0;

	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
	int worker;

	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		total_priority += config->priority[worker];
	}

	return total_priority;
}
/* find the context with the lowest priority */
unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move)
{
	int i;
	int highest_priority = -1;
	int current_priority = 0;
	unsigned sched_ctx = STARPU_NMAX_SCHED_CTXS;
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();

	struct sc_hypervisor_policy_config *config = NULL;

	for(i = 0; i < nsched_ctxs; i++)
	{
		if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && sched_ctxs[i] != req_sched_ctx)
		{
			unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[i]);
			config = sc_hypervisor_get_config(sched_ctxs[i]);
			if((nworkers + nworkers_to_move) <= config->max_nworkers)
			{
				current_priority = _compute_priority(sched_ctxs[i]);
				if(highest_priority < current_priority)
				{
					highest_priority = current_priority;
					sched_ctx = sched_ctxs[i];
				}
			}
		}
	}

	return sched_ctx;
}
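
/* pick up to *nworkers workers of the requested architecture from the list,
   starting at position *start; *start and *nworkers are updated accordingly */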
int* sc_hypervisor_get_idlest_workers_in_list(int *start, int *workers, int nall_workers, int *nworkers, enum starpu_worker_archtype arch)
{
	int *curr_workers = (int*)malloc((*nworkers)*sizeof(int));
	int w, worker;
	int nfound_workers = 0;

	for(w = 0; w < nall_workers; w++)
	{
		if(nfound_workers >= *nworkers)
			break;

		worker = workers == NULL ? w : workers[w];
		enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
		{
			if(w >= *start)
			{
				curr_workers[nfound_workers++] = worker;
				*start = w+1;
			}
		}
	}

	if(nfound_workers < *nworkers)
		*nworkers = nfound_workers;

	return curr_workers;
}
/* get first nworkers with the highest idle time in the context */
int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch)
{
	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);

	int *curr_workers = (int*)malloc((*nworkers) * sizeof(int));
	int i;
	for(i = 0; i < *nworkers; i++)
		curr_workers[i] = -1;

	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
	int index;
	int worker;
	int considered = 0;

	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	for(index = 0; index < *nworkers; index++)
	{
		while(workers->has_next(workers, &it))
		{
			considered = 0;
			worker = workers->get_next(workers, &it);
			enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
			if(arch == STARPU_ANY_WORKER || curr_arch == arch)
			{
				if(!config->fixed_workers[worker])
				{
					for(i = 0; i < index; i++)
					{
						if(curr_workers[i] == worker)
						{
							considered = 1;
							break;
						}
					}

					if(!considered)
					{
						/* the first iteration */
						if(curr_workers[index] < 0)
							curr_workers[index] = worker;
						/* small priority worker is the first to leave the ctx */
						else if(config->priority[worker] <
							config->priority[curr_workers[index]])
							curr_workers[index] = worker;
						/* if we don't consider priorities check for the workers
						   with the biggest idle time */
						else if(config->priority[worker] ==
							config->priority[curr_workers[index]])
						{
							double worker_idle_time = sc_w->current_idle_time[worker];
							double curr_worker_idle_time = sc_w->current_idle_time[curr_workers[index]];
							if(worker_idle_time > curr_worker_idle_time)
								curr_workers[index] = worker;
						}
					}
				}
			}
		}

		if(curr_workers[index] < 0)
		{
			*nworkers = index;
			break;
		}
	}

	return curr_workers;
}
/* get the number of workers in the context that are allowed to be moved (that are not fixed) */
unsigned sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_worker_archtype arch)
{
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);

	unsigned potential_workers = 0;
	int worker;

	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
		if(arch == STARPU_ANY_WORKER || curr_arch == arch)
		{
			if(!config->fixed_workers[worker])
				potential_workers++;
		}
	}

	return potential_workers;
}
/* compute the number of workers that should be moved depending on:
   - the min/max number of workers in a context imposed by the user,
   - the resource granularity imposed by the user for the resizing process */
int sc_hypervisor_compute_nworkers_to_move(unsigned req_sched_ctx)
{
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(req_sched_ctx);
	unsigned nworkers = starpu_sched_ctx_get_nworkers(req_sched_ctx);
	unsigned nworkers_to_move = 0;

	unsigned potential_moving_workers = sc_hypervisor_get_movable_nworkers(config, req_sched_ctx, STARPU_ANY_WORKER);
	if(potential_moving_workers > 0)
	{
		if(potential_moving_workers <= config->min_nworkers)
			/* if we have to give more than min better give it all */
			/* => empty ctx will block until having the required workers */
			nworkers_to_move = potential_moving_workers;
		else if(potential_moving_workers > config->max_nworkers)
		{
			if((potential_moving_workers - config->granularity) > config->max_nworkers)
//				nworkers_to_move = config->granularity;
				nworkers_to_move = potential_moving_workers;
			else
				nworkers_to_move = potential_moving_workers - config->max_nworkers;
		}
		else if(potential_moving_workers > config->granularity)
		{
			if((nworkers - config->granularity) > config->min_nworkers)
				nworkers_to_move = config->granularity;
			else
				nworkers_to_move = potential_moving_workers - config->min_nworkers;
		}
		else
		{
			int nfixed_workers = nworkers - potential_moving_workers;
			if(nfixed_workers >= config->min_nworkers)
				nworkers_to_move = potential_moving_workers;
			else
				nworkers_to_move = potential_moving_workers - (config->min_nworkers - nfixed_workers);
		}

		if((nworkers - nworkers_to_move) > config->max_nworkers)
			nworkers_to_move = nworkers - config->max_nworkers;
	}

	return nworkers_to_move;
}
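
/* move the idlest workers of the sender context to the receiver context (or to the context
   chosen by the hypervisor when the receiver is unknown); if force_resize is not set, give up
   when the hypervisor lock is already taken */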
unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now)
{
	int ret = 1;
	if(force_resize)
		starpu_pthread_mutex_lock(&act_hypervisor_mutex);
	else
		ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);

	if(ret != EBUSY)
	{
		int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(sender_sched_ctx);
		if(nworkers_to_move > 0)
		{
			unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
			if(receiver_sched_ctx == STARPU_NMAX_SCHED_CTXS)
			{
				poor_sched_ctx = sc_hypervisor_find_lowest_prio_sched_ctx(sender_sched_ctx, nworkers_to_move);
			}
			else
			{
				poor_sched_ctx = receiver_sched_ctx;
				struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(poor_sched_ctx);
				unsigned nworkers = starpu_sched_ctx_get_nworkers(poor_sched_ctx);
				unsigned nshared_workers = starpu_sched_ctx_get_nshared_workers(sender_sched_ctx, poor_sched_ctx);
				if((nworkers + nworkers_to_move - nshared_workers) > config->max_nworkers)
					nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers + nshared_workers);
				if(nworkers_to_move == 0)
					poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
			}

			if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
			{
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER);
				sc_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move, now);

				struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(poor_sched_ctx);
				int i;
				for(i = 0; i < nworkers_to_move; i++)
					new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle;

				free(workers_to_move);
			}
		}
		starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
		return 1;
	}
	return 0;
}
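
/* let the hypervisor choose the receiver context for the workers taken from the sender */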
unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now)
{
	return sc_hypervisor_policy_resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now);
}
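
/* average value of the ispeed_w_sample configured for the workers of the requested architecture
   in the context */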
static double _get_ispeed_sample_for_type_of_worker(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype req_arch)
{
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
	int worker;

	double avg = 0.0;
	int n = 0;
	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
		if(arch == req_arch)
		{
			struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
			avg += config->ispeed_w_sample[worker];
			n++;
		}
	}

	return n != 0 ? avg/n : 0;
}
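
/* sum of the per-worker ispeed samples over all the workers of the context */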
static double _get_ispeed_sample_for_sched_ctx(unsigned sched_ctx)
{
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);

	int worker;
	double ispeed_sample = 0.0;
	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		ispeed_sample += config->ispeed_w_sample[worker];
	}

	return ispeed_sample;
}
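
/* estimated velocity of the context (in Gflops/s), or -1.0 when the flops executed so far
   are below the configured sample and the estimate is not considered meaningful yet */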
double sc_hypervisor_get_ctx_velocity(struct sc_hypervisor_wrapper* sc_w)
{
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
	double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
//	double sample = _get_ispeed_sample_for_sched_ctx(sc_w->sched_ctx);
	double sample = config->ispeed_ctx_sample;

/*	double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w); */
/*	double prc = config->ispeed_ctx_sample != 0.0 ? elapsed_flops : elapsed_flops/sc_w->total_flops; */
/*	double redim_sample = config->ispeed_ctx_sample != 0.0 ? config->ispeed_ctx_sample : */
/*		(elapsed_flops == total_elapsed_flops ? HYPERVISOR_START_REDIM_SAMPLE : HYPERVISOR_REDIM_SAMPLE); */
//	printf("%d: prc %lf sample %lf\n", sc_w->sched_ctx, prc, redim_sample);

/*	double curr_time2 = starpu_timing_now(); */
/*	double elapsed_time2 = (curr_time2 - sc_w->start_time) / 1000000.0; /\* in seconds *\/ */
/*	if(elapsed_time2 > 5.0 && elapsed_flops < sample) */
/*		return (elapsed_flops/1000000000.0)/elapsed_time2; /\* in Gflops/s *\/ */

	if(elapsed_flops >= sample)
	{
		double curr_time = starpu_timing_now();
		double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
		return (elapsed_flops/1000000000.0)/elapsed_time; /* in Gflops/s */
	}
	return -1.0;
}
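
/* estimated time (in seconds) the slowest context needs to execute its configured flops sample,
   based on the current velocity of each context */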
double sc_hypervisor_get_slowest_ctx_exec_time(void)
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();

/*	double curr_time = starpu_timing_now(); */
	double slowest_time = 0.0;

	int s;
	struct sc_hypervisor_wrapper* sc_w;
	for(s = 0; s < nsched_ctxs; s++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);

//		double elapsed_time = (curr_time - sc_w->start_time)/1000000;
		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_velocity(sc_w);
		if(elapsed_time > slowest_time)
			slowest_time = elapsed_time;
	}
	return slowest_time;
}
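
/* estimated time (in seconds) the fastest context needs to execute its configured flops sample */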
double sc_hypervisor_get_fastest_ctx_exec_time(void)
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();

	double curr_time = starpu_timing_now();
	double fastest_time = curr_time;

	int s;
	struct sc_hypervisor_wrapper* sc_w;
	for(s = 0; s < nsched_ctxs; s++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);

		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
		double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_velocity(sc_w);
		if(elapsed_time < fastest_time)
			fastest_time = elapsed_time;
	}

	return fastest_time;
}
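
/* velocity of one worker in the context (in Gflops/s); returns -1.0 if the worker does not
   belong to the context; for CUDA workers the elapsed time is corrected with an estimate of
   the RAM-to-CUDA latency per executed task */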
double sc_hypervisor_get_velocity_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker)
{
	if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
		return -1.0;

	double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /* in gflops */
	size_t elapsed_data_used = sc_w->elapsed_data[worker];
	int elapsed_tasks = sc_w->elapsed_tasks[worker];
	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
	double sample = config->ispeed_w_sample[worker] / 1000000000.0; /* in gflops */

	double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
	double ctx_sample = config->ispeed_ctx_sample;
	if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
		return 0.00000000000001;

/*	if( elapsed_flops >= sample) */
/*	{ */
/*		double curr_time = starpu_timing_now(); */
/*		double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /\* in seconds *\/ */
/*		sc_w->ref_velocity[worker] = (elapsed_flops/elapsed_time); /\* in Gflops/s *\/ */
/*		return sc_w->ref_velocity[worker]; */
/*	} */
/*	return -1.0; */

	if( elapsed_flops != 0.0)
	{
		double curr_time = starpu_timing_now();
		size_t elapsed_data_used = sc_w->elapsed_data[worker];
		double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
		enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
		if(arch == STARPU_CUDA_WORKER)
		{
/*			unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); */
/*			if(!worker_in_ctx) */
/*			{ */
/*				double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker); */
/*				elapsed_time += (elapsed_data_used / transfer_velocity) / 1000000 ; */
/*			} */
			double latency = starpu_get_latency_RAM_CUDA(worker);
//			printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks);
			elapsed_time += (elapsed_tasks * latency)/1000000;
//			printf("elapsed time after %lf \n", elapsed_time);
		}

		double vel = (elapsed_flops/elapsed_time); /* in Gflops/s */
		sc_w->ref_velocity[worker] = sc_w->ref_velocity[worker] > 1.0 ? (sc_w->ref_velocity[worker] + vel) / 2 : vel;
		return vel;
	}

	return 0.00000000000001;
}
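
/* highest elapsed flops value among the workers of the requested architecture; also counts
   the workers of that architecture in *npus */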
static double _get_best_elapsed_flops(struct sc_hypervisor_wrapper* sc_w, int *npus, enum starpu_worker_archtype req_arch)
{
	double ret_val = 0.0;
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
	int worker;

	struct starpu_sched_ctx_iterator it;
	if(workers->init_iterator)
		workers->init_iterator(workers, &it);

	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
		if(arch == req_arch)
		{
			if(sc_w->elapsed_flops[worker] > ret_val)
				ret_val = sc_w->elapsed_flops[worker];
			(*npus)++;
		}
	}

	return ret_val;
}
/* compute an average value of the cpu/cuda velocity */
double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
{
	int npus = 0;
	double elapsed_flops = _get_best_elapsed_flops(sc_w, &npus, arch) / 1000000000.0; /* in gflops */
	if(npus == 0)
		return -1.0;

	if( elapsed_flops != 0.0)
	{
		double curr_time = starpu_timing_now();
		double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
		double velocity = (elapsed_flops/elapsed_time); /* in Gflops/s */
		return velocity;
	}

	return -1.0;
}
/* check if there is a big velocity gap between the contexts */
unsigned sc_hypervisor_check_velocity_gap_btw_ctxs(void)
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
	int i = 0, j = 0;
	struct sc_hypervisor_wrapper* sc_w;
	struct sc_hypervisor_wrapper* other_sc_w;

	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
		double ctx_v = sc_hypervisor_get_ctx_velocity(sc_w);
		if(ctx_v != -1.0)
		{
			for(j = 0; j < nsched_ctxs; j++)
			{
				if(sched_ctxs[i] != sched_ctxs[j])
				{
					unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[j]);
					if(nworkers == 0)
						return 1;

					other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]);
					double other_ctx_v = sc_hypervisor_get_ctx_velocity(other_sc_w);
					if(other_ctx_v != -1.0)
					{
						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v;
//						if(gap > 1.5)
						if(gap > _get_max_velocity_gap())
							return 1;
					}
				}
			}
		}
	}
	return 0;
}
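
/* count how many workers of each type there are in the list (or among all the workers when
   the list is NULL); with two types of workers, total_nw[1] counts the CPUs and total_nw[0]
   everything else */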
void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers])
{
	int current_nworkers = workers == NULL ? starpu_worker_get_count() : nworkers;
	int w;
	for(w = 0; w < ntypes_of_workers; w++)
		total_nw[w] = 0;

	for(w = 0; w < current_nworkers; w++)
	{
		enum starpu_worker_archtype arch = workers == NULL ? starpu_worker_get_type(w) :
			starpu_worker_get_type(workers[w]);
		if(ntypes_of_workers == 2)
		{
			if(arch == STARPU_CPU_WORKER)
				total_nw[1]++;
			else
				total_nw[0]++;
		}
		else
			total_nw[0]++;
	}
}
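
/* fill times[w][t] with the expected length of each task pool on each worker according to the
   performance models; for CUDA workers an estimated data transfer time is added on top of the
   computation time */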
void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools)
{
	struct sc_hypervisor_policy_task_pool *tp;
	int w, t;
	for (w = 0; w < nw; w++)
	{
		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
		{
			int worker = workers == NULL ? w : workers[w];
			enum starpu_perfmodel_archtype arch = starpu_worker_get_perf_archtype(worker);
			double length = starpu_permodel_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
			if (isnan(length))
				times[w][t] = NAN;
			else
			{
				times[w][t] = length / 1000.;

				double transfer_time = 0.0;
				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
				if(arch == STARPU_CUDA_WORKER)
				{
					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
					if(!worker_in_ctx && !size_ctxs)
					{
						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
						transfer_time += (tp->footprint / transfer_velocity) / 1000.;
					}
					double latency = starpu_get_latency_RAM_CUDA(worker);
					transfer_time += latency / 1000.;
				}
//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
				times[w][t] += transfer_time;
			}
		}
	}
}
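
/* return 1 (and reset the counter) when the worker has been idle in the context for longer
   than the configured max_idle threshold */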
unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker)
{
	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
	struct sc_hypervisor_policy_config *config = sc_w->config;
	if(config != NULL)
	{
		if(sc_w->current_idle_time[worker] > config->max_idle[worker])
		{
			sc_w->current_idle_time[worker] = 0.0;
			return 1;
		}
	}
	return 0;
}
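
/* check whether the resize criterion configured for the hypervisor (worker idleness or velocity
   gap between contexts) is currently met */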
unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
{
	unsigned criteria = sc_hypervisor_get_resize_criteria();
	if(criteria != SC_NOTHING)
	{
		if(criteria == SC_IDLE)
			return sc_hypervisor_check_idle(sched_ctx, worker);
		else
			return sc_hypervisor_check_velocity_gap_btw_ctxs();
	}
	else
		return 0;
}