sc_hypervisor.c

  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011, 2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <sc_hypervisor_intern.h>
  17. #include <sc_hypervisor_policy.h>
  18. #include <starpu_config.h>
  19. double hyp_overhead = 0.0;
  20. unsigned imposed_resize = 0;
  21. unsigned type_of_tasks_known = 0;
  22. struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
  23. static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
  24. static void notify_pushed_task(unsigned sched_ctx, int worker);
  25. static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint,
  26. int hypervisor_tag, double flops);
  27. static void notify_poped_task(unsigned sched_ctx, int worker);
  28. static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size);
  29. static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task);
  30. static void notify_delete_context(unsigned sched_ctx);
  31. extern struct sc_hypervisor_policy idle_policy;
  32. extern struct sc_hypervisor_policy app_driven_policy;
  33. extern struct sc_hypervisor_policy gflops_rate_policy;
  34. #ifdef STARPU_HAVE_GLPK_H
  35. extern struct sc_hypervisor_policy feft_lp_policy;
  36. extern struct sc_hypervisor_policy teft_lp_policy;
  37. extern struct sc_hypervisor_policy ispeed_lp_policy;
  38. extern struct sc_hypervisor_policy throughput_lp_policy;
  39. #endif // STARPU_HAVE_GLPK_H
  40. extern struct sc_hypervisor_policy ispeed_policy;
  41. extern struct sc_hypervisor_policy hard_coded_policy;
  42. static struct sc_hypervisor_policy *predefined_policies[] =
  43. {
  44. &idle_policy,
  45. &app_driven_policy,
  46. #ifdef STARPU_HAVE_GLPK_H
  47. &feft_lp_policy,
  48. &teft_lp_policy,
  49. &ispeed_lp_policy,
  50. &throughput_lp_policy,
  51. #endif // STARPU_HAVE_GLPK_H
  52. &gflops_rate_policy,
  53. &ispeed_policy,
  54. &hard_coded_policy
  55. };
  56. static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
  57. {
  58. STARPU_ASSERT(policy);
  59. hypervisor.policy.name = policy->name;
  60. hypervisor.policy.size_ctxs = policy->size_ctxs;
  61. hypervisor.policy.resize_ctxs = policy->resize_ctxs;
  62. hypervisor.policy.handle_poped_task = policy->handle_poped_task;
  63. hypervisor.policy.handle_pushed_task = policy->handle_pushed_task;
  64. hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
  65. hypervisor.policy.handle_idle_end = policy->handle_idle_end;
  66. hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook;
  67. hypervisor.policy.handle_submitted_job = policy->handle_submitted_job;
  68. hypervisor.policy.end_ctx = policy->end_ctx;
  69. }
  70. static struct sc_hypervisor_policy *_find_hypervisor_policy_from_name(const char *policy_name)
  71. {
  72. if (!policy_name)
  73. return NULL;
  74. unsigned i;
  75. for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
  76. {
  77. struct sc_hypervisor_policy *p;
  78. p = predefined_policies[i];
  79. if (p->name)
  80. {
  81. if (strcmp(policy_name, p->name) == 0) {
  82. /* we found a policy with the requested name */
  83. return p;
  84. }
  85. }
  86. }
  87. fprintf(stderr, "Warning: hypervisor policy \"%s\" was not found, try \"help\" to get a list\n", policy_name);
  88. /* nothing was found */
  89. return NULL;
  90. }
  91. static void display_sched_help_message(void)
  92. {
  93. const char* policy_name = getenv("SC_HYPERVISOR_POLICY");
  94. if (policy_name && (strcmp(policy_name, "help") == 0))
  95. {
  96. fprintf(stderr, "SC_HYPERVISOR_POLICY can be either of\n");
  97. /* display the description of all predefined policies */
  98. unsigned i;
  99. for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
  100. {
  101. struct sc_hypervisor_policy *p = predefined_policies[i];
  102. if (p->name)
  103. {
  104. fprintf(stderr, "%s\n", p->name);
  105. }
  106. }
  107. }
  108. }
  109. static struct sc_hypervisor_policy *_select_hypervisor_policy(struct sc_hypervisor_policy* hypervisor_policy)
  110. {
  111. struct sc_hypervisor_policy *selected_policy = NULL;
  112. if(hypervisor_policy && hypervisor_policy->custom)
  113. return hypervisor_policy;
  114. /* we look if the application specified the name of a policy to load */
  115. const char *policy_name;
  116. if (hypervisor_policy && hypervisor_policy->name)
  117. {
  118. policy_name = hypervisor_policy->name;
  119. }
  120. else
  121. {
  122. policy_name = getenv("SC_HYPERVISOR_POLICY");
  123. }
  124. if (policy_name)
  125. selected_policy = _find_hypervisor_policy_from_name(policy_name);
  126. /* Perhaps there was no policy that matched the name */
  127. if (selected_policy)
  128. return selected_policy;
  129. /* If no policy was specified, we use the idle policy as a default */
  130. return &idle_policy;
  131. }
  132. /* initializes the performance counters that starpu will use to retrieve hints for resizing */
  133. void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
  134. {
  135. /* Perhaps we have to display some help */
  136. display_sched_help_message();
  137. hypervisor.min_tasks = 0;
  138. hypervisor.nsched_ctxs = 0;
  139. char* vel_gap = getenv("SC_HYPERVISOR_MAX_SPEED_GAP");
  140. hypervisor.max_speed_gap = vel_gap ? atof(vel_gap) : SC_SPEED_MAX_GAP_DEFAULT;
  141. char* crit = getenv("SC_HYPERVISOR_TRIGGER_RESIZE");
  142. hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
  143. starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
  144. // hypervisor.start_executing_time = starpu_timing_now();
  145. int i;
  146. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  147. {
  148. hypervisor.resize[i] = 0;
  149. hypervisor.allow_remove[i] = 1;
  150. hypervisor.configurations[i] = NULL;
  151. hypervisor.sr = NULL;
  152. hypervisor.check_min_tasks[i] = 1;
  153. hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  154. hypervisor.sched_ctx_w[i].sched_ctx = STARPU_NMAX_SCHED_CTXS;
  155. hypervisor.sched_ctx_w[i].config = NULL;
  156. hypervisor.sched_ctx_w[i].total_flops = 0.0;
  157. hypervisor.sched_ctx_w[i].submitted_flops = 0.0;
  158. hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
  159. hypervisor.sched_ctx_w[i].start_time = 0.0;
  160. hypervisor.sched_ctx_w[i].real_start_time = 0.0;
  161. hypervisor.sched_ctx_w[i].hyp_react_start_time = 0.0;
  162. hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
  163. hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
  164. hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
  165. hypervisor.sched_ctx_w[i].resize_ack.acked_workers = NULL;
  166. starpu_pthread_mutex_init(&hypervisor.sched_ctx_w[i].mutex, NULL);
  167. hypervisor.optimal_v[i] = 0.0;
  168. hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0;
  169. hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
  170. hypervisor.sched_ctx_w[i].total_flops_available = 0;
  171. hypervisor.sched_ctx_w[i].to_be_sized = 0;
  172. hypervisor.sched_ctx_w[i].consider_max = 0;
  173. int j;
  174. for(j = 0; j < STARPU_NMAXWORKERS; j++)
  175. {
  176. hypervisor.sched_ctx_w[i].start_time_w[j] = 0.0;
  177. hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0;
  178. hypervisor.sched_ctx_w[i].idle_time[j] = 0.0;
  179. hypervisor.sched_ctx_w[i].idle_start_time[j] = 0.0;
  180. hypervisor.sched_ctx_w[i].exec_time[j] = 0.0;
  181. hypervisor.sched_ctx_w[i].exec_start_time[j] = 0.0;
  182. hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0;
  183. hypervisor.sched_ctx_w[i].poped_tasks[j] = 0;
  184. hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
  185. hypervisor.sched_ctx_w[i].elapsed_data[j] = 0;
  186. hypervisor.sched_ctx_w[i].elapsed_tasks[j] = 0;
  187. hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
  188. hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0;
  189. hypervisor.sched_ctx_w[i].compute_idle[j] = 1;
  190. hypervisor.sched_ctx_w[i].compute_partial_idle[j] = 0;
  191. }
  192. }
  193. struct sc_hypervisor_policy *selected_hypervisor_policy = _select_hypervisor_policy(hypervisor_policy);
  194. _load_hypervisor_policy(selected_hypervisor_policy);
  195. perf_counters = (struct starpu_sched_ctx_performance_counters*)malloc(sizeof(struct starpu_sched_ctx_performance_counters));
  196. perf_counters->notify_idle_cycle = notify_idle_cycle;
  197. perf_counters->notify_pushed_task = notify_pushed_task;
  198. perf_counters->notify_poped_task = notify_poped_task;
  199. perf_counters->notify_post_exec_task = notify_post_exec_task;
  200. perf_counters->notify_submitted_job = notify_submitted_job;
  201. perf_counters->notify_empty_ctx = notify_empty_ctx;
  202. perf_counters->notify_delete_context = notify_delete_context;
  203. starpu_sched_ctx_notify_hypervisor_exists();
  204. return (void*)perf_counters;
  205. }
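  /* Usage sketch (illustrative only, not part of the hypervisor itself): a typical
   * application-side lifecycle built from the entry points defined in this file;
   * 'sched_ctx' and 'total_flops' are placeholders supplied by the application.
   *
   *   void *counters = sc_hypervisor_init(NULL);           // NULL -> env var / default policy
   *   sc_hypervisor_register_ctx(sched_ctx, total_flops);  // let the hypervisor watch this ctx
   *   // ... submit tasks; sc_hypervisor_stop_resize()/sc_hypervisor_start_resize()
   *   // can be called at any time to suspend or resume automatic resizing ...
   *   sc_hypervisor_unregister_ctx(sched_ctx);
   *   sc_hypervisor_shutdown();
   *
   * The policy can also be chosen at run time with SC_HYPERVISOR_POLICY=<name>
   * ("help" prints the list of predefined policies declared above). */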
  206. const char* sc_hypervisor_get_policy()
  207. {
  208. return hypervisor.policy.name;
  209. }
  210. /* the user can forbid the resizing process*/
  211. void sc_hypervisor_stop_resize(unsigned sched_ctx)
  212. {
  213. imposed_resize = 1;
  214. hypervisor.resize[sched_ctx] = 0;
  215. }
  216. /* the user can restart the resizing process*/
  217. void sc_hypervisor_start_resize(unsigned sched_ctx)
  218. {
  219. imposed_resize = 1;
  220. hypervisor.resize[sched_ctx] = 1;
  221. }
  222. static void _print_current_time()
  223. {
  224. char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT");
  225. int sp = stop_print ? atoi(stop_print) : 1;
  226. if(!sp)
  227. {
  228. if(hypervisor.start_executing_time == 0.0)
  229. {
  230. fprintf(stdout, "Time: %lf\n", -1.0);
  231. return;
  232. }
  233. double curr_time = starpu_timing_now();
  234. double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
  235. fprintf(stdout, "Time: %lf\n", elapsed_time);
  236. int i;
  237. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  238. {
  239. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  240. {
  241. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
  242. double cpu_speed = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
  243. double cuda_speed = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER);
  244. int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
  245. int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
  246. fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
  247. }
  248. }
  249. }
  250. return;
  251. }
  252. void sc_hypervisor_shutdown(void)
  253. {
  254. int i;
  255. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  256. {
  257. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && hypervisor.nsched_ctxs > 0)
  258. {
  259. sc_hypervisor_stop_resize(hypervisor.sched_ctxs[i]);
  260. sc_hypervisor_unregister_ctx(hypervisor.sched_ctxs[i]);
  261. starpu_pthread_mutex_destroy(&hypervisor.sched_ctx_w[i].mutex);
  262. }
  263. }
  264. perf_counters->notify_idle_cycle = NULL;
  265. perf_counters->notify_pushed_task = NULL;
  266. perf_counters->notify_poped_task = NULL;
  267. perf_counters->notify_post_exec_task = NULL;
  268. perf_counters->notify_delete_context = NULL;
  269. free(perf_counters);
  270. perf_counters = NULL;
  271. starpu_pthread_mutex_destroy(&act_hypervisor_mutex);
  272. }
  273. void sc_hypervisor_print_overhead()
  274. {
  275. // hyp_overhead /= 1000000.0;
  276. FILE *f;
  277. const char *sched_env = getenv("OVERHEAD_FILE");
  278. if(!sched_env)
  279. f = fopen("overhead_microsec", "a");
  280. else
  281. f = fopen(sched_env, "a");
  282. fprintf(f, "%lf \n", hyp_overhead);
  283. fclose(f);
  284. }
  285. /* the hypervisor is in charge only of the contexts registered to it*/
  286. void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
  287. {
  288. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  289. hypervisor.configurations[sched_ctx] = NULL;
  290. hypervisor.resize_requests[sched_ctx] = NULL;
  291. starpu_pthread_mutex_init(&hypervisor.conf_mut[sched_ctx], NULL);
  292. starpu_pthread_mutex_init(&hypervisor.resize_mut[sched_ctx], NULL);
  293. _add_config(sched_ctx);
  294. hypervisor.sched_ctx_w[sched_ctx].sched_ctx = sched_ctx;
  295. hypervisor.sched_ctxs[hypervisor.nsched_ctxs++] = sched_ctx;
  296. hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
  297. hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
  298. hypervisor.resize[sched_ctx] = 0;//1;
  299. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  300. }
  301. static int _get_first_free_sched_ctx(unsigned *sched_ctxs, int nsched_ctxs)
  302. {
  303. int i;
  304. for(i = 0; i < nsched_ctxs; i++)
  305. if(sched_ctxs[i] == STARPU_NMAX_SCHED_CTXS)
  306. return i;
  307. return STARPU_NMAX_SCHED_CTXS;
  308. }
  309. /* rearrange the array of sched_ctxs so that instead of {MAXVAL, MAXVAL, 5, MAXVAL, 7}
  310. we get {5, 7, MAXVAL, MAXVAL, MAXVAL};
  311. this makes it easier to iterate over the array afterwards
  312. */
  313. static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs)
  314. {
  315. int first_free_id = STARPU_NMAX_SCHED_CTXS;
  316. int i;
  317. for(i = 0; i < old_nsched_ctxs; i++)
  318. {
  319. if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  320. {
  321. first_free_id = _get_first_free_sched_ctx(sched_ctxs, old_nsched_ctxs);
  322. if(first_free_id != STARPU_NMAX_SCHED_CTXS)
  323. {
  324. sched_ctxs[first_free_id] = sched_ctxs[i];
  325. sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  326. }
  327. }
  328. }
  329. }
  330. /* unregistered contexts will no longer be resized */
  331. void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
  332. {
  333. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  334. printf("unregister ctx %d with remaining flops %lf \n", hypervisor.sched_ctx_w[sched_ctx].sched_ctx, hypervisor.sched_ctx_w[sched_ctx].remaining_flops);
  335. #endif
  336. if(hypervisor.policy.end_ctx)
  337. hypervisor.policy.end_ctx(sched_ctx);
  338. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  339. unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
  340. int *pus;
  341. unsigned npus = starpu_sched_ctx_get_workers_list(sched_ctx, &pus);
  342. if(npus)
  343. {
  344. starpu_sched_ctx_set_priority(pus, npus, father, 1);
  345. starpu_sched_ctx_set_priority_on_level(pus, npus, father, 1);
  346. free(pus);
  347. }
  348. unsigned i;
  349. for(i = 0; i < hypervisor.nsched_ctxs; i++)
  350. {
  351. if(hypervisor.sched_ctxs[i] == sched_ctx)
  352. {
  353. hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  354. break;
  355. }
  356. }
  357. _rearange_sched_ctxs(hypervisor.sched_ctxs, hypervisor.nsched_ctxs);
  358. hypervisor.nsched_ctxs--;
  359. hypervisor.sched_ctx_w[sched_ctx].sched_ctx = STARPU_NMAX_SCHED_CTXS;
  360. _remove_config(sched_ctx);
  361. starpu_pthread_mutex_destroy(&hypervisor.conf_mut[sched_ctx]);
  362. starpu_pthread_mutex_destroy(&hypervisor.resize_mut[sched_ctx]);
  363. if(hypervisor.nsched_ctxs == 1)
  364. sc_hypervisor_stop_resize(hypervisor.sched_ctxs[0]);
  365. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  366. }
  367. void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now)
  368. {
  369. if(now)
  370. hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now();
  371. starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now());
  372. }
  373. double _get_max_speed_gap()
  374. {
  375. return hypervisor.max_speed_gap;
  376. }
  377. unsigned sc_hypervisor_get_resize_criteria()
  378. {
  379. return hypervisor.resize_criteria;
  380. }
  381. static int get_ntasks( int *tasks)
  382. {
  383. int ntasks = 0;
  384. int j;
  385. for(j = 0; j < STARPU_NMAXWORKERS; j++)
  386. {
  387. ntasks += tasks[j];
  388. }
  389. return ntasks;
  390. }
  391. int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch)
  392. {
  393. int nworkers_ctx = 0;
  394. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  395. int worker;
  396. struct starpu_sched_ctx_iterator it;
  397. workers->init_iterator(workers, &it);
  398. while(workers->has_next(workers, &it))
  399. {
  400. worker = workers->get_next(workers, &it);
  401. enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
  402. if(curr_arch == arch || arch == STARPU_ANY_WORKER)
  403. nworkers_ctx++;
  404. }
  405. return nworkers_ctx;
  406. }
  407. static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
  408. {
  409. int i;
  410. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  411. {
  412. hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i] = val;
  413. if(val == 0)
  414. {
  415. hypervisor.sched_ctx_w[sched_ctx].elapsed_data[i] = 0;
  416. hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[i] = 0;
  417. }
  418. }
  419. }
  420. double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
  421. {
  422. double ret_val = 0.0;
  423. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
  424. int worker;
  425. struct starpu_sched_ctx_iterator it;
  426. workers->init_iterator(workers, &it);
  427. while(workers->has_next(workers, &it))
  428. {
  429. worker = workers->get_next(workers, &it);
  430. ret_val += sc_w->elapsed_flops[worker];
  431. }
  432. return ret_val;
  433. }
  434. double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
  435. {
  436. double ret_val = 0.0;
  437. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
  438. int worker;
  439. struct starpu_sched_ctx_iterator it;
  440. workers->init_iterator(workers, &it);
  441. while(workers->has_next(workers, &it))
  442. {
  443. worker = workers->get_next(workers, &it);
  444. ret_val += sc_w->total_elapsed_flops[worker];
  445. }
  446. return ret_val;
  447. }
  448. double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx)
  449. {
  450. double ready_flops = starpu_sched_ctx_get_nready_flops(sched_ctx);
  451. unsigned *sched_ctxs;
  452. int nsched_ctxs = 0;
  453. sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, starpu_sched_ctx_get_hierarchy_level(sched_ctx), sched_ctx);
  454. int s;
  455. for(s = 0; s < nsched_ctxs; s++)
  456. ready_flops += sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sched_ctxs[s]);
  457. //ready_flops += starpu_get_nready_flops_of_sched_ctx(sched_ctxs[s]);
  458. return ready_flops;
  459. }
  460. static void _decrement_elapsed_flops_per_worker(unsigned sched_ctx, int worker, double flops)
  461. {
  462. if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
  463. {
  464. unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx);
  465. hypervisor.sched_ctx_w[father].elapsed_flops[worker] -= flops;
  466. _decrement_elapsed_flops_per_worker(father, worker, flops);
  467. }
  468. return;
  469. }
  470. void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
  471. {
  472. double start_time = starpu_timing_now();
  473. if(sender_sched_ctx != STARPU_NMAX_SCHED_CTXS)
  474. {
  475. /* info concerning only the gflops_rate strategy */
  476. struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
  477. sender_sc_w->start_time = start_time;
  478. unsigned nworkers = starpu_worker_get_count();
  479. int i;
  480. for(i = 0; i < nworkers; i++)
  481. {
  482. sender_sc_w->start_time_w[i] = start_time;
  483. sender_sc_w->idle_time[i] = 0.0;
  484. sender_sc_w->idle_start_time[i] = 0.0;
  485. hypervisor.sched_ctx_w[sender_sched_ctx].exec_time[i] = 0.0;
  486. // hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
  487. _decrement_elapsed_flops_per_worker(sender_sched_ctx, i, hypervisor.sched_ctx_w[sender_sched_ctx].elapsed_flops[i]);
  488. }
  489. _set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
  490. }
  491. if(receiver_sched_ctx != STARPU_NMAX_SCHED_CTXS)
  492. {
  493. struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
  494. receiver_sc_w->start_time = start_time;
  495. unsigned nworkers = starpu_worker_get_count();
  496. int i;
  497. for(i = 0; i < nworkers; i++)
  498. {
  499. receiver_sc_w->start_time_w[i] = (receiver_sc_w->start_time_w[i] != 0.0) ? starpu_timing_now() : 0.0;
  500. receiver_sc_w->idle_time[i] = 0.0;
  501. receiver_sc_w->idle_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? 0.0 : starpu_timing_now();
  502. // hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
  503. hypervisor.sched_ctx_w[receiver_sched_ctx].exec_time[i] = 0.0;
  504. _decrement_elapsed_flops_per_worker(receiver_sched_ctx, i, hypervisor.sched_ctx_w[receiver_sched_ctx].elapsed_flops[i]);
  505. }
  506. _set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
  507. }
  508. return;
  509. }
  510. /* actually move the workers: the cpus are moved, gpus are only shared */
  511. /* forbid another resize request until this one has been taken into account */
  512. void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now)
  513. {
  514. if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])
  515. {
  516. _print_current_time();
  517. unsigned j;
  518. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  519. printf("resize ctx %d with %d workers", sender_sched_ctx, nworkers_to_move);
  520. for(j = 0; j < nworkers_to_move; j++)
  521. printf(" %d", workers_to_move[j]);
  522. printf("\n");
  523. #endif
  524. hypervisor.allow_remove[receiver_sched_ctx] = 0;
  525. starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx);
  526. if(now)
  527. {
  528. unsigned j;
  529. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  530. printf("remove now from ctx %d:", sender_sched_ctx);
  531. for(j = 0; j < nworkers_to_move; j++)
  532. printf(" %d", workers_to_move[j]);
  533. printf("\n");
  534. #endif
  535. starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx);
  536. hypervisor.allow_remove[receiver_sched_ctx] = 1;
  537. _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
  538. }
  539. else
  540. {
  541. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  542. if(ret != EBUSY)
  543. {
  544. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
  545. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
  546. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
  547. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
  548. unsigned i;
  549. for(i = 0; i < nworkers_to_move; i++)
  550. {
  551. hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
  552. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];
  553. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;
  554. }
  555. hypervisor.resize[sender_sched_ctx] = 0;
  556. if(imposed_resize) imposed_resize = 0;
  557. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  558. }
  559. }
  560. struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx);
  561. unsigned i;
  562. for(i = 0; i < nworkers_to_move; i++)
  563. new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle;
  564. }
  565. return;
  566. }
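  /* Sketch of how a resizing policy typically drives the function above
   * (illustrative; context ids and worker ids are placeholders):
   *
   *   int to_move[2] = {2, 3};
   *   // move two workers from ctx 'src' to ctx 'dst'; now = 0 defers the removal
   *   sc_hypervisor_move_workers(src, dst, to_move, 2, 0);
   *
   * With now == 0 the workers are added to the receiver immediately, but they are
   * removed from the sender only once _ack_resize_completed() below has collected an
   * ack for each of them (the acks arrive through notify_post_exec_task() when lazy
   * resizing is used), so a worker is never taken away in the middle of a task it is
   * still running for the sender. */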
  567. void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx)
  568. {
  569. if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx])
  570. {
  571. _print_current_time();
  572. unsigned j;
  573. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  574. printf("add to ctx %d:", sched_ctx);
  575. for(j = 0; j < nworkers_to_add; j++)
  576. printf(" %d", workers_to_add[j]);
  577. printf("\n");
  578. #endif
  579. starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx);
  580. struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx);
  581. unsigned i;
  582. for(i = 0; i < nworkers_to_add; i++)
  583. new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;
  584. _reset_resize_sample_info(STARPU_NMAX_SCHED_CTXS, sched_ctx);
  585. }
  586. return;
  587. }
  588. unsigned sc_hypervisor_can_resize(unsigned sched_ctx)
  589. {
  590. return hypervisor.resize[sched_ctx];
  591. }
  592. void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now)
  593. {
  594. if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx])
  595. {
  596. _print_current_time();
  597. unsigned nworkers = 0;
  598. int workers[nworkers_to_remove];
  599. if(now)
  600. {
  601. unsigned j;
  602. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  603. printf("remove explicitley now from ctx %d:", sched_ctx);
  604. for(j = 0; j < nworkers_to_remove; j++)
  605. printf(" %d", workers_to_remove[j]);
  606. printf("\n");
  607. #endif
  608. starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx);
  609. _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS);
  610. }
  611. else
  612. {
  613. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  614. printf("try to remove from ctx %d: ", sched_ctx);
  615. unsigned j;
  616. for(j = 0; j < nworkers_to_remove; j++)
  617. printf(" %d", workers_to_remove[j]);
  618. printf("\n");
  619. #endif
  620. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
  621. if(ret != EBUSY)
  622. {
  623. unsigned i;
  624. for(i = 0; i < nworkers_to_remove; i++)
  625. if(starpu_sched_ctx_contains_worker(workers_to_remove[i], sched_ctx))
  626. workers[nworkers++] = workers_to_remove[i];
  627. hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
  628. hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
  629. hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = (int)nworkers;
  630. hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
  631. for(i = 0; i < nworkers; i++)
  632. {
  633. hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
  634. hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];
  635. hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;
  636. }
  637. hypervisor.resize[sched_ctx] = 0;
  638. if(imposed_resize) imposed_resize = 0;
  639. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
  640. }
  641. }
  642. }
  643. return;
  644. }
  645. static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
  646. {
  647. if(worker != -1 && !starpu_sched_ctx_contains_worker(worker, sched_ctx))
  648. return 0;
  649. struct sc_hypervisor_resize_ack *resize_ack = NULL;
  650. unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
  651. int i;
  652. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  653. {
  654. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  655. {
  656. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
  657. starpu_pthread_mutex_lock(&sc_w->mutex);
  658. unsigned only_remove = 0;
  659. if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx &&
  660. sc_w->resize_ack.nmoved_workers > 0 && starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[i]))
  661. {
  662. int j;
  663. for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
  664. if(sc_w->resize_ack.moved_workers[j] == worker)
  665. {
  666. only_remove = 1;
  667. _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS);
  668. starpu_pthread_mutex_unlock(&sc_w->mutex);
  669. break;
  670. }
  671. }
  672. if(only_remove ||
  673. (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == (int)sched_ctx))
  674. {
  675. resize_ack = &sc_w->resize_ack;
  676. sender_sched_ctx = hypervisor.sched_ctxs[i];
  677. starpu_pthread_mutex_unlock(&sc_w->mutex);
  678. break;
  679. }
  680. starpu_pthread_mutex_unlock(&sc_w->mutex);
  681. }
  682. }
  683. /* if there is no ctx waiting for its ack return 1*/
  684. if(resize_ack == NULL)
  685. {
  686. return 1;
  687. }
  688. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  689. if(ret != EBUSY)
  690. {
  691. int *moved_workers = resize_ack->moved_workers;
  692. int nmoved_workers = resize_ack->nmoved_workers;
  693. int *acked_workers = resize_ack->acked_workers;
  694. if(worker != -1)
  695. {
  696. for(i = 0; i < nmoved_workers; i++)
  697. {
  698. int moved_worker = moved_workers[i];
  699. if(moved_worker == worker && acked_workers[i] == 0)
  700. {
  701. acked_workers[i] = 1;
  702. }
  703. }
  704. }
  705. int nacked_workers = 0;
  706. for(i = 0; i < nmoved_workers; i++)
  707. {
  708. nacked_workers += (acked_workers[i] == 1);
  709. }
  710. unsigned resize_completed = (nacked_workers == nmoved_workers);
  711. int receiver_sched_ctx = sched_ctx;
  712. if(resize_completed)
  713. {
  714. /* if the user has not granted permission to resize, don't do it,
  715. whatever the application says */
  716. if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
  717. {
  718. /* int j; */
  719. /* printf("remove after ack from ctx %d:", sender_sched_ctx); */
  720. /* for(j = 0; j < nmoved_workers; j++) */
  721. /* printf(" %d", moved_workers[j]); */
  722. /* printf("\n"); */
  723. starpu_sched_ctx_remove_workers(moved_workers, nmoved_workers, sender_sched_ctx);
  724. _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
  725. hypervisor.resize[sender_sched_ctx] = 1;
  726. hypervisor.allow_remove[receiver_sched_ctx] = 1;
  727. /* if the user allowed resizing leave the decisions to the application */
  728. if(imposed_resize) imposed_resize = 0;
  729. resize_ack->receiver_sched_ctx = -1;
  730. resize_ack->nmoved_workers = 0;
  731. free(resize_ack->moved_workers);
  732. free(resize_ack->acked_workers);
  733. }
  734. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  735. return resize_completed;
  736. }
  737. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  738. }
  739. return 0;
  740. }
  741. /* Enqueue a resize request for 'sched_ctx', to be executed when the tasks
  742. * of 'sched_ctx' tagged with 'task_tag' complete. */
  743. void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag)
  744. {
  745. struct resize_request_entry *entry;
  746. entry = malloc(sizeof *entry);
  747. STARPU_ASSERT(entry != NULL);
  748. entry->sched_ctx = sched_ctx;
  749. entry->task_tag = task_tag;
  750. starpu_pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
  751. HASH_ADD_INT(hypervisor.resize_requests[sched_ctx], task_tag, entry);
  752. starpu_pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
  753. }
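  /* Sketch of the application-driven flow this request feeds (illustrative; the
   * 'hypervisor_tag' field of struct starpu_task is assumed here, matching the
   * tag argument delivered to notify_post_exec_task()):
   *
   *   struct starpu_task *task = starpu_task_create();
   *   // ... fill in codelet, handles, ...
   *   task->hypervisor_tag = 42;                       // tag the task
   *   sc_hypervisor_post_resize_request(ctx, 42);      // resize when tag 42 completes
   *   starpu_task_submit(task);
   *
   * When a task carrying that tag finishes, notify_post_exec_task() receives the tag
   * and the pending entry can be looked up in hypervisor.resize_requests[ctx], so the
   * active policy reacts exactly at the point the application asked for. */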
  754. void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
  755. {
  756. if(hypervisor.policy.resize_ctxs)
  757. hypervisor.policy.resize_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
  758. }
  759. void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned allow)
  760. {
  761. hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker] = allow;
  762. }
  763. int _update_max_hierarchically(unsigned *sched_ctxs, int nsched_ctxs)
  764. {
  765. int s, i;
  766. unsigned leaves[hypervisor.nsched_ctxs];
  767. int nleaves = 0;
  768. sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves);
  769. int max = 0;
  770. for(s = 0; s < nsched_ctxs; s++)
  771. {
  772. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
  773. unsigned found = 0;
  774. int l = 0;
  775. for(l = 0; l < nleaves; l++)
  776. {
  777. if(leaves[l] == sched_ctxs[s])
  778. {
  779. found = 1;
  780. break;
  781. }
  782. }
  783. if(!found)
  784. {
  785. config->max_nworkers = 0;
  786. int level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]);
  787. unsigned *sched_ctxs_child;
  788. int nsched_ctxs_child = 0;
  789. sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctxs[s]);
  790. if(nsched_ctxs_child > 0)
  791. {
  792. config->max_nworkers += _update_max_hierarchically(sched_ctxs_child, nsched_ctxs_child);
  793. free(sched_ctxs_child);
  794. int max_possible_workers = starpu_worker_get_count();
  795. if(config->max_nworkers < 0)
  796. config->max_nworkers = 0;
  797. if(config->max_nworkers > max_possible_workers)
  798. config->max_nworkers = max_possible_workers;
  799. }
  800. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  801. printf("ctx %d has max %d \n", sched_ctxs[s], config->max_nworkers);
  802. #endif
  803. }
  804. max += config->max_nworkers;
  805. }
  806. return max;
  807. }
  808. void _update_max_diff_hierarchically(unsigned father, double diff)
  809. {
  810. int level = starpu_sched_ctx_get_hierarchy_level(father);
  811. unsigned *sched_ctxs_child;
  812. int nsched_ctxs_child = 0;
  813. sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, father);
  814. if(nsched_ctxs_child > 0)
  815. {
  816. int s;
  817. double total_nflops = 0.0;
  818. for(s = 0; s < nsched_ctxs_child; s++)
  819. {
  820. total_nflops += hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops;
  821. }
  822. int accumulated_diff = 0;
  823. for(s = 0; s < nsched_ctxs_child; s++)
  824. {
  825. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs_child[s]);
  826. double remaining_flops = hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops;
  827. int current_diff = total_nflops == 0.0 ? 0.0 : floor((remaining_flops / total_nflops) * diff);
  828. accumulated_diff += current_diff;
  829. if(s == (nsched_ctxs_child - 1) && accumulated_diff < diff)
  830. current_diff += (diff - accumulated_diff);
  831. config->max_nworkers += current_diff;
  832. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  833. printf("%d: redib max_nworkers incr %d diff = %d \n", sched_ctxs_child[s], config->max_nworkers, current_diff);
  834. #endif
  835. _update_max_diff_hierarchically(sched_ctxs_child[s], current_diff);
  836. }
  837. }
  838. return;
  839. }
  840. void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_workers)
  841. {
  842. unsigned leaves[hypervisor.nsched_ctxs];
  843. unsigned nleaves = 0;
  844. sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves);
  845. int l;
  846. unsigned sched_ctx;
  847. int total_max_nworkers = 0;
  848. // int max_cpus = starpu_cpu_worker_get_count();
  849. unsigned configured = 0;
  850. int i;
  851. for(i = 0; i < nsched_ctxs; i++)
  852. {
  853. unsigned found = 0;
  854. for(l = 0; l < nleaves; l++)
  855. {
  856. if(leaves[l] == sched_ctxs[i])
  857. {
  858. found = 1;
  859. break;
  860. }
  861. }
  862. if(!found)
  863. continue;
  864. sched_ctx = sched_ctxs[i];
  865. if(hypervisor.sched_ctx_w[sched_ctx].to_be_sized) continue;
  866. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);
  867. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  868. int worker;
  869. struct starpu_sched_ctx_iterator it;
  870. workers->init_iterator(workers, &it);
  871. double elapsed_time_worker[STARPU_NMAXWORKERS];
  872. double norm_idle_time = 0.0;
  873. double end_time = starpu_timing_now();
  874. while(workers->has_next(workers, &it))
  875. {
  876. double idle_time = 0.0;
  877. worker = workers->get_next(workers, &it);
  878. if(hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker])
  879. {
  880. if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
  881. elapsed_time_worker[worker] = 0.0;
  882. else
  883. elapsed_time_worker[worker] = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0;
  884. if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] == 0.0)
  885. {
  886. idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker]; /* in seconds */
  887. }
  888. else
  889. {
  890. double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */
  891. idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] + idle;
  892. }
  893. norm_idle_time += (elapsed_time_worker[worker] == 0.0 ? 0.0 : (idle_time / elapsed_time_worker[worker]));
  894. /* printf("%d/%d: start time %lf elapsed time %lf idle time %lf norm_idle_time %lf \n", */
  895. /* worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker], elapsed_time_worker[worker], idle_time, norm_idle_time); */
  896. }
  897. }
  898. double norm_exec_time = 0.0;
  899. for(worker = 0; worker < STARPU_NMAXWORKERS; worker++)
  900. {
  901. double exec_time = 0.0;
  902. if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
  903. elapsed_time_worker[worker] = 0.0;
  904. else
  905. elapsed_time_worker[worker] = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0;
  906. if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] == 0.0)
  907. {
  908. exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker];
  909. }
  910. else
  911. {
  912. double current_exec_time = 0.0;
  913. if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] < hypervisor.sched_ctx_w[sched_ctx].start_time)
  914. current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
  915. else
  916. current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */
  917. exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time;
  918. }
  919. norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 0.0 : exec_time / elapsed_time_worker[worker];
  920. }
  921. double curr_time = starpu_timing_now();
  922. double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
  923. int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctx);
  924. /* if(norm_idle_time >= 0.9) */
  925. /* { */
  926. /* config->max_nworkers = lrint(norm_exec_time); */
  927. /* } */
  928. /* else */
  929. /* { */
  930. /* if(norm_idle_time < 0.1) */
  931. /* config->max_nworkers = lrint(norm_exec_time) + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */
  932. /* else */
  933. /* config->max_nworkers = lrint(norm_exec_time); */
  934. /* } */
  935. config->max_nworkers = lrint(norm_exec_time);
  936. // config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1;
  937. /* if(config->max_nworkers < 0) */
  938. /* config->max_nworkers = 0; */
  939. /* if(config->max_nworkers > max_workers) */
  940. /* config->max_nworkers = max_workers; */
  941. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  942. printf("%d: ready tasks %d norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n",
  943. sched_ctx, nready_tasks, norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers);
  944. #endif
  945. total_max_nworkers += config->max_nworkers;
  946. configured = 1;
  947. }
  948. unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels();
  949. if(nhierarchy_levels > 1 && configured)
  950. {
  951. unsigned *sched_ctxs2;
  952. int nsched_ctxs2;
  953. sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS);
  954. if(nsched_ctxs2 > 0)
  955. {
  956. _update_max_hierarchically(sched_ctxs2, nsched_ctxs2);
  957. int s;
  958. int current_total_max_nworkers = 0;
  959. double max_nflops = 0.0;
  960. unsigned max_nflops_sched_ctx = sched_ctxs2[0];
  961. for(s = 0; s < nsched_ctxs2; s++)
  962. {
  963. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs2[s]);
  964. current_total_max_nworkers += config->max_nworkers;
  965. if(max_nflops < hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops)
  966. {
  967. max_nflops = hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops;
  968. max_nflops_sched_ctx = sched_ctxs2[s];
  969. }
  970. }
  971. int max_possible_workers = starpu_worker_get_count();
  972. /*if the sum of the max cpus is smaller than the total cpus available
  973. increase the max for the ones having more ready tasks to exec */
  974. if(current_total_max_nworkers < max_possible_workers)
  975. {
  976. int diff = max_possible_workers - current_total_max_nworkers;
  977. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nflops_sched_ctx);
  978. config->max_nworkers += diff;
  979. #ifdef STARPU_SC_HYPERVISOR_DEBUG
  980. printf("%d: redib max_nworkers incr %d \n", max_nflops_sched_ctx, config->max_nworkers);
  981. #endif
  982. _update_max_diff_hierarchically(max_nflops_sched_ctx, diff);
  983. }
  984. }
  985. }
  986. /*if the sum of the max cpus is smaller than the total cpus available
  987. increase the max for the ones having more ready tasks to exec */
  988. /* if(configured && total_max_nworkers < max_workers) */
  989. /* { */
  990. /* int diff = max_workers - total_max_nworkers; */
  991. /* int max_nready = -1; */
  992. /* unsigned max_nready_sched_ctx = sched_ctxs[0]; */
  993. /* for(i = 0; i < nsched_ctxs; i++) */
  994. /* { */
  995. /* int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctxs[i]); */
  996. /* if(max_nready < nready_tasks) */
  997. /* { */
  998. /* max_nready = nready_tasks; */
  999. /* max_nready_sched_ctx = sched_ctxs[i]; */
  1000. /* } */
  1001. /* } */
  1002. /* struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx); */
  1003. /* config->max_nworkers += diff; */
  1004. /* printf("%d: redib max_nworkers incr %d \n", max_nready_sched_ctx, config->max_nworkers); */
  1005. /* } */
  1006. }
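  /* Worked example of the max_nworkers formula above (illustrative numbers): if a
   * context has three workers whose exec_time / elapsed_time ratios over the last
   * sample are 0.9, 0.8 and 0.3, then norm_exec_time = 2.0 and the context's
   * config->max_nworkers becomes lrint(2.0) = 2, i.e. roughly the number of workers
   * the context actually kept busy; the hierarchical pass that follows then
   * redistributes any leftover workers towards the context with the most remaining
   * flops. */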
  1007. /* notifies the hypervisor that a new task was pushed on the queue of the worker */
  1008. static void notify_pushed_task(unsigned sched_ctx, int worker)
  1009. {
  1010. hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++;
  1011. if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0)
  1012. hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
  1013. if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
  1014. {
  1015. hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now();
  1016. }
  1017. int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
  1018. if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx])
  1019. {
  1020. hypervisor.resize[sched_ctx] = 1;
  1021. if(imposed_resize) imposed_resize = 0;
  1022. hypervisor.check_min_tasks[sched_ctx] = 0;
  1023. }
  1024. if(hypervisor.policy.handle_pushed_task)
  1025. hypervisor.policy.handle_pushed_task(sched_ctx, worker);
  1026. }
  1027. unsigned choose_ctx_to_steal(int worker)
  1028. {
  1029. int j;
  1030. int ns = hypervisor.nsched_ctxs;
  1031. int max_ready_tasks = 0;
  1032. unsigned chosen_ctx = STARPU_NMAX_SCHED_CTXS;
  1033. for(j = 0; j < ns; j++)
  1034. {
  1035. unsigned other_ctx = hypervisor.sched_ctxs[j];
  1036. int nready = starpu_sched_ctx_get_nready_tasks(other_ctx);
  1037. if(!starpu_sched_ctx_contains_worker(worker, other_ctx) && max_ready_tasks < nready)
  1038. {
  1039. max_ready_tasks = nready;
  1040. chosen_ctx = other_ctx;
  1041. }
  1042. }
  1043. return chosen_ctx;
  1044. }
  1045. /* notifies the hypervisor that the worker spent another cycle in idle time */
  1046. static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
  1047. {
  1048. if(hypervisor.start_executing_time == 0.0) return;
  1049. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
  1050. sc_w->current_idle_time[worker] += idle_time;
  1051. if(sc_w->idle_start_time[worker] == 0.0 && sc_w->hyp_react_start_time != 0.0)
  1052. sc_w->idle_start_time[worker] = starpu_timing_now();
  1053. if(sc_w->idle_start_time[worker] > 0.0)
  1054. {
  1055. double end_time = starpu_timing_now();
  1056. sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */
  1057. }
  1058. hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = starpu_timing_now();
  1059. if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle)
  1060. {
  1061. if(sc_w->hyp_react_start_time == 0.0)
  1062. sc_hypervisor_reset_react_start_time(sched_ctx, 1);
  1063. double curr_time = starpu_timing_now();
  1064. double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */
  1065. if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample)
  1066. {
  1067. unsigned idle_everywhere = 0;
  1068. unsigned *sched_ctxs = NULL;
  1069. unsigned nsched_ctxs = 0;
  1070. int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
  1071. if(ret != EBUSY)
  1072. {
  1073. if(sc_hypervisor_check_idle(sched_ctx, worker))
  1074. {
  1075. idle_everywhere = 1;
  1076. nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs);
  1077. int s;
  1078. for(s = 0; s < nsched_ctxs; s++)
  1079. {
  1080. if(hypervisor.sched_ctx_w[sched_ctxs[s]].sched_ctx != STARPU_NMAX_SCHED_CTXS)
  1081. {
  1082. if(!sc_hypervisor_check_idle(sched_ctxs[s], worker))
  1083. idle_everywhere = 0;
  1084. }
  1085. }
  1086. free(sched_ctxs);
  1087. }
  1088. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  1089. }
  1090. if(idle_everywhere)
  1091. {
  1092. double hyp_overhead_start = starpu_timing_now();
  1093. if(elapsed_time > (sc_w->config->time_sample*2))
  1094. hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
  1095. double hyp_overhead_end = starpu_timing_now();
  1096. hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
  1097. if(elapsed_time > (sc_w->config->time_sample*2))
  1098. sc_hypervisor_reset_react_start_time(sched_ctx, 1);
  1099. else
  1100. sc_hypervisor_reset_react_start_time(sched_ctx, 0);
  1101. }
  1102. }
  1103. }
  1104. return;
  1105. }
  1106. void _update_real_start_time_hierarchically(unsigned sched_ctx)
  1107. {
  1108. hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
  1109. if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
  1110. {
  1111. _update_real_start_time_hierarchically(starpu_sched_ctx_get_inheritor(sched_ctx));
  1112. }
  1113. return;
  1114. }
  1115. /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
  1116. static void notify_poped_task(unsigned sched_ctx, int worker)
  1117. {
  1118. if(hypervisor.start_executing_time == 0.0)
  1119. hypervisor.start_executing_time = starpu_timing_now();
  1120. if(!hypervisor.resize[sched_ctx])
  1121. hypervisor.resize[sched_ctx] = 1;
  1122. if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
  1123. _update_real_start_time_hierarchically(sched_ctx);
  1124. if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0)
  1125. {
  1126. hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now();
  1127. }
  1128. hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = starpu_timing_now();
  1129. if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] > 0.0)
  1130. {
  1131. int ns = hypervisor.nsched_ctxs;
  1132. int j;
  1133. for(j = 0; j < ns; j++)
  1134. {
  1135. if(hypervisor.sched_ctxs[j] != sched_ctx)
  1136. {
  1137. if(hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].idle_start_time[worker] > 0.0)
  1138. hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].compute_partial_idle[worker] = 1;
  1139. }
  1140. }
  1141. double end_time = starpu_timing_now();
  1142. double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */
  1143. if(hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker])
  1144. hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle / 2.0;
  1145. else
  1146. hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle;
  1147. hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker] = 0;
  1148. hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = 0.0;
  1149. }
  1150. if(hypervisor.resize[sched_ctx])
  1151. hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
  1152. if(hypervisor.policy.handle_idle_end)
  1153. hypervisor.policy.handle_idle_end(sched_ctx, worker);
  1154. }
static void _update_counters_hierarchically(int worker, unsigned sched_ctx, double flops, size_t data_size)
{
	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++;
	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops;
	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops;
	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
		_update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size);
	return;
}
/* notifies the hypervisor that a tagged task has just been executed */
static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops)
{
	unsigned sched_ctx = task->sched_ctx;
	int worker = starpu_worker_get_id();
	if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] != 0.0)
	{
		double current_time = starpu_timing_now();
		double exec_time = (current_time -
				    hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */
		hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] += exec_time;
		hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = 0.0;
	}
	hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size;
	hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++;
	hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops;
	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops;
	if(_sc_hypervisor_use_lazy_resize())
		_ack_resize_completed(sched_ctx, worker);
	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
	{
		_update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size);
	}
	if(hypervisor.resize[sched_ctx])
	{
		if(hypervisor.policy.handle_poped_task)
		{
			if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0)
				sc_hypervisor_reset_react_start_time(sched_ctx, 1);
			double curr_time = starpu_timing_now();
			double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */
			if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample)
			{
				double hyp_overhead_start = starpu_timing_now();
				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
					hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
				double hyp_overhead_end = starpu_timing_now();
				hyp_overhead += (hyp_overhead_end - hyp_overhead_start);
				if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2))
					sc_hypervisor_reset_react_start_time(sched_ctx, 1);
				else
					sc_hypervisor_reset_react_start_time(sched_ctx, 0);
			}
			else
				/* no need to consider resizing, just remove the task from the pool if the strategy requires it */
				hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint);
		}
	}
	/* starpu_pthread_mutex_lock(&act_hypervisor_mutex); */
	/* _ack_resize_completed(sched_ctx, worker); */
	/* starpu_pthread_mutex_unlock(&act_hypervisor_mutex); */
	if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 200 == 0)
		_print_current_time();
	if(task_tag <= 0)
		return;
	unsigned conf_sched_ctx;
	unsigned i;
	unsigned ns = hypervisor.nsched_ctxs;
	for(i = 0; i < ns; i++)
	{
		struct configuration_entry *entry;
		conf_sched_ctx = hypervisor.sched_ctxs[i];
		starpu_pthread_mutex_lock(&hypervisor.conf_mut[conf_sched_ctx]);
		HASH_FIND_INT(hypervisor.configurations[conf_sched_ctx], &task_tag, entry);
		if (entry != NULL)
		{
			struct sc_hypervisor_policy_config *config = entry->configuration;
			sc_hypervisor_set_config(conf_sched_ctx, config);
			HASH_DEL(hypervisor.configurations[conf_sched_ctx], entry);
			free(config);
		}
		starpu_pthread_mutex_unlock(&hypervisor.conf_mut[conf_sched_ctx]);
	}
	if(hypervisor.resize[sched_ctx])
	{
		starpu_pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
		if(hypervisor.policy.handle_post_exec_hook)
		{
			/* Check whether 'task_tag' is in the 'resize_requests' set. */
			struct resize_request_entry *entry;
			HASH_FIND_INT(hypervisor.resize_requests[sched_ctx], &task_tag, entry);
			if (entry != NULL)
			{
				hypervisor.policy.handle_post_exec_hook(sched_ctx, task_tag);
				HASH_DEL(hypervisor.resize_requests[sched_ctx], entry);
				free(entry);
			}
		}
		starpu_pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
	}
	return;
}
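/* notifies the hypervisor that a job was submitted to a context: only its flops are accounted here, under the context mutex */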
static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, size_t data_size)
{
	unsigned sched_ctx = task->sched_ctx;
	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops;
	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	/* signaled by the user - no need to wait for them */
	/* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */
	/* 	hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */
}
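/* notifies the hypervisor that a context has no more ready tasks; a resize over all registered contexts is requested (the NULL / -1 arguments presumably meaning "all contexts, all workers") */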
static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
{
	sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1);
}
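/* lets the application declare the type of its tasks up front: marks the types as known and forwards them to the policy's handle_submitted_job hook */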
void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size)
{
	type_of_tasks_known = 1;
	if(hypervisor.policy.handle_submitted_job)
		hypervisor.policy.handle_submitted_job(cl, sched_ctx, footprint, data_size);
}
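/* notifies the hypervisor that a context was deleted: dump the current state and unregister the context */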
static void notify_delete_context(unsigned sched_ctx)
{
	_print_current_time();
	sc_hypervisor_unregister_ctx(sched_ctx);
}
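/* enables resizing for the given contexts (all registered ones when sched_ctxs is NULL) and lets the policy compute their initial size */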
void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	// starpu_pthread_mutex_lock(&act_hypervisor_mutex);
	unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs;
	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
	// starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
	unsigned s;
	for(s = 0; s < curr_nsched_ctxs; s++)
		hypervisor.resize[curr_sched_ctxs[s]] = 1;
	if(hypervisor.policy.size_ctxs)
		hypervisor.policy.size_ctxs(curr_sched_ctxs, curr_nsched_ctxs, workers, nworkers);
}
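/* simple accessors over the hypervisor's global state: the per-context wrapper, the array of registered contexts and their number */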
struct sc_hypervisor_wrapper* sc_hypervisor_get_wrapper(unsigned sched_ctx)
{
	return &hypervisor.sched_ctx_w[sched_ctx];
}
unsigned* sc_hypervisor_get_sched_ctxs()
{
	return hypervisor.sched_ctxs;
}
int sc_hypervisor_get_nsched_ctxs()
{
	int ns;
	ns = hypervisor.nsched_ctxs;
	return ns;
}
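/* whether resizes are acknowledged lazily, from the post-execution hook; controlled by the SC_HYPERVISOR_LAZY_RESIZE environment variable, enabled by default */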
int _sc_hypervisor_use_lazy_resize(void)
{
	char* lazy = getenv("SC_HYPERVISOR_LAZY_RESIZE");
	return lazy ? atoi(lazy) : 1;
}
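/* stores a pending sizing request (presumably one that could not be applied immediately),
   to be replayed later by whoever polls sc_hypervisor_get_size_req();
   a minimal consumer (hypothetical example, not part of the original file) could look like:

	unsigned *ctxs; int nctxs; int *workers; int nworkers;
	if(sc_hypervisor_get_size_req(&ctxs, &nctxs, &workers, &nworkers))
	{
		sc_hypervisor_size_ctxs(ctxs, nctxs, workers, nworkers);
		sc_hypervisor_free_size_req();
	}
*/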
void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request));
	hypervisor.sr->sched_ctxs = sched_ctxs;
	hypervisor.sr->nsched_ctxs = nsched_ctxs;
	hypervisor.sr->workers = workers;
	hypervisor.sr->nworkers = nworkers;
}
unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers)
{
	if(hypervisor.sr != NULL)
	{
		*sched_ctxs = hypervisor.sr->sched_ctxs;
		*nsched_ctxs = hypervisor.sr->nsched_ctxs;
		*workers = hypervisor.sr->workers;
		*nworkers = hypervisor.sr->nworkers;
		return 1;
	}
	return 0;
}
void sc_hypervisor_free_size_req(void)
{
	if(hypervisor.sr != NULL)
	{
		free(hypervisor.sr);
		hypervisor.sr = NULL;
	}
}
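/* get/set the per-context optimal velocity value cached by the hypervisor for the resizing policies */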
double _get_optimal_v(unsigned sched_ctx)
{
	return hypervisor.optimal_v[sched_ctx];
}
void _set_optimal_v(unsigned sched_ctx, double optimal_v)
{
	hypervisor.optimal_v[sched_ctx] = optimal_v;
}
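/* allocates a types_of_workers structure with all counters set to 0; the caller owns (and must free) the returned pointer */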
static struct types_of_workers* _init_structure_types_of_workers(void)
{
	struct types_of_workers *tw = (struct types_of_workers*)malloc(sizeof(struct types_of_workers));
	tw->ncpus = 0;
	tw->ncuda = 0;
	tw->nw = 0;
	return tw;
}
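/* counts the CPU and CUDA workers in the given list (or among the first nworkers workers when the list is NULL)
   and how many distinct types that makes; the result is malloc'ed and must be freed by the caller,
   e.g. (hypothetical example, not part of the original file):

	struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(NULL, starpu_worker_get_count());
	... use tw->ncpus, tw->ncuda, tw->nw ...
	free(tw);
*/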
struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers)
{
	struct types_of_workers *tw = _init_structure_types_of_workers();
	unsigned w;
	for(w = 0; w < nworkers; w++)
	{
		enum starpu_worker_archtype arch = workers == NULL ? starpu_worker_get_type((int)w) : starpu_worker_get_type(workers[w]);
		if(arch == STARPU_CPU_WORKER)
			tw->ncpus++;
		if(arch == STARPU_CUDA_WORKER)
			tw->ncuda++;
	}
	if(tw->ncpus > 0) tw->nw++;
	if(tw->ncuda > 0) tw->nw++;
	return tw;
}
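/* adds diff_total_flops to the total and remaining flops of the context (under its mutex) and propagates the difference recursively to the inheritor contexts */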
void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops)
{
	// double hyp_overhead_start = starpu_timing_now();
	starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops;
	hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops;
	starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	/* double hyp_overhead_end = starpu_timing_now(); */
	/* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */
	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
		sc_hypervisor_update_diff_total_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_total_flops);
	return;
}
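/* adds diff_elapsed_flops to the counters of the calling worker, if the caller is a worker, and propagates the difference recursively to the inheritor contexts */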
void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops)
{
	// double hyp_overhead_start = starpu_timing_now();
	int workerid = starpu_worker_get_id();
	if(workerid != -1)
	{
		// starpu_pthread_mutex_lock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
		hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[workerid] += diff_elapsed_flops;
		hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops;
		// starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
	}
	/* double hyp_overhead_end = starpu_timing_now(); */
	/* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */
	if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0)
		sc_hypervisor_update_diff_elapsed_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_elapsed_flops);
	return;
}
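/* returns in *sched_ctxs the registered contexts sitting on the given hierarchy level whose inheritor is father_sched_ctx_id (or all of them when father is STARPU_NMAX_SCHED_CTXS); the array is malloc'ed here and must be freed by the caller when *nsched_ctxs > 0 */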
void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id)
{
	unsigned s;
	*nsched_ctxs = 0;
	*sched_ctxs = (unsigned*)malloc(hypervisor.nsched_ctxs * sizeof(unsigned));
	for(s = 0; s < hypervisor.nsched_ctxs; s++)
	{
		/* if father == STARPU_NMAX_SCHED_CTXS we take all the ctxs in this level */
		if(starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]) == hierarchy_level &&
		   (starpu_sched_ctx_get_inheritor(hypervisor.sched_ctxs[s]) == father_sched_ctx_id || father_sched_ctx_id == STARPU_NMAX_SCHED_CTXS))
			(*sched_ctxs)[(*nsched_ctxs)++] = hypervisor.sched_ctxs[s];
	}
	if(*nsched_ctxs == 0)
		free(*sched_ctxs);
	return;
}
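/* counts how many distinct hierarchy levels are used by the registered contexts */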
unsigned sc_hypervisor_get_nhierarchy_levels(void)
{
	unsigned nlevels = 0;
	unsigned level = 0;
	unsigned levels[STARPU_NMAX_SCHED_CTXS];
	unsigned s, l;
	for(s = 0; s < hypervisor.nsched_ctxs; s++)
	{
		level = starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]);
		unsigned found = 0;
		for(l = 0; l < nlevels; l++)
			if(levels[l] == level)
				found = 1;
		if(!found)
			levels[nlevels++] = level;
	}
	return nlevels;
}
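/* fills 'leaves' with the contexts of the list that are not the inheritor (father) of any other context in it;
   *nleaves is only incremented here, so the caller is expected to provide a large enough array and to initialise
   the counter, e.g. (hypothetical example, not part of the original file):

	unsigned leaves[STARPU_NMAX_SCHED_CTXS];
	int nleaves = 0;
	sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves);
*/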
void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves)
{
	int s, s2;
	for(s = 0; s < nsched_ctxs; s++)
	{
		unsigned is_someones_father = 0;
		for(s2 = 0; s2 < nsched_ctxs; s2++)
		{
			unsigned father = starpu_sched_ctx_get_inheritor(sched_ctxs[s2]);
			if(sched_ctxs[s] == father)
			{
				is_someones_father = 1;
				break;
			}
		}
		if(!is_someones_father)
			leaves[(*nleaves)++] = sched_ctxs[s];
	}
	return;
}