sc_hypervisor.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011, 2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <sc_hypervisor_intern.h>
  17. #include <sc_hypervisor_policy.h>
  18. #include <starpu_config.h>
  19. unsigned imposed_resize = 0;
  20. unsigned type_of_tasks_known = 0;
  21. struct starpu_sched_ctx_performance_counters* perf_counters = NULL;
  22. static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time);
  23. static void notify_pushed_task(unsigned sched_ctx, int worker);
  24. static void notify_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, size_t data_size, uint32_t footprint);
  25. static void notify_post_exec_hook(unsigned sched_ctx, int taskid);
  26. static void notify_idle_end(unsigned sched_ctx, int worker);
  27. static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size);
  28. static void notify_ready_task(unsigned sched_ctx, struct starpu_task *task);
  29. static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task);
  30. static void notify_delete_context(unsigned sched_ctx);
  31. extern struct sc_hypervisor_policy idle_policy;
  32. extern struct sc_hypervisor_policy app_driven_policy;
  33. extern struct sc_hypervisor_policy gflops_rate_policy;
  34. #ifdef STARPU_HAVE_GLPK_H
  35. extern struct sc_hypervisor_policy feft_lp_policy;
  36. extern struct sc_hypervisor_policy teft_lp_policy;
  37. extern struct sc_hypervisor_policy ispeed_lp_policy;
  38. extern struct sc_hypervisor_policy throughput_lp_policy;
  39. #endif // STARPU_HAVE_GLPK_
  40. extern struct sc_hypervisor_policy ispeed_policy;
  41. static struct sc_hypervisor_policy *predefined_policies[] =
  42. {
  43. &idle_policy,
  44. &app_driven_policy,
  45. #ifdef STARPU_HAVE_GLPK_H
  46. &feft_lp_policy,
  47. &teft_lp_policy,
  48. &ispeed_lp_policy,
  49. &throughput_lp_policy,
  50. #endif // STARPU_HAVE_GLPK_H
  51. &gflops_rate_policy,
  52. &ispeed_policy
  53. };
  54. static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy)
  55. {
  56. STARPU_ASSERT(policy);
  57. hypervisor.policy.name = policy->name;
  58. hypervisor.policy.size_ctxs = policy->size_ctxs;
  59. hypervisor.policy.resize_ctxs = policy->resize_ctxs;
  60. hypervisor.policy.handle_poped_task = policy->handle_poped_task;
  61. hypervisor.policy.handle_pushed_task = policy->handle_pushed_task;
  62. hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle;
  63. hypervisor.policy.handle_idle_end = policy->handle_idle_end;
  64. hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook;
  65. hypervisor.policy.handle_submitted_job = policy->handle_submitted_job;
  66. hypervisor.policy.end_ctx = policy->end_ctx;
  67. }
  68. static struct sc_hypervisor_policy *_find_hypervisor_policy_from_name(const char *policy_name)
  69. {
  70. if (!policy_name)
  71. return NULL;
  72. unsigned i;
  73. for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
  74. {
  75. struct sc_hypervisor_policy *p;
  76. p = predefined_policies[i];
  77. if (p->name)
  78. {
  79. if (strcmp(policy_name, p->name) == 0) {
  80. /* we found a policy with the requested name */
  81. return p;
  82. }
  83. }
  84. }
  85. fprintf(stderr, "Warning: hypervisor policy \"%s\" was not found, try \"help\" to get a list\n", policy_name);
  86. /* nothing was found */
  87. return NULL;
  88. }
  89. static void display_sched_help_message(void)
  90. {
  91. const char* policy_name = getenv("SC_HYPERVISOR_POLICY");
  92. if (policy_name && (strcmp(policy_name, "help") == 0))
  93. {
  94. fprintf(stderr, "SC_HYPERVISOR_POLICY can be either of\n");
  95. /* display the description of all predefined policies */
  96. unsigned i;
  97. for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
  98. {
  99. struct sc_hypervisor_policy *p = predefined_policies[i];
  100. if (p->name)
  101. {
  102. fprintf(stderr, "%s\n", p->name);
  103. }
  104. }
  105. }
  106. }
  107. static struct sc_hypervisor_policy *_select_hypervisor_policy(struct sc_hypervisor_policy* hypervisor_policy)
  108. {
  109. struct sc_hypervisor_policy *selected_policy = NULL;
  110. if(hypervisor_policy && hypervisor_policy->custom)
  111. return hypervisor_policy;
  112. /* we look if the application specified the name of a policy to load */
  113. const char *policy_name;
  114. if (hypervisor_policy && hypervisor_policy->name)
  115. {
  116. policy_name = hypervisor_policy->name;
  117. }
  118. else
  119. {
  120. policy_name = getenv("SC_HYPERVISOR_POLICY");
  121. }
  122. if (policy_name)
  123. selected_policy = _find_hypervisor_policy_from_name(policy_name);
  124. /* Perhaps there was no policy that matched the name */
  125. if (selected_policy)
  126. return selected_policy;
  127. /* If no policy was specified, we use the idle policy as a default */
  128. return &idle_policy;
  129. }
  130. /* initializez the performance counters that starpu will use to retrive hints for resizing */
  131. void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy)
  132. {
  133. /* Perhaps we have to display some help */
  134. display_sched_help_message();
  135. hypervisor.min_tasks = 0;
  136. hypervisor.nsched_ctxs = 0;
  137. char* vel_gap = getenv("SC_HYPERVISOR_MAX_SPEED_GAP");
  138. hypervisor.max_speed_gap = vel_gap ? atof(vel_gap) : SC_SPEED_MAX_GAP_DEFAULT;
  139. char* crit = getenv("SC_HYPERVISOR_TRIGGER_RESIZE");
  140. hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING);
  141. starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
  142. hypervisor.start_executing_time = starpu_timing_now();
  143. int i;
  144. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  145. {
  146. hypervisor.resize[i] = 0;
  147. hypervisor.allow_remove[i] = 1;
  148. hypervisor.configurations[i] = NULL;
  149. hypervisor.sr = NULL;
  150. hypervisor.check_min_tasks[i] = 1;
  151. hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  152. hypervisor.sched_ctx_w[i].sched_ctx = STARPU_NMAX_SCHED_CTXS;
  153. hypervisor.sched_ctx_w[i].config = NULL;
  154. hypervisor.sched_ctx_w[i].total_flops = 0.0;
  155. hypervisor.sched_ctx_w[i].submitted_flops = 0.0;
  156. hypervisor.sched_ctx_w[i].remaining_flops = 0.0;
  157. hypervisor.sched_ctx_w[i].start_time = 0.0;
  158. hypervisor.sched_ctx_w[i].real_start_time = 0.0;
  159. hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1;
  160. hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL;
  161. hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0;
  162. hypervisor.sched_ctx_w[i].resize_ack.acked_workers = NULL;
  163. starpu_pthread_mutex_init(&hypervisor.sched_ctx_w[i].mutex, NULL);
  164. hypervisor.optimal_v[i] = 0.0;
  165. hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0;
  166. hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0;
  167. hypervisor.sched_ctx_w[i].ready_flops = 0.0;
  168. hypervisor.sched_ctx_w[i].total_flops_available = 0;
  169. hypervisor.sched_ctx_w[i].nready_tasks = 0;
  170. hypervisor.sched_ctx_w[i].to_be_sized = 0;
  171. int j;
  172. for(j = 0; j < STARPU_NMAXWORKERS; j++)
  173. {
  174. hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0;
  175. hypervisor.sched_ctx_w[i].idle_time[j] = 0.0;
  176. hypervisor.sched_ctx_w[i].idle_start_time[j] = 0.0;
  177. hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0;
  178. hypervisor.sched_ctx_w[i].poped_tasks[j] = 0;
  179. hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0;
  180. hypervisor.sched_ctx_w[i].elapsed_data[j] = 0;
  181. hypervisor.sched_ctx_w[i].elapsed_tasks[j] = 0;
  182. hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0;
  183. hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0;
  184. }
  185. }
  186. struct sc_hypervisor_policy *selected_hypervisor_policy = _select_hypervisor_policy(hypervisor_policy);
  187. _load_hypervisor_policy(selected_hypervisor_policy);
  188. perf_counters = (struct starpu_sched_ctx_performance_counters*)malloc(sizeof(struct starpu_sched_ctx_performance_counters));
  189. perf_counters->notify_idle_cycle = notify_idle_cycle;
  190. perf_counters->notify_pushed_task = notify_pushed_task;
  191. perf_counters->notify_poped_task = notify_poped_task;
  192. perf_counters->notify_post_exec_hook = notify_post_exec_hook;
  193. perf_counters->notify_idle_end = notify_idle_end;
  194. perf_counters->notify_submitted_job = notify_submitted_job;
  195. perf_counters->notify_ready_task = notify_ready_task;
  196. perf_counters->notify_empty_ctx = notify_empty_ctx;
  197. perf_counters->notify_delete_context = notify_delete_context;
  198. starpu_sched_ctx_notify_hypervisor_exists();
  199. return (void*)perf_counters;
  200. }
  201. const char* sc_hypervisor_get_policy()
  202. {
  203. return hypervisor.policy.name;
  204. }
  205. /* the user can forbid the resizing process*/
  206. void sc_hypervisor_stop_resize(unsigned sched_ctx)
  207. {
  208. imposed_resize = 1;
  209. hypervisor.resize[sched_ctx] = 0;
  210. }
  211. /* the user can restart the resizing process*/
  212. void sc_hypervisor_start_resize(unsigned sched_ctx)
  213. {
  214. imposed_resize = 1;
  215. hypervisor.resize[sched_ctx] = 1;
  216. }
  217. static void _print_current_time()
  218. {
  219. if(!getenv("SC_HYPERVISOR_STOP_PRINT"))
  220. {
  221. double curr_time = starpu_timing_now();
  222. double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
  223. fprintf(stdout, "Time: %lf\n", elapsed_time);
  224. int i;
  225. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  226. {
  227. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  228. {
  229. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
  230. double cpu_speed = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER);
  231. double cuda_speed = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER);
  232. int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
  233. int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
  234. fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
  235. }
  236. }
  237. }
  238. return;
  239. }
  240. void sc_hypervisor_shutdown(void)
  241. {
  242. int i;
  243. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  244. {
  245. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && hypervisor.nsched_ctxs > 0)
  246. {
  247. sc_hypervisor_stop_resize(hypervisor.sched_ctxs[i]);
  248. sc_hypervisor_unregister_ctx(hypervisor.sched_ctxs[i]);
  249. starpu_pthread_mutex_destroy(&hypervisor.sched_ctx_w[i].mutex);
  250. }
  251. }
  252. perf_counters->notify_idle_cycle = NULL;
  253. perf_counters->notify_pushed_task = NULL;
  254. perf_counters->notify_poped_task = NULL;
  255. perf_counters->notify_post_exec_hook = NULL;
  256. perf_counters->notify_idle_end = NULL;
  257. perf_counters->notify_delete_context = NULL;
  258. free(perf_counters);
  259. perf_counters = NULL;
  260. starpu_pthread_mutex_destroy(&act_hypervisor_mutex);
  261. }
  262. /* the hypervisor is in charge only of the contexts registered to it*/
  263. void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops)
  264. {
  265. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  266. hypervisor.configurations[sched_ctx] = NULL;
  267. hypervisor.resize_requests[sched_ctx] = NULL;
  268. starpu_pthread_mutex_init(&hypervisor.conf_mut[sched_ctx], NULL);
  269. starpu_pthread_mutex_init(&hypervisor.resize_mut[sched_ctx], NULL);
  270. _add_config(sched_ctx);
  271. hypervisor.sched_ctx_w[sched_ctx].sched_ctx = sched_ctx;
  272. hypervisor.sched_ctxs[hypervisor.nsched_ctxs++] = sched_ctx;
  273. hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops;
  274. hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops;
  275. hypervisor.resize[sched_ctx] = 1;
  276. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  277. }
  278. static int _get_first_free_sched_ctx(unsigned *sched_ctxs, int nsched_ctxs)
  279. {
  280. int i;
  281. for(i = 0; i < nsched_ctxs; i++)
  282. if(sched_ctxs[i] == STARPU_NMAX_SCHED_CTXS)
  283. return i;
  284. return STARPU_NMAX_SCHED_CTXS;
  285. }
  286. /* rearange array of sched_ctxs in order not to have {MAXVAL, MAXVAL, 5, MAXVAL, 7}
  287. and have instead {5, 7, MAXVAL, MAXVAL, MAXVAL}
  288. it is easier afterwards to iterate the array
  289. */
  290. static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs)
  291. {
  292. int first_free_id = STARPU_NMAX_SCHED_CTXS;
  293. int i;
  294. for(i = 0; i < old_nsched_ctxs; i++)
  295. {
  296. if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  297. {
  298. first_free_id = _get_first_free_sched_ctx(sched_ctxs, old_nsched_ctxs);
  299. if(first_free_id != STARPU_NMAX_SCHED_CTXS)
  300. {
  301. sched_ctxs[first_free_id] = sched_ctxs[i];
  302. sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  303. }
  304. }
  305. }
  306. }
  307. /* unregistered contexts will no longer be resized */
  308. void sc_hypervisor_unregister_ctx(unsigned sched_ctx)
  309. {
  310. if(hypervisor.policy.end_ctx)
  311. hypervisor.policy.end_ctx(sched_ctx);
  312. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  313. unsigned i;
  314. for(i = 0; i < hypervisor.nsched_ctxs; i++)
  315. {
  316. if(hypervisor.sched_ctxs[i] == sched_ctx)
  317. {
  318. hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS;
  319. break;
  320. }
  321. }
  322. _rearange_sched_ctxs(hypervisor.sched_ctxs, hypervisor.nsched_ctxs);
  323. hypervisor.nsched_ctxs--;
  324. hypervisor.sched_ctx_w[sched_ctx].sched_ctx = STARPU_NMAX_SCHED_CTXS;
  325. _remove_config(sched_ctx);
  326. starpu_pthread_mutex_destroy(&hypervisor.conf_mut[sched_ctx]);
  327. starpu_pthread_mutex_destroy(&hypervisor.resize_mut[sched_ctx]);
  328. if(hypervisor.nsched_ctxs == 1)
  329. sc_hypervisor_stop_resize(hypervisor.sched_ctxs[0]);
  330. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  331. }
  332. double _get_max_speed_gap()
  333. {
  334. return hypervisor.max_speed_gap;
  335. }
  336. unsigned sc_hypervisor_get_resize_criteria()
  337. {
  338. return hypervisor.resize_criteria;
  339. }
  340. static int get_ntasks( int *tasks)
  341. {
  342. int ntasks = 0;
  343. int j;
  344. for(j = 0; j < STARPU_NMAXWORKERS; j++)
  345. {
  346. ntasks += tasks[j];
  347. }
  348. return ntasks;
  349. }
  350. int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch)
  351. {
  352. int nworkers_ctx = 0;
  353. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  354. int worker;
  355. struct starpu_sched_ctx_iterator it;
  356. if(workers->init_iterator)
  357. workers->init_iterator(workers, &it);
  358. while(workers->has_next(workers, &it))
  359. {
  360. worker = workers->get_next(workers, &it);
  361. enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker);
  362. if(curr_arch == arch || arch == STARPU_ANY_WORKER)
  363. nworkers_ctx++;
  364. }
  365. return nworkers_ctx;
  366. }
  367. static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val)
  368. {
  369. int i;
  370. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  371. {
  372. hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i] = val;
  373. if(val == 0)
  374. {
  375. hypervisor.sched_ctx_w[sched_ctx].elapsed_data[i] = 0;
  376. hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[i] = 0;
  377. }
  378. }
  379. }
  380. double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
  381. {
  382. double ret_val = 0.0;
  383. int i;
  384. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  385. ret_val += sc_w->elapsed_flops[i];
  386. return ret_val;
  387. }
  388. double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w)
  389. {
  390. double ret_val = 0.0;
  391. int i;
  392. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  393. ret_val += sc_w->total_elapsed_flops[i];
  394. return ret_val;
  395. }
  396. static void _reset_idle_time(unsigned sched_ctx)
  397. {
  398. int i;
  399. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  400. {
  401. hypervisor.sched_ctx_w[sched_ctx].idle_time[i] = 0.0;
  402. hypervisor.sched_ctx_w[sched_ctx].idle_start_time[i] = hypervisor.sched_ctx_w[sched_ctx].idle_start_time[i] != 0.0 ? starpu_timing_now() : 0.0;
  403. }
  404. return;
  405. }
  406. void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx)
  407. {
  408. double start_time = starpu_timing_now();
  409. if(sender_sched_ctx != STARPU_NMAX_SCHED_CTXS)
  410. {
  411. /* info concerning only the gflops_rate strateg */
  412. struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx];
  413. sender_sc_w->start_time = start_time;
  414. _set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0);
  415. _reset_idle_time(sender_sched_ctx);
  416. int i;
  417. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  418. {
  419. sender_sc_w->idle_start_time[i] = 0.0;
  420. }
  421. }
  422. if(receiver_sched_ctx != STARPU_NMAX_SCHED_CTXS)
  423. {
  424. struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx];
  425. receiver_sc_w->start_time = start_time;
  426. _set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0);
  427. _reset_idle_time(receiver_sched_ctx);
  428. int i;
  429. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  430. {
  431. receiver_sc_w->idle_start_time[i] = (hypervisor.sched_ctx_w[receiver_sched_ctx].idle_start_time[i] != 0.0) ? starpu_timing_now() : 0.0;
  432. }
  433. }
  434. }
  435. /* actually move the workers: the cpus are moved, gpus are only shared */
  436. /* forbids another resize request before this one is take into account */
  437. void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now)
  438. {
  439. if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx])
  440. {
  441. _print_current_time();
  442. unsigned j;
  443. printf("resize ctx %d with %d workers", sender_sched_ctx, nworkers_to_move);
  444. for(j = 0; j < nworkers_to_move; j++)
  445. printf(" %d", workers_to_move[j]);
  446. printf("\n");
  447. starpu_trace_user_event(1);
  448. hypervisor.allow_remove[receiver_sched_ctx] = 0;
  449. starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx);
  450. if(now)
  451. {
  452. unsigned j;
  453. printf("remove now from ctx %d:", sender_sched_ctx);
  454. for(j = 0; j < nworkers_to_move; j++)
  455. printf(" %d", workers_to_move[j]);
  456. printf("\n");
  457. starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx);
  458. hypervisor.allow_remove[receiver_sched_ctx] = 1;
  459. _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
  460. }
  461. else
  462. {
  463. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  464. if(ret != EBUSY)
  465. {
  466. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx;
  467. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int));
  468. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move;
  469. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int));
  470. unsigned i;
  471. for(i = 0; i < nworkers_to_move; i++)
  472. {
  473. hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0;
  474. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i];
  475. hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0;
  476. }
  477. hypervisor.resize[sender_sched_ctx] = 0;
  478. if(imposed_resize) imposed_resize = 0;
  479. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  480. }
  481. }
  482. struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx);
  483. unsigned i;
  484. for(i = 0; i < nworkers_to_move; i++)
  485. new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle;
  486. }
  487. return;
  488. }
  489. void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx)
  490. {
  491. if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx])
  492. {
  493. _print_current_time();
  494. unsigned j;
  495. printf("add to ctx %d:", sched_ctx);
  496. for(j = 0; j < nworkers_to_add; j++)
  497. printf(" %d", workers_to_add[j]);
  498. printf("\n");
  499. starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx);
  500. struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx);
  501. unsigned i;
  502. for(i = 0; i < nworkers_to_add; i++)
  503. new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle;
  504. _reset_resize_sample_info(STARPU_NMAX_SCHED_CTXS, sched_ctx);
  505. }
  506. return;
  507. }
  508. unsigned sc_hypervisor_can_resize(unsigned sched_ctx)
  509. {
  510. return hypervisor.resize[sched_ctx];
  511. }
  512. void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now)
  513. {
  514. if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx])
  515. {
  516. _print_current_time();
  517. unsigned nworkers = 0;
  518. int workers[nworkers_to_remove];
  519. if(now)
  520. {
  521. unsigned j;
  522. printf("remove explicitley now from ctx %d:", sched_ctx);
  523. for(j = 0; j < nworkers_to_remove; j++)
  524. printf(" %d", workers_to_remove[j]);
  525. printf("\n");
  526. starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx);
  527. _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS);
  528. }
  529. else
  530. {
  531. printf("try to remove from ctx %d: ", sched_ctx);
  532. unsigned j;
  533. for(j = 0; j < nworkers_to_remove; j++)
  534. printf(" %d", workers_to_remove[j]);
  535. printf("\n");
  536. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
  537. if(ret != EBUSY)
  538. {
  539. unsigned i;
  540. for(i = 0; i < nworkers_to_remove; i++)
  541. if(starpu_sched_ctx_contains_worker(workers_to_remove[i], sched_ctx))
  542. workers[nworkers++] = workers_to_remove[i];
  543. hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1;
  544. hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
  545. hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = (int)nworkers;
  546. hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int));
  547. for(i = 0; i < nworkers; i++)
  548. {
  549. hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0;
  550. hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i];
  551. hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0;
  552. }
  553. hypervisor.resize[sched_ctx] = 0;
  554. if(imposed_resize) imposed_resize = 0;
  555. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sched_ctx].mutex);
  556. }
  557. }
  558. }
  559. return;
  560. }
  561. static unsigned _ack_resize_completed(unsigned sched_ctx, int worker)
  562. {
  563. if(worker != -1 && !starpu_sched_ctx_contains_worker(worker, sched_ctx))
  564. return 0;
  565. struct sc_hypervisor_resize_ack *resize_ack = NULL;
  566. unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS;
  567. int i;
  568. for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
  569. {
  570. if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
  571. {
  572. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
  573. starpu_pthread_mutex_lock(&sc_w->mutex);
  574. unsigned only_remove = 0;
  575. if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx &&
  576. sc_w->resize_ack.nmoved_workers > 0 && starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[i]))
  577. {
  578. int j;
  579. for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++)
  580. if(sc_w->resize_ack.moved_workers[j] == worker)
  581. {
  582. only_remove = 1;
  583. _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS);
  584. starpu_pthread_mutex_unlock(&sc_w->mutex);
  585. break;
  586. }
  587. }
  588. if(only_remove ||
  589. (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == (int)sched_ctx))
  590. {
  591. resize_ack = &sc_w->resize_ack;
  592. sender_sched_ctx = hypervisor.sched_ctxs[i];
  593. starpu_pthread_mutex_unlock(&sc_w->mutex);
  594. break;
  595. }
  596. starpu_pthread_mutex_unlock(&sc_w->mutex);
  597. }
  598. }
  599. /* if there is no ctx waiting for its ack return 1*/
  600. if(resize_ack == NULL)
  601. {
  602. return 1;
  603. }
  604. int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  605. if(ret != EBUSY)
  606. {
  607. int *moved_workers = resize_ack->moved_workers;
  608. int nmoved_workers = resize_ack->nmoved_workers;
  609. int *acked_workers = resize_ack->acked_workers;
  610. if(worker != -1)
  611. {
  612. for(i = 0; i < nmoved_workers; i++)
  613. {
  614. int moved_worker = moved_workers[i];
  615. if(moved_worker == worker && acked_workers[i] == 0)
  616. {
  617. acked_workers[i] = 1;
  618. }
  619. }
  620. }
  621. int nacked_workers = 0;
  622. for(i = 0; i < nmoved_workers; i++)
  623. {
  624. nacked_workers += (acked_workers[i] == 1);
  625. }
  626. unsigned resize_completed = (nacked_workers == nmoved_workers);
  627. int receiver_sched_ctx = sched_ctx;
  628. if(resize_completed)
  629. {
  630. /* if the permission to resize is not allowed by the user don't do it
  631. whatever the application says */
  632. if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize))
  633. {
  634. /* int j; */
  635. /* printf("remove after ack from ctx %d:", sender_sched_ctx); */
  636. /* for(j = 0; j < nmoved_workers; j++) */
  637. /* printf(" %d", moved_workers[j]); */
  638. /* printf("\n"); */
  639. starpu_sched_ctx_remove_workers(moved_workers, nmoved_workers, sender_sched_ctx);
  640. _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx);
  641. hypervisor.resize[sender_sched_ctx] = 1;
  642. hypervisor.allow_remove[receiver_sched_ctx] = 1;
  643. /* if the user allowed resizing leave the decisions to the application */
  644. if(imposed_resize) imposed_resize = 0;
  645. resize_ack->receiver_sched_ctx = -1;
  646. resize_ack->nmoved_workers = 0;
  647. free(resize_ack->moved_workers);
  648. free(resize_ack->acked_workers);
  649. }
  650. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  651. return resize_completed;
  652. }
  653. starpu_pthread_mutex_unlock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex);
  654. }
  655. return 0;
  656. }
  657. /* Enqueue a resize request for 'sched_ctx', to be executed when the
  658. * 'task_tag' tasks of 'sched_ctx' complete. */
  659. void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag)
  660. {
  661. struct resize_request_entry *entry;
  662. entry = malloc(sizeof *entry);
  663. STARPU_ASSERT(entry != NULL);
  664. entry->sched_ctx = sched_ctx;
  665. entry->task_tag = task_tag;
  666. starpu_pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
  667. HASH_ADD_INT(hypervisor.resize_requests[sched_ctx], task_tag, entry);
  668. starpu_pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
  669. }
  670. void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers)
  671. {
  672. if(hypervisor.policy.resize_ctxs)
  673. hypervisor.policy.resize_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers);
  674. }
  675. void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs)
  676. {
  677. unsigned sched_ctx;
  678. int total_max_nworkers = 0;
  679. int max_cpus = starpu_cpu_worker_get_count();
  680. double max_workers_idle_time[nsched_ctxs];
  681. unsigned configured = 0;
  682. int i;
  683. for(i = 0; i < nsched_ctxs; i++)
  684. {
  685. sched_ctx = sched_ctxs[i];
  686. if(hypervisor.sched_ctx_w[sched_ctx].to_be_sized) continue;
  687. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);
  688. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx);
  689. int worker;
  690. struct starpu_sched_ctx_iterator it;
  691. if(workers->init_iterator)
  692. workers->init_iterator(workers, &it);
  693. max_workers_idle_time[i] = 0.0;
  694. while(workers->has_next(workers, &it))
  695. {
  696. worker = workers->get_next(workers, &it);
  697. if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]==0.0)
  698. {
  699. max_workers_idle_time[i] += hypervisor.sched_ctx_w[sched_ctx].idle_time[worker]; /* in seconds */
  700. }
  701. else
  702. {
  703. double end_time = starpu_timing_now();
  704. double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */
  705. max_workers_idle_time[i] += hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] + idle;
  706. }
  707. }
  708. double curr_time = starpu_timing_now();
  709. double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */
  710. double norm_idle_time = max_workers_idle_time[i] / elapsed_time;
  711. config->max_nworkers = workers->nworkers - lrint(norm_idle_time) + hypervisor.sched_ctx_w[sched_ctx].nready_tasks;
  712. if(config->max_nworkers < 0)
  713. config->max_nworkers = 0;
  714. if(config->max_nworkers > max_cpus)
  715. config->max_nworkers = max_cpus;
  716. printf("%d: ready tasks %d idle for long %lf norm_idle_time %lf elapsed_time %lf nworkers %d max %d \n",
  717. sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, workers->nworkers, config->max_nworkers);
  718. /* if(max_workers_idle_time[i] > 0.000002) */
  719. /* { */
  720. /* double curr_time = starpu_timing_now(); */
  721. /* double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /\* in seconds *\/ */
  722. /* double norm_idle_time = max_workers_idle_time[i] / elapsed_time; */
  723. /* config->max_nworkers = workers->nworkers - lrint(norm_idle_time); */
  724. /* if(config->max_nworkers < 0) */
  725. /* config->max_nworkers = 0; */
  726. /* printf("%d: ready tasks %d idle for long %lf norm_idle_time %lf elapsed_time %lf nworkers %d decr %d \n", */
  727. /* sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, workers->nworkers, config->max_nworkers); */
  728. /* } */
  729. /* else */
  730. /* { */
  731. /* double curr_time = starpu_timing_now(); */
  732. /* double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /\* in seconds *\/ */
  733. /* double norm_idle_time = max_workers_idle_time[i] / elapsed_time; */
  734. /* if(workers->nworkers == 0 && hypervisor.sched_ctx_w[sched_ctx].nready_tasks == 1) */
  735. /* config->max_nworkers = 0; */
  736. /* else */
  737. /* { */
  738. /* config->max_nworkers = (hypervisor.sched_ctx_w[sched_ctx].nready_tasks > max_cpus) */
  739. /* ? max_cpus : hypervisor.sched_ctx_w[sched_ctx].nready_tasks; */
  740. /* config->max_nworkers = workers->nworkers > config->max_nworkers ? workers->nworkers : config->max_nworkers; */
  741. /* } */
  742. /* printf("%d: ready tasks %d not idle %lf norm_idle_time %lf elapsed_time %lf nworkers %d incr %d \n", */
  743. /* sched_ctx, hypervisor.sched_ctx_w[sched_ctx].nready_tasks, max_workers_idle_time[i], norm_idle_time, elapsed_time, workers->nworkers, config->max_nworkers); */
  744. /* } */
  745. total_max_nworkers += config->max_nworkers;
  746. configured = 1;
  747. }
  748. /*if the sum of the max cpus is smaller than the total cpus available
  749. increase the max for the ones having more ready tasks to exec */
  750. if(configured && total_max_nworkers < max_cpus)
  751. {
  752. int diff = max_cpus - total_max_nworkers;
  753. int max_nready = -1;
  754. unsigned max_nready_sched_ctx = sched_ctxs[0];
  755. for(i = 0; i < nsched_ctxs; i++)
  756. {
  757. if(max_nready < hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks)
  758. {
  759. max_nready = hypervisor.sched_ctx_w[sched_ctxs[i]].nready_tasks;
  760. max_nready_sched_ctx = sched_ctxs[i];
  761. }
  762. }
  763. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx);
  764. config->max_nworkers += diff;
  765. printf("%d: redib max_nworkers incr %d \n", max_nready_sched_ctx, config->max_nworkers);
  766. }
  767. }
  768. /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */
  769. static void notify_idle_end(unsigned sched_ctx, int worker)
  770. {
  771. if(hypervisor.resize[sched_ctx])
  772. hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0;
  773. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
  774. if(sc_w->idle_start_time[worker] != 0.0)
  775. {
  776. double end_time = starpu_timing_now();
  777. sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */
  778. sc_w->idle_start_time[worker] = 0.0;
  779. }
  780. if(hypervisor.policy.handle_idle_end)
  781. hypervisor.policy.handle_idle_end(sched_ctx, worker);
  782. }
  783. /* notifies the hypervisor that the worker spent another cycle in idle time */
  784. static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time)
  785. {
  786. if(hypervisor.resize[sched_ctx])
  787. {
  788. struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx];
  789. sc_w->current_idle_time[worker] += idle_time;
  790. if(sc_w->idle_start_time[worker] == 0.0)
  791. sc_w->idle_start_time[worker] = starpu_timing_now();
  792. if(hypervisor.policy.handle_idle_cycle)
  793. {
  794. hypervisor.policy.handle_idle_cycle(sched_ctx, worker);
  795. }
  796. }
  797. return;
  798. }
  799. /* notifies the hypervisor that a new task was pushed on the queue of the worker */
  800. static void notify_pushed_task(unsigned sched_ctx, int worker)
  801. {
  802. hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++;
  803. if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0)
  804. hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now();
  805. if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0)
  806. hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now();
  807. int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks);
  808. if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx])
  809. {
  810. hypervisor.resize[sched_ctx] = 1;
  811. if(imposed_resize) imposed_resize = 0;
  812. hypervisor.check_min_tasks[sched_ctx] = 0;
  813. }
  814. if(hypervisor.policy.handle_pushed_task)
  815. hypervisor.policy.handle_pushed_task(sched_ctx, worker);
  816. }
  817. /* notifies the hypervisor that a task was poped from the queue of the worker */
  818. static void notify_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, size_t data_size, uint32_t footprint)
  819. {
  820. hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++;
  821. hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += task->flops;
  822. hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ;
  823. hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ;
  824. hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += task->flops;
  825. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  826. hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= task->flops;
  827. hypervisor.sched_ctx_w[sched_ctx].nready_tasks--;
  828. hypervisor.sched_ctx_w[sched_ctx].ready_flops -= task->flops;
  829. if(hypervisor.sched_ctx_w[sched_ctx].ready_flops < 0.0)
  830. hypervisor.sched_ctx_w[sched_ctx].ready_flops = 0.0;
  831. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  832. /* struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); */
  833. /* unsigned finished_sample = 0; */
  834. /* char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA"); */
  835. /* if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0)) */
  836. /* { */
  837. /* double curr_time = starpu_timing_now(); */
  838. /* double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /\* in seconds *\/ */
  839. /* finished_sample = elapsed_time > config->time_sample; */
  840. /* } */
  841. /* else */
  842. /* { */
  843. /* double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(&hypervisor.sched_ctx_w[sched_ctx]); */
  844. /* double ctx_sample = config->ispeed_ctx_sample; */
  845. /* finished_sample = ctx_elapsed_flops > ctx_sample; */
  846. /* } */
  847. /* if(finished_sample) */
  848. /* { */
  849. /* sc_hypervisor_update_resize_interval(sched_ctx); */
  850. /* } */
  851. if(hypervisor.resize[sched_ctx])
  852. {
  853. if(hypervisor.policy.handle_poped_task)
  854. hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint);
  855. }
  856. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  857. _ack_resize_completed(sched_ctx, worker);
  858. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  859. if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 200 == 0)
  860. _print_current_time();
  861. }
  862. /* notifies the hypervisor that a tagged task has just been executed */
  863. static void notify_post_exec_hook(unsigned sched_ctx, int task_tag)
  864. {
  865. STARPU_ASSERT(task_tag > 0);
  866. unsigned conf_sched_ctx;
  867. unsigned i;
  868. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  869. unsigned ns = hypervisor.nsched_ctxs;
  870. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  871. for(i = 0; i < ns; i++)
  872. {
  873. struct configuration_entry *entry;
  874. conf_sched_ctx = hypervisor.sched_ctxs[i];
  875. starpu_pthread_mutex_lock(&hypervisor.conf_mut[conf_sched_ctx]);
  876. HASH_FIND_INT(hypervisor.configurations[conf_sched_ctx], &task_tag, entry);
  877. if (entry != NULL)
  878. {
  879. struct sc_hypervisor_policy_config *config = entry->configuration;
  880. sc_hypervisor_set_config(conf_sched_ctx, config);
  881. HASH_DEL(hypervisor.configurations[conf_sched_ctx], entry);
  882. free(config);
  883. }
  884. starpu_pthread_mutex_unlock(&hypervisor.conf_mut[conf_sched_ctx]);
  885. }
  886. if(hypervisor.resize[sched_ctx])
  887. {
  888. starpu_pthread_mutex_lock(&hypervisor.resize_mut[sched_ctx]);
  889. if(hypervisor.policy.handle_post_exec_hook)
  890. {
  891. /* Check whether 'task_tag' is in the 'resize_requests' set. */
  892. struct resize_request_entry *entry;
  893. HASH_FIND_INT(hypervisor.resize_requests[sched_ctx], &task_tag, entry);
  894. if (entry != NULL)
  895. {
  896. hypervisor.policy.handle_post_exec_hook(sched_ctx, task_tag);
  897. HASH_DEL(hypervisor.resize_requests[sched_ctx], entry);
  898. free(entry);
  899. }
  900. }
  901. starpu_pthread_mutex_unlock(&hypervisor.resize_mut[sched_ctx]);
  902. }
  903. return;
  904. }
  905. static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, size_t data_size)
  906. {
  907. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  908. hypervisor.sched_ctx_w[task->sched_ctx].submitted_flops += task->flops;
  909. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  910. if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known)
  911. hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size);
  912. }
  913. static void notify_ready_task(unsigned sched_ctx_id, struct starpu_task *task)
  914. {
  915. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  916. hypervisor.sched_ctx_w[sched_ctx_id].nready_tasks++;
  917. hypervisor.sched_ctx_w[sched_ctx_id].ready_flops += task->flops;
  918. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  919. }
  /* Empty-context notification: asks the policy for a resize without naming
   * any context or worker (NULL arrays, counts of -1).
   * Neither 'sched_ctx_id' nor 'task' is used. */
  static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task)
  {
      (void)sched_ctx_id;
      (void)task;

      sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1);
  }
  924. void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size)
  925. {
  926. type_of_tasks_known = 1;
  927. if(hypervisor.policy.handle_submitted_job)
  928. hypervisor.policy.handle_submitted_job(cl, sched_ctx, footprint, data_size);
  929. }
  /* Tells the hypervisor that 'sched_ctx' is being deleted: prints the
   * current time, then unregisters the context from the hypervisor. */
  static void notify_delete_context(unsigned sched_ctx)
  {
      _print_current_time();

      sc_hypervisor_unregister_ctx(sched_ctx);
  }
  935. void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
  936. {
  937. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  938. unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs;
  939. unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs;
  940. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  941. unsigned s;
  942. for(s = 0; s < curr_nsched_ctxs; s++)
  943. hypervisor.resize[curr_sched_ctxs[s]] = 1;
  944. if(hypervisor.policy.size_ctxs)
  945. hypervisor.policy.size_ctxs(curr_sched_ctxs, curr_nsched_ctxs, workers, nworkers);
  946. }
  947. struct sc_hypervisor_wrapper* sc_hypervisor_get_wrapper(unsigned sched_ctx)
  948. {
  949. return &hypervisor.sched_ctx_w[sched_ctx];
  950. }
  951. unsigned* sc_hypervisor_get_sched_ctxs()
  952. {
  953. return hypervisor.sched_ctxs;
  954. }
  955. int sc_hypervisor_get_nsched_ctxs()
  956. {
  957. int ns;
  958. ns = hypervisor.nsched_ctxs;
  959. return ns;
  960. }
  /* Read the SC_HYPERVISOR_LAZY_RESIZE environment variable.
   * Returns 1 when the variable is not set, otherwise its value as parsed
   * by atoi(). */
  int _sc_hypervisor_use_lazy_resize(void)
  {
      const char *setting = getenv("SC_HYPERVISOR_LAZY_RESIZE");

      if(setting == NULL)
          return 1;

      return atoi(setting);
  }
  966. void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
  967. {
  968. hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request));
  969. hypervisor.sr->sched_ctxs = sched_ctxs;
  970. hypervisor.sr->nsched_ctxs = nsched_ctxs;
  971. hypervisor.sr->workers = workers;
  972. hypervisor.sr->nworkers = nworkers;
  973. }
  974. unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers)
  975. {
  976. if(hypervisor.sr != NULL)
  977. {
  978. *sched_ctxs = hypervisor.sr->sched_ctxs;
  979. *nsched_ctxs = hypervisor.sr->nsched_ctxs;
  980. *workers = hypervisor.sr->workers;
  981. *nworkers = hypervisor.sr->nworkers;
  982. return 1;
  983. }
  984. return 0;
  985. }
  986. void sc_hypervisor_free_size_req(void)
  987. {
  988. if(hypervisor.sr != NULL)
  989. {
  990. free(hypervisor.sr);
  991. hypervisor.sr = NULL;
  992. }
  993. }
  994. double _get_optimal_v(unsigned sched_ctx)
  995. {
  996. return hypervisor.optimal_v[sched_ctx];
  997. }
  998. void _set_optimal_v(unsigned sched_ctx, double optimal_v)
  999. {
  1000. hypervisor.optimal_v[sched_ctx] = optimal_v;
  1001. }
  1002. static struct types_of_workers* _init_structure_types_of_workers(void)
  1003. {
  1004. struct types_of_workers *tw = (struct types_of_workers*)malloc(sizeof(struct types_of_workers));
  1005. tw->ncpus = 0;
  1006. tw->ncuda = 0;
  1007. tw->nw = 0;
  1008. return tw;
  1009. }
  1010. struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers)
  1011. {
  1012. struct types_of_workers *tw = _init_structure_types_of_workers();
  1013. unsigned w;
  1014. for(w = 0; w < nworkers; w++)
  1015. {
  1016. enum starpu_worker_archtype arch = workers == NULL ? starpu_worker_get_type((int)w) : starpu_worker_get_type(workers[w]);
  1017. if(arch == STARPU_CPU_WORKER)
  1018. tw->ncpus++;
  1019. if(arch == STARPU_CUDA_WORKER)
  1020. tw->ncuda++;
  1021. }
  1022. if(tw->ncpus > 0) tw->nw++;
  1023. if(tw->ncuda > 0) tw->nw++;
  1024. return tw;
  1025. }
  1026. void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops)
  1027. {
  1028. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  1029. hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops;
  1030. hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops;
  1031. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  1032. }
  1033. void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops)
  1034. {
  1035. int workerid = starpu_worker_get_id();
  1036. if(workerid != -1)
  1037. {
  1038. starpu_pthread_mutex_lock(&act_hypervisor_mutex);
  1039. hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[workerid] += diff_elapsed_flops;
  1040. hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops;
  1041. starpu_pthread_mutex_unlock(&act_hypervisor_mutex);
  1042. }
  1043. }