sc_hypervisor.h 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2015 Inria
  4. * Copyright (C) 2012,2013,2017,2019 CNRS
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #ifndef SC_HYPERVISOR_H
  18. #define SC_HYPERVISOR_H
  19. #include <starpu.h>
  20. #include <starpu_sched_ctx_hypervisor.h>
  21. #include <sc_hypervisor_config.h>
  22. #include <sc_hypervisor_monitoring.h>
  23. #include <math.h>
  24. #ifdef __cplusplus
  25. extern "C"
  26. {
  27. #endif
  28. /**
  29. @defgroup API_SC_Hypervisor_usage Scheduling Context Hypervisor - Regular usage
  30. There is a single hypervisor that is in charge of resizing contexts
  31. and the resizing strategy is chosen at the initialization of the
  32. hypervisor. A single resize can be done at a time.
  33. The Scheduling Context Hypervisor Plugin provides a series of
  34. performance counters to StarPU. By incrementing them, StarPU can
  35. help the hypervisor in the resizing decision making process.
The function sc_hypervisor_init() initializes the hypervisor to use
the strategy provided as parameter and creates the performance
counters (see starpu_sched_ctx_performance_counters). These
performance counters are actually callbacks that the contexts use
to notify the hypervisor of the information it needs.
Scheduling Contexts that have to be resized by the hypervisor must
first be registered with the hypervisor using the function
sc_hypervisor_register_ctx().
  45. Note: The Hypervisor is actually a worker that takes this role once
  46. certain conditions trigger the resizing process (there is no
  47. additional thread assigned to the hypervisor).
  48. @{
  49. */
  50. /**
  51. synchronise the hypervisor when several workers try to update its
  52. information
  53. */
  54. extern starpu_pthread_mutex_t act_hypervisor_mutex;
  55. /**
  56. @ingroup API_SC_Hypervisor
  57. Methods to implement a hypervisor resizing policy.
  58. */
  59. struct sc_hypervisor_policy
  60. {
  61. /**
  62. Indicate the name of the policy, if there is not a custom
  63. policy, the policy corresponding to this name will be used
  64. by the hypervisor
  65. */
  66. const char* name;
  67. /**
  68. Indicate whether the policy is custom or not
  69. */
  70. unsigned custom;
  71. /**
  72. Distribute workers to contexts even at the begining of the
  73. program
  74. */
  75. void (*size_ctxs)(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  76. /**
  77. Require explicit resizing
  78. */
  79. void (*resize_ctxs)(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  80. /**
  81. Called whenever the indicated worker executes another idle
  82. cycle in sched_ctx
  83. */
  84. void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
  85. /**
  86. Called whenever a task is pushed on the worker’s queue
  87. corresponding to the context sched_ctx
  88. */
  89. void (*handle_pushed_task)(unsigned sched_ctx, int worker);
  90. /**
  91. Called whenever a task is poped from the worker’s queue
  92. corresponding to the context sched_ctx
  93. */
  94. void (*handle_poped_task)(unsigned sched_ctx, int worker,struct starpu_task *task, uint32_t footprint);
  95. /**
  96. Called whenever a task is executed on the indicated worker
  97. and context after a long period of idle time
  98. */
  99. void (*handle_idle_end)(unsigned sched_ctx, int worker);
  100. /**
  101. Called whenever a tag task has just been executed. The
  102. table of resize requests is provided as well as the tag
  103. */
  104. void (*handle_post_exec_hook)(unsigned sched_ctx, int task_tag);
  105. /**
  106. the hypervisor takes a decision when a job was submitted in
  107. this ctx
  108. */
  109. void (*handle_submitted_job)(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size);
  110. /**
  111. the hypervisor takes a decision when a certain ctx was
  112. deleted
  113. */
  114. void (*end_ctx)(unsigned sched_ctx);
  115. /**
  116. the hypervisor takes a decision when a certain ctx was
  117. registerd
  118. */
  119. void (*start_ctx)(unsigned sched_ctx);
  120. /**
  121. the hypervisor initializes values for the workers
  122. */
  123. void (*init_worker)(int workerid, unsigned sched_ctx);
  124. };
  125. /**
  126. Start the hypervisor with the given policy
  127. */
  128. void* sc_hypervisor_init(struct sc_hypervisor_policy *policy);
  129. /**
  130. Shutdown the hypervisor.
  131. The hypervisor and all information concerning it is cleaned. There
  132. is no synchronization between this function and starpu_shutdown().
  133. Thus, this should be called after starpu_shutdown(), because the
  134. performance counters will still need allocated callback functions.
  135. */
  136. void sc_hypervisor_shutdown(void);
  137. /**
  138. Register the context to the hypervisor, and indicate the number of
  139. flops the context will execute (used for Gflops rate based strategy)
  140. */
  141. void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops);
  142. /**
  143. Unregister a context from the hypervisor, and so exclude the
  144. context from the resizing process
  145. */
  146. void sc_hypervisor_unregister_ctx(unsigned sched_ctx);
  147. /**
  148. Require resizing the context \p sched_ctx whenever a task tagged
  149. with the id \p task_tag finished executing
  150. */
  151. void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag);
  152. /**
  153. Require reconsidering the distribution of ressources over the
  154. indicated scheduling contexts, i.e reevaluate the distribution of
  155. the resources and eventually resize if needed
  156. */
  157. void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  158. /**
  159. Do not allow the hypervisor to resize a context.
  160. */
  161. void sc_hypervisor_stop_resize(unsigned sched_ctx);
  162. /**
  163. Allow the hypervisor to resize a context if necessary.
  164. */
  165. void sc_hypervisor_start_resize(unsigned sched_ctx);
  166. /**
  167. Return the name of the resizing policy used by the hypervisor
  168. */
  169. const char *sc_hypervisor_get_policy();
  170. /**
  171. Ask the hypervisor to add workers to a sched_ctx
  172. */
  173. void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
  174. /**
  175. Ask the hypervisor to remove workers from a sched_ctx
  176. */
  177. void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now);
  178. /**
  179. Ask the hypervisor to move workers from one context to another
  180. */
  181. void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now);
  182. /**
  183. Ask the hypervisor to choose a distribution of workers in the
  184. required contexts
  185. */
  186. void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
  187. /**
  188. Check if there are pending demands of resizing
  189. */
  190. unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers);
  191. /**
  192. Save a demand of resizing
  193. */
  194. void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
  195. /**
  196. Clear the list of pending demands of resizing
  197. */
  198. void sc_hypervisor_free_size_req(void);
  199. /**
  200. Check out if a context can be resized
  201. */
  202. unsigned sc_hypervisor_can_resize(unsigned sched_ctx);
  203. /**
  204. Indicate the types of tasks a context will execute in order to
  205. better decide the sizing of ctxs
  206. */
  207. void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size);
  208. /**
  209. Change dynamically the total number of flops of a context, move the
  210. deadline of the finishing time of the context
  211. */
  212. void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops);
  213. /**
  214. Change dynamically the number of the elapsed flops in a context,
  215. modify the past in order to better compute the speed
  216. */
  217. void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops);
  218. /**
  219. Update the min and max workers needed by each context
  220. */
  221. void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_nworkers);
  222. /**
  223. Return a list of contexts that are on the same level in the
  224. hierarchy of contexts
  225. */
  226. void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id);
  227. /**
  228. Returns the number of levels of ctxs registered to the hyp
  229. */
  230. unsigned sc_hypervisor_get_nhierarchy_levels(void);
  231. /**
  232. Return the leaves ctxs from the list of ctxs
  233. */
  234. void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves);
  235. /**
  236. Return the nready flops of all ctxs below in hierachy of sched_ctx
  237. */
  238. double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
  239. void sc_hypervisor_print_overhead();
  240. void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx);
  241. /** @} */
  242. #ifdef __cplusplus
  243. }
  244. #endif
  245. #endif