sc_hypervisor.h 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #ifndef SC_HYPERVISOR_H
  17. #define SC_HYPERVISOR_H
  18. #include <starpu.h>
  19. #include <starpu_sched_ctx_hypervisor.h>
  20. #include <sc_hypervisor_config.h>
  21. #include <sc_hypervisor_monitoring.h>
  22. #include <math.h>
  23. #ifdef __cplusplus
  24. extern "C"
  25. {
  26. #endif
  27. /**
  28. @ingroup API_SC_Hypervisor
  29. Methods to implement a hypervisor resizing policy.
  30. */
  31. struct sc_hypervisor_policy
  32. {
  33. /**
  34. Indicate the name of the policy. If there is no custom
  35. policy, the policy corresponding to this name will be used
  36. by the hypervisor.
  37. */
  38. const char* name;
  39. /**
  40. Indicate whether the policy is custom or not
  41. */
  42. unsigned custom;
  43. /**
  44. Distribute workers to contexts even at the beginning of the
  45. program
  46. */
  47. void (*size_ctxs)(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  48. /**
  49. Require explicit resizing
  50. */
  51. void (*resize_ctxs)(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  52. /**
  53. Called whenever the indicated worker executes another idle
  54. cycle in sched_ctx
  55. */
  56. void (*handle_idle_cycle)(unsigned sched_ctx, int worker);
  57. /**
  58. Called whenever a task is pushed on the worker's queue
  59. corresponding to the context sched_ctx
  60. */
  61. void (*handle_pushed_task)(unsigned sched_ctx, int worker);
  62. /**
  63. Called whenever a task is popped from the worker's queue
  64. corresponding to the context sched_ctx
  65. */
  66. void (*handle_poped_task)(unsigned sched_ctx, int worker,struct starpu_task *task, uint32_t footprint);
  67. /**
  68. Called whenever a task is executed on the indicated worker
  69. and context after a long period of idle time
  70. */
  71. void (*handle_idle_end)(unsigned sched_ctx, int worker);
  72. /**
  73. Called whenever a tag task has just been executed. The
  74. table of resize requests is provided as well as the tag.
      NOTE(review): the signature below only carries sched_ctx and
      task_tag; no table of resize requests is passed -- confirm
      whether this sentence is stale.
  75. */
  76. void (*handle_post_exec_hook)(unsigned sched_ctx, int task_tag);
  77. /**
  78. The hypervisor takes a decision when a job is submitted in
  79. this ctx
  80. */
  81. void (*handle_submitted_job)(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size);
  82. /**
  83. The hypervisor takes a decision when a certain ctx is
  84. deleted
  85. */
  86. void (*end_ctx)(unsigned sched_ctx);
  87. /**
  88. The hypervisor takes a decision when a certain ctx is
  89. registered
  90. */
  91. void (*start_ctx)(unsigned sched_ctx);
  92. /**
  93. The hypervisor initializes values for the workers
  94. */
  95. void (*init_worker)(int workerid, unsigned sched_ctx);
  96. };
  97. /**
  98. @defgroup API_SC_Hypervisor_usage Scheduling Context Hypervisor - Regular usage
  99. There is a single hypervisor that is in charge of resizing contexts
  100. and the resizing strategy is chosen at the initialization of the
  101. hypervisor. A single resize can be done at a time.
  102. The Scheduling Context Hypervisor Plugin provides a series of
  103. performance counters to StarPU. By incrementing them, StarPU can
  104. help the hypervisor in the resizing decision making process.
  105. The function sc_hypervisor_init() initializes the hypervisor to use
  106. the strategy provided as parameter and creates the performance
  107. counters (see starpu_sched_ctx_performance_counters). These
  108. performance counters are actually callbacks that the contexts
  109. use to pass on the information needed by the
  110. hypervisor.
  111. Scheduling Contexts that have to be resized by the hypervisor must
  112. be first registered to the hypervisor using the function
  113. sc_hypervisor_register_ctx().
  114. Note: The Hypervisor is actually a worker that takes this role once
  115. certain conditions trigger the resizing process (there is no
  116. additional thread assigned to the hypervisor).
  117. @{
  118. */
  119. /**
  120. Synchronise the hypervisor when several workers try to update its
  121. information
  122. */
  123. extern starpu_pthread_mutex_t act_hypervisor_mutex;
  124. /**
  125. Start the hypervisor with the given policy.
       NOTE(review): the meaning of the returned void* is not documented
       in this header -- confirm against the implementation.
  126. */
  127. void* sc_hypervisor_init(struct sc_hypervisor_policy *policy);
  128. /**
  129. Shut down the hypervisor.
  130. The hypervisor and all information concerning it are cleaned up. There
  131. is no synchronization between this function and starpu_shutdown().
  132. Thus, this should be called after starpu_shutdown(), because the
  133. performance counters will still need allocated callback functions.
  134. */
  135. void sc_hypervisor_shutdown(void);
  136. /**
  137. Register the context to the hypervisor, and indicate the number of
  138. flops the context will execute (used for Gflops rate based strategy)
  139. */
  140. void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops);
  141. /**
  142. Unregister a context from the hypervisor, and so exclude the
  143. context from the resizing process
  144. */
  145. void sc_hypervisor_unregister_ctx(unsigned sched_ctx);
  146. /**
  147. Require resizing the context \p sched_ctx whenever a task tagged
  148. with the id \p task_tag finishes executing
  149. */
  150. void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag);
  151. /**
  152. Require reconsidering the distribution of resources over the
  153. indicated scheduling contexts, i.e. reevaluate the distribution of
  154. the resources and resize if needed
  155. */
  156. void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers);
  157. /**
  158. Do not allow the hypervisor to resize a context.
  159. */
  160. void sc_hypervisor_stop_resize(unsigned sched_ctx);
  161. /**
  162. Allow the hypervisor to resize a context if necessary.
  163. */
  164. void sc_hypervisor_start_resize(unsigned sched_ctx);
  165. /**
  166. Return the name of the resizing policy used by the hypervisor.
  167. */
  168. const char *sc_hypervisor_get_policy(void);
  169. /**
  170. Ask the hypervisor to add workers to a sched_ctx
  171. */
  172. void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx);
  173. /**
  174. Ask the hypervisor to remove workers from a sched_ctx.
       NOTE(review): \p now is not documented here; presumably it requests
       an immediate removal -- confirm against the implementation.
  175. */
  176. void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now);
  177. /**
  178. Ask the hypervisor to move workers from one context to another.
       NOTE(review): \p now is not documented here; presumably it requests
       an immediate move -- confirm against the implementation.
  179. */
  180. void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now);
  181. /**
  182. Ask the hypervisor to choose a distribution of workers in the
  183. required contexts
  184. */
  185. void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
  186. /**
  187. Check whether there are pending resizing requests
  188. */
  189. unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers);
  190. /**
  191. Save a resizing request
  192. */
  193. void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers);
  194. /**
  195. Clear the list of pending resizing requests
  196. */
  197. void sc_hypervisor_free_size_req(void);
  198. /**
  199. Check whether a context can be resized
  200. */
  201. unsigned sc_hypervisor_can_resize(unsigned sched_ctx);
  202. /**
  203. Indicate the types of tasks a context will execute in order to
  204. better decide the sizing of ctxs
  205. */
  206. void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size);
  207. /**
  208. Change dynamically the total number of flops of a context, move the
  209. deadline of the finishing time of the context
  210. */
  211. void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops);
  212. /**
  213. Change dynamically the number of the elapsed flops in a context,
  214. modify the past in order to better compute the speed
  215. */
  216. void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops);
  217. /**
  218. Update the min and max workers needed by each context
  219. */
  220. void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_nworkers);
  221. /**
  222. Return a list of contexts that are on the same level in the
  223. hierarchy of contexts (the function itself returns void, so the list
       is presumably delivered through \p sched_ctxs and \p nsched_ctxs --
       confirm)
  224. */
  225. void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id);
  226. /**
  227. Return the number of levels of ctxs registered to the hypervisor
  228. */
  229. unsigned sc_hypervisor_get_nhierarchy_levels(void);
  230. /**
  231. Return the leaf ctxs from the list of ctxs (the function itself
       returns void, so they are presumably delivered through \p leaves
       and \p nleaves -- confirm)
  232. */
  233. void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves);
  234. /**
  235. Return the nready flops of all ctxs below sched_ctx in the hierarchy
  236. */
  237. double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx);
  238. void sc_hypervisor_print_overhead(void); /* NOTE(review): undocumented in this header; presumably prints the overhead measured by the hypervisor -- confirm against the implementation */
  239. void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx); /* NOTE(review): undocumented in this header; same parameters as the init_worker callback of struct sc_hypervisor_policy ("the hypervisor initializes values for the workers") -- presumably its entry point, confirm */
  240. /** @} */
  241. #ifdef __cplusplus
  242. }
  243. #endif
  244. #endif