sc_hypervisor_monitoring.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 - 2013 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #ifndef SCHED_CTX_HYPERVISOR_MONITORING_H
  17. #define SCHED_CTX_HYPERVISOR_MONITORING_H
  18. #include <sc_hypervisor.h>
  19. #ifdef __cplusplus
  20. extern "C"
  21. {
  22. #endif
  23. /* resize acknowledgement: indicates when a requested move of workers has actually been done
  24. (i.e. the moved workers can be seen in the new ctx) */
  25. struct sc_hypervisor_resize_ack
  26. {
  27. /* context that receives the moved workers */
  28. int receiver_sched_ctx;
  29. /* list of workers required to be moved */
  30. int *moved_workers;
  31. /* number of workers required to be moved (presumably the length of moved_workers -- TODO confirm) */
  32. int nmoved_workers;
  33. /* list of workers that have actually arrived in the receiver ctx */
  34. int *acked_workers;
  35. };
  36. /* wrapper attached to a sched_ctx, storing its monitoring information; the per-worker arrays below have STARPU_NMAXWORKERS entries (presumably indexed by worker id -- TODO confirm) */
  37. struct sc_hypervisor_wrapper
  38. {
  39. /* the sched_ctx it monitors */
  40. unsigned sched_ctx;
  41. /* user configuration meant to limit resizing */
  42. struct sc_hypervisor_policy_config *config;
  43. /* idle time of workers in this context */
  44. double current_idle_time[STARPU_NMAXWORKERS];
  45. /* list of workers that will leave this context (lazy resizing process) */
  46. int worker_to_be_removed[STARPU_NMAXWORKERS];
  47. /* number of tasks pushed on each worker in this ctx */
  48. int pushed_tasks[STARPU_NMAXWORKERS];
  49. /* number of tasks popped from each worker in this ctx */
  50. int poped_tasks[STARPU_NMAXWORKERS];
  51. /* number of flops the context has to execute */
  52. double total_flops;
  53. /* number of flops executed since the beginning until now */
  54. double total_elapsed_flops[STARPU_NMAXWORKERS];
  55. /* number of flops executed since last resizing */
  56. double elapsed_flops[STARPU_NMAXWORKERS];
  57. /* data quantity executed on each worker in this ctx */
  58. size_t elapsed_data[STARPU_NMAXWORKERS];
  59. /* number of tasks executed on each worker in this ctx */
  60. int elapsed_tasks[STARPU_NMAXWORKERS];
  61. /* the average speed of workers when they belonged to this context */
  62. double ref_velocity[STARPU_NMAXWORKERS];
  63. /* number of flops submitted to this ctx */
  64. double submitted_flops;
  65. /* number of flops that still have to be executed in this ctx */
  66. double remaining_flops;
  67. /* the start time of the resizing sample of this context */
  68. double start_time;
  69. /* the first time a task was pushed to this context */
  70. double real_start_time;
  71. /* the workers do not leave the current ctx until the receiver ctx
  72. acknowledges having received them */
  73. struct sc_hypervisor_resize_ack resize_ack;
  74. /* mutex to protect the ack of workers */
  75. starpu_pthread_mutex_t mutex;
  76. };
  77. /* return the wrapper that stores the monitoring information of context sched_ctx */
  78. struct sc_hypervisor_wrapper *sc_hypervisor_get_wrapper(unsigned sched_ctx);
  79. /* get the list of registered contexts; takes no arguments, declared with (void) so that C compilers see a real prototype and reject stray arguments (NOTE(review): ownership of the returned array is not visible here -- confirm before freeing) */
  80. int *sc_hypervisor_get_sched_ctxs(void);
  81. /* get the number of registered contexts; takes no arguments, declared with (void) so that C compilers see a real prototype and reject stray arguments */
  82. int sc_hypervisor_get_nsched_ctxs(void);
  83. /* get the number of workers of architecture arch in context sched_ctx */
  84. int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_archtype arch);
  85. /* get the number of flops executed by a context since the last resizing (reset to 0 when a resizing is done) */
  86. double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w);
  87. /* get the number of flops executed by a context since the beginning */
  88. double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w);
  89. /* compute an average value of the velocity of the workers of the given type (cpu/cuda) */
  90. double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_archtype arch);
  91. /* compute the actual velocity of all workers of a specific type */
  92. double sc_hypervisor_get_velocity(struct sc_hypervisor_wrapper *sc_w, enum starpu_archtype arch);
  93. #ifdef __cplusplus
  94. }
  95. #endif
  96. #endif