speed.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2013 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "sc_hypervisor_policy.h"
  17. #include "sc_hypervisor_intern.h"
  18. #include <math.h>
  19. double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w)
  20. {
  21. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
  22. double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
  23. double sample = config->ispeed_ctx_sample;
  24. double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w);
  25. double total_flops = sc_w->total_flops;
  26. char *start_sample_prc_char = getenv("SC_HYPERVISOR_START_RESIZE");
  27. double start_sample_prc = start_sample_prc_char ? atof(start_sample_prc_char) : 0.0;
  28. double start_sample = start_sample_prc > 0.0 ? (start_sample_prc / 100) * total_flops : sample;
  29. double redim_sample = elapsed_flops == total_elapsed_flops ? (start_sample > 0.0 ? start_sample : sample) : sample;
  30. double curr_time = starpu_timing_now();
  31. double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
  32. unsigned can_compute_speed = 0;
  33. char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA");
  34. if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0))
  35. can_compute_speed = elapsed_time > config->time_sample;
  36. else
  37. can_compute_speed = elapsed_flops >= redim_sample;
  38. if(can_compute_speed)
  39. {
  40. return (elapsed_flops/1000000000.0)/elapsed_time;/* in Gflops/s */
  41. }
  42. return -1.0;
  43. }
  44. double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker)
  45. {
  46. if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx))
  47. return -1.0;
  48. double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
  49. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
  50. double sample = config->ispeed_w_sample[worker] / 1000000000.0; /*in gflops */
  51. double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
  52. double ctx_sample = config->ispeed_ctx_sample;
  53. if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0)
  54. return 0.00000000000001;
  55. if( elapsed_flops > sample)
  56. {
  57. double curr_time = starpu_timing_now();
  58. double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
  59. elapsed_time -= sc_w->idle_time[worker];
  60. /* size_t elapsed_data_used = sc_w->elapsed_data[worker]; */
  61. /* enum starpu_worker_archtype arch = starpu_worker_get_type(worker); */
  62. /* if(arch == STARPU_CUDA_WORKER) */
  63. /* { */
  64. /* /\* unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); *\/ */
  65. /* /\* if(!worker_in_ctx) *\/ */
  66. /* /\* { *\/ */
  67. /* /\* double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); *\/ */
  68. /* /\* elapsed_time += (elapsed_data_used / transfer_speed) / 1000000 ; *\/ */
  69. /* /\* } *\/ */
  70. /* double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); */
  71. /* // printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks); */
  72. /* elapsed_time += (elapsed_tasks * latency)/1000000; */
  73. /* // printf("elapsed time after %lf \n", elapsed_time); */
  74. /* } */
  75. double vel = (elapsed_flops/elapsed_time);/* in Gflops/s */
  76. return vel;
  77. }
  78. return -1.0;
  79. }
  80. /* compute an average value of the cpu/cuda speed */
  81. double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
  82. {
  83. struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx);
  84. double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w);
  85. double ctx_sample = config->ispeed_ctx_sample;
  86. double curr_time = starpu_timing_now();
  87. double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */
  88. unsigned can_compute_speed = 0;
  89. char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA");
  90. if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0))
  91. can_compute_speed = elapsed_time > config->time_sample;
  92. else
  93. can_compute_speed = ctx_elapsed_flops > ctx_sample;
  94. if(can_compute_speed)
  95. {
  96. if(ctx_elapsed_flops == 0.0) return -1.0;
  97. struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx);
  98. int worker;
  99. struct starpu_sched_ctx_iterator it;
  100. if(workers->init_iterator)
  101. workers->init_iterator(workers, &it);
  102. double speed = 0.0;
  103. unsigned nworkers = 0;
  104. double all_workers_flops = 0.0;
  105. double max_workers_idle_time = 0.0;
  106. while(workers->has_next(workers, &it))
  107. {
  108. worker = workers->get_next(workers, &it);
  109. enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker);
  110. if(arch == req_arch)
  111. {
  112. all_workers_flops += sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */
  113. if(max_workers_idle_time < sc_w->idle_time[worker])
  114. max_workers_idle_time = sc_w->idle_time[worker]; /* in seconds */
  115. nworkers++;
  116. }
  117. }
  118. if(nworkers != 0)
  119. {
  120. // elapsed_time -= max_workers_idle_time;
  121. speed = (all_workers_flops / elapsed_time) / nworkers;
  122. }
  123. else
  124. speed = -1.0;
  125. if(speed != -1.0)
  126. {
  127. /* if ref_speed started being corrupted bc of the old bad distribution
  128. register only the last frame otherwise make the average with the speed
  129. behavior of the application until now */
  130. if(arch == STARPU_CUDA_WORKER)
  131. sc_w->ref_speed[0] = (sc_w->ref_speed[0] > 0.1) ? ((sc_w->ref_speed[0] + speed ) / 2.0) : speed;
  132. else
  133. sc_w->ref_speed[1] = (sc_w->ref_speed[1] > 0.1) ? ((sc_w->ref_speed[1] + speed ) / 2.0) : speed;
  134. }
  135. return speed;
  136. }
  137. return -1.0;
  138. }
  139. /* compute an average value of the cpu/cuda old speed */
  140. double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
  141. {
  142. if(arch == STARPU_CUDA_WORKER && sc_w->ref_speed[0] > 0.0)
  143. return sc_w->ref_speed[0];
  144. else
  145. if(arch == STARPU_CPU_WORKER && sc_w->ref_speed[1] > 0.0)
  146. return sc_w->ref_speed[1];
  147. return -1.0;
  148. }
  149. /* returns the speed necessary for the linear programs (either the monitored one either a default value) */
  150. double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch)
  151. {
  152. /* monitored speed in the last frame */
  153. double speed = sc_hypervisor_get_speed_per_worker_type(sc_w, arch);
  154. /* if(speed != -1.0 && arch == STARPU_CPU_WORKER) */
  155. /* { */
  156. /* struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); */
  157. /* double ratio = speed / SC_HYPERVISOR_DEFAULT_CPU_SPEED; */
  158. /* if(ratio < 0.3) */
  159. /* { */
  160. /* if(config->max_nworkers > 0) */
  161. /* config->max_nworkers--; */
  162. /* printf("%d: ratio %lf max_nworkers descr %d \n", sc_w->sched_ctx, ratio, config->max_nworkers); */
  163. /* } */
  164. /* if(ratio > 0.9) */
  165. /* { */
  166. /* int max_cpus = starpu_cpu_worker_get_count(); */
  167. /* if(config->max_nworkers < max_cpus) */
  168. /* config->max_nworkers++; */
  169. /* printf("%d: ratio %lf max_nworkers incr %d \n", sc_w->sched_ctx, ratio, config->max_nworkers); */
  170. /* } */
  171. /* } */
  172. if(speed == -1.0)
  173. {
  174. /* avg value of the monitored speed over the entier current execution */
  175. speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, arch);
  176. }
  177. if(speed == -1.0)
  178. {
  179. /* a default value */
  180. speed = arch == STARPU_CPU_WORKER ? SC_HYPERVISOR_DEFAULT_CPU_SPEED : SC_HYPERVISOR_DEFAULT_CUDA_SPEED;
  181. }
  182. return speed;
  183. }