ispeed_lp_policy.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <starpu_config.h>
#include "sc_hypervisor_lp.h"
#include "sc_hypervisor_policy.h"
#include <math.h>
#include <sys/time.h>
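
/* Data handed to the LP callback through the dichotomy driver:
 * per-(ctx, worker) speeds, per-ctx flops to schedule, the resulting
 * flops distribution and the (optional) subset of workers considered. */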
struct ispeed_lp_data
{
	double **speed;
	double *flops;
	double **flops_on_w;
	int *workers;
};

#ifdef STARPU_HAVE_GLPK_H
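/* Callback used by the dichotomy search: simulate one LP flops distribution
 * for a candidate tmax and report whether a feasible solution exists. */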
static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw],
				       unsigned is_integer, double tmax, void *specific_data)
{
	struct ispeed_lp_data *sd = (struct ispeed_lp_data *)specific_data;

	double **speed = sd->speed;
	double *flops = sd->flops;
	double **final_flops_on_w = sd->flops_on_w;

	return sc_hypervisor_lp_simulate_distrib_flops_on_sample(ns, nw, final_w_in_s, is_integer, tmax, speed, flops, final_flops_on_w);
}
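
/* Build the per-(ctx, worker) speed matrix and the per-ctx flops targets,
 * then run a dichotomy on tmax to find a distribution of workers (w_in_s)
 * and flops (flops_on_w) over the contexts. */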
static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double **flops_on_w, unsigned *sched_ctxs, int *workers)
{
	double *flops = (double*)malloc(ns*sizeof(double));
	double **speed = (double**)malloc(ns*sizeof(double*));
	int i;
	for(i = 0; i < ns; i++)
		speed[i] = (double*)malloc(nw*sizeof(double));

	int w, s;

	struct sc_hypervisor_wrapper *sc_w = NULL;
	for(s = 0; s < ns; s++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]);
		for(w = 0; w < nw; w++)
		{
			w_in_s[s][w] = 0.0;
			int worker = workers == NULL ? w : workers[w];

			speed[s][w] = sc_hypervisor_get_speed_per_worker(sc_w, worker);
			if(speed[s][w] == -1.0)
			{
				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
				speed[s][w] = sc_hypervisor_get_speed(sc_w, arch);
				if(arch == STARPU_CUDA_WORKER)
				{
					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx);
					if(!worker_in_ctx)
					{
						double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)) / 1000;
						speed[s][w] = (speed[s][w] * transfer_speed) / (speed[s][w] + transfer_speed);
					}
				}
			}
//			printf("v[w%d][s%d] = %lf\n", w, s, speed[s][w]);
		}

		struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]);
		flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */
	}

	/* take the execution time of the slowest ctx as a starting point
	   and try to minimize it, while allowing the faster ctxs to
	   increase theirs a little */
	double tmax = sc_hypervisor_get_slowest_ctx_exec_time();
	double smallest_tmax = sc_hypervisor_get_fastest_ctx_exec_time(); //tmax - 0.5*tmax;
//	printf("tmax %lf smallest %lf\n", tmax, smallest_tmax);

	double tmin = 0.0;

	struct ispeed_lp_data specific_data;
	specific_data.speed = speed;
	specific_data.flops = flops;
	specific_data.flops_on_w = flops_on_w;
	specific_data.workers = workers;

	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data,
								tmin, tmax, smallest_tmax, _compute_workers_distrib);

	for(i = 0; i < ns; i++)
		free(speed[i]);
	free(speed);
	free(flops); /* was leaked: allocated above but never released */
	return found_sol;
}
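
/* Solve the LP for the current contexts/workers and, if a solution is found,
 * round the fractional worker shares per architecture and apply the
 * resulting redistribution. */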
static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs;
	int nw = nworkers == -1 ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */

	unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs;

	struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw);
	int ntypes_of_workers = tw->nw;

	double w_in_s[ns][nw];
	double **flops_on_w = (double**)malloc(ns*sizeof(double*));
	int i;
	for(i = 0; i < ns; i++)
		flops_on_w[i] = (double*)malloc(nw*sizeof(double));

	struct timeval start_time;
	struct timeval end_time;
	gettimeofday(&start_time, NULL);

	unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw, w_in_s, flops_on_w, curr_sched_ctxs, workers);

	gettimeofday(&end_time, NULL);
	long diff_s = end_time.tv_sec - start_time.tv_sec;
	long diff_us = end_time.tv_usec - start_time.tv_usec;
	__attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0;

	/* if we did find at least one solution redistribute the resources */
	if(found_sol)
	{
		int w, s;
		double nworkers_per_ctx[ns][ntypes_of_workers];
		int nworkers_per_ctx_rounded[ns][ntypes_of_workers];
		for(s = 0; s < ns; s++)
		{
			for(w = 0; w < ntypes_of_workers; w++)
			{
				nworkers_per_ctx[s][w] = 0.0;
				nworkers_per_ctx_rounded[s][w] = 0;
			}
		}

		for(s = 0; s < ns; s++)
		{
			for(w = 0; w < nw; w++)
			{
				/* map the position in the workers array to the actual worker id,
				   as done in _compute_flops_distribution_over_ctxs */
				int worker = workers == NULL ? w : workers[w];
				enum starpu_worker_archtype arch = starpu_worker_get_type(worker);
				int idx = sc_hypervisor_get_index_for_arch(arch, tw);
				nworkers_per_ctx[s][idx] += w_in_s[s][w];

				/* round the fractional share: a GPU is kept with a
				   smaller share than a CPU */
				if(arch == STARPU_CUDA_WORKER)
				{
					if(w_in_s[s][w] >= 0.3)
						nworkers_per_ctx_rounded[s][idx]++;
				}
				else
				{
					if(w_in_s[s][w] > 0.5)
						nworkers_per_ctx_rounded[s][idx]++;
				}
			}
		}
/*		for(s = 0; s < ns; s++) */
/*			printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */
/*			       nworkers_rounded[s][1], nworkers_rounded[s][0]); */

		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw);
	}

	free(tw);
	for(i = 0; i < ns; i++)
		free(flops_on_w[i]);
	free(flops_on_w);
}
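
/* Triggered on task completion: if the resize criterion is the observed
 * speed and a significant speed gap is detected between contexts, try a
 * resize (only if the hypervisor lock can be taken without blocking). */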
static void ispeed_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker,
					__attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint)
{
	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		unsigned criteria = sc_hypervisor_get_resize_criteria();
		if(criteria != SC_NOTHING && criteria == SC_SPEED)
		{
			if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1))
			{
				_try_resizing(NULL, -1, NULL, -1);
			}
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
	}
}
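
/* Triggered when a worker stays idle: same pattern as above, but driven
 * by the idleness criterion. */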
static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker)
{
	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		unsigned criteria = sc_hypervisor_get_resize_criteria();
		if(criteria != SC_NOTHING && criteria == SC_IDLE)
		{
			if(sc_hypervisor_check_idle(sched_ctx, worker))
			{
				_try_resizing(NULL, -1, NULL, -1);
			}
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
	}
}
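
/* Explicit resize request for a given set of contexts and workers. */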
static void ispeed_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
{
	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
	if(ret != EBUSY)
	{
		_try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers);
		STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex);
	}
}
static void ispeed_lp_end_ctx(__attribute__((unused))unsigned sched_ctx)
{
/*	struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx); */
/*	int worker; */
/*	for(worker = 0; worker < 12; worker++) */
/*		printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */
	return;
}
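
/* Hypervisor policy based on an LP formulation of the instant speed of the
 * contexts; only the hooks used by this strategy are filled in. */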
struct sc_hypervisor_policy ispeed_lp_policy = {
	.size_ctxs = NULL,
	.resize_ctxs = ispeed_lp_resize_ctxs,
	.handle_poped_task = ispeed_lp_handle_poped_task,
	.handle_pushed_task = NULL,
	.handle_idle_cycle = ispeed_lp_handle_idle_cycle,
	.handle_idle_end = NULL,
	.handle_post_exec_hook = NULL,
	.handle_submitted_job = NULL,
	.end_ctx = ispeed_lp_end_ctx,
	.init_worker = NULL,
	.custom = 0,
	.name = "ispeed_lp"
};

#endif /* STARPU_HAVE_GLPK_H */