helper_mct.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2013-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2013 Simon Archipoff
  5. * Copyright (C) 2020 Télécom-Sud Paris
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <starpu_sched_component.h>
  19. #include "helper_mct.h"
  20. #include <float.h>
  /* Alpha, Beta and Gamma are MCT-specific weights, which allow the
   * user to tune more precisely how much each term of the fitness
   * computation counts. Beta, for example, controls the weight of
   * communications between memories when choosing the best component.
   */
  26. #define _STARPU_SCHED_ALPHA_DEFAULT 1.0
  27. #define _STARPU_SCHED_BETA_DEFAULT 1.0
  28. #define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
  29. struct _starpu_mct_data *starpu_mct_init_parameters(struct starpu_sched_component_mct_data *params)
  30. {
  31. struct _starpu_mct_data *data;
  32. _STARPU_MALLOC(data, sizeof(*data));
  33. if (params)
  34. {
  35. data->alpha = params->alpha;
  36. data->beta = params->beta;
  37. /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */
  38. data->_gamma = params->_gamma;
  39. /* data->idle_power: Idle power of the whole machine in Watt */
  40. data->idle_power = params->idle_power;
  41. }
  42. else
  43. {
  44. data->alpha = starpu_get_env_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT);
  45. data->beta = starpu_get_env_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT);
  46. #ifdef STARPU_NON_BLOCKING_DRIVERS
  47. if (starpu_getenv("STARPU_SCHED_GAMMA"))
  48. _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n");
  49. #endif
  50. data->_gamma = starpu_get_env_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT);
  51. data->idle_power = starpu_get_env_float_default("STARPU_IDLE_POWER", 0.0);
  52. }
  53. return data;
  54. }
  /* Compute the expected termination time of a task on a worker, taking
   * into account how much of the data transfer overlaps with the work
   * already queued there.
   *
   * now:                current time
   * predicted_end:      time at which the worker is expected to be available
   * predicted_length:   expected duration of the task itself
   * predicted_transfer: expected duration of the data transfer
   *
   * All four values must be non-negative and none may be NaN (asserted).
   *
   * Returns predicted_end, plus the part of the transfer which cannot be
   * hidden before predicted_end, plus predicted_length.
   */
  static double compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer)
  {
      double exposed_transfer;
      double start_of_task;

      STARPU_ASSERT(!isnan(now + predicted_end + predicted_length + predicted_transfer));
      STARPU_ASSERT_MSG(now >= 0.0 && predicted_end >= 0.0 && predicted_length >= 0.0 && predicted_transfer >= 0.0, "now=%lf, predicted_end=%lf, predicted_length=%lf, predicted_transfer=%lf\n", now, predicted_end, predicted_length, predicted_transfer);

      /* TODO: actually schedule transfers */

      /* Only the transfer time which is not overlapped is added here. The
       * calling function is not modified, so the whole transfer time is
       * still counted there as a penalty.
       *
       * If the transfer, started now, completes before the worker becomes
       * available, we may hope it is entirely hidden. Otherwise only the
       * remainder beyond predicted_end is exposed. */
      exposed_transfer = (now + predicted_transfer < predicted_end)
          ? 0
          : predicted_transfer - (predicted_end - now);

      start_of_task = predicted_end + exposed_transfer;

      return start_of_task + predicted_length;
  }
  80. double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy)
  81. {
  82. /* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */
  83. /* min_exp_end_of_task is the minimum end time of the task over all workers */
  84. double fitness = d->alpha * (exp_end - min_exp_end_of_task) + d->beta * transfer_len + d->_gamma * local_energy;
  85. /* max_exp_end is the maximum end time of the workers. If the total execution time is increased, then an
  86. additional energy penalty must be considered*/
  87. if(exp_end > max_exp_end_of_workers)
  88. fitness += d->_gamma * d->idle_power * (exp_end - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules,
  89. then d->idle_power * (exp_end - max_exp_end)
  90. must be in Joules, thus the / 1000000.0 */
  91. return fitness;
  92. }
  93. unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *component, struct starpu_task *task,
  94. double *estimated_lengths, double *estimated_transfer_length, unsigned *suitable_components)
  95. {
  96. unsigned nsuitable_components = 0;
  97. unsigned i;
  98. for(i = 0; i < component->nchildren; i++)
  99. {
  100. struct starpu_sched_component * c = component->children[i];
  101. /* Silence static analysis warnings */
  102. estimated_lengths[i] = NAN;
  103. estimated_transfer_length[i] = NAN;
  104. if(starpu_sched_component_execute_preds(c, task, estimated_lengths + i))
  105. {
  106. if(isnan(estimated_lengths[i]))
  107. /* The perfmodel had been purged since the task was pushed
  108. * onto the mct component. */
  109. continue;
  110. STARPU_ASSERT_MSG(estimated_lengths[i]>=0, "component=%p, child[%u]=%p, estimated_lengths[%u]=%lf\n", component, i, c, i, estimated_lengths[i]);
  111. estimated_transfer_length[i] = starpu_sched_component_transfer_length(c, task);
  112. suitable_components[nsuitable_components++] = i;
  113. }
  114. }
  115. return nsuitable_components;
  116. }
  117. void starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED,
  118. double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task,
  119. double *min_exp_end_of_task, double *max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components)
  120. {
  121. unsigned i;
  122. double now = starpu_timing_now();
  123. *min_exp_end_of_task = DBL_MAX;
  124. *max_exp_end_of_workers = 0.0;
  125. for(i = 0; i < nsuitable_components; i++)
  126. {
  127. unsigned icomponent = suitable_components[i];
  128. struct starpu_sched_component * c = component->children[icomponent];
  129. /* Estimated availability of worker */
  130. double estimated_end = c->estimated_end(c);
  131. if (estimated_end < now)
  132. estimated_end = now;
  133. estimated_ends_with_task[icomponent] = compute_expected_time(now,
  134. estimated_end,
  135. estimated_lengths[icomponent],
  136. estimated_transfer_length[icomponent]);
  137. /* estimated_ends_with_task[icomponent]: estimated end of execution on the worker icomponent
  138. estimated_end: estimatated end of the worker
  139. min_exp_end_of_task: minimum estimated execution time of the task over all workers
  140. max_exp_end_of_workers: maximum estimated end of the already-scheduled tasks over all workers
  141. */
  142. if(estimated_ends_with_task[icomponent] < *min_exp_end_of_task)
  143. *min_exp_end_of_task = estimated_ends_with_task[icomponent];
  144. if(estimated_end > *max_exp_end_of_workers)
  145. *max_exp_end_of_workers = estimated_end;
  146. }
  147. }
  148. /* This function retrieves the energy consumption of a task in Joules*/
  149. void starpu_mct_compute_energy(struct starpu_sched_component *component, struct starpu_task *task , double *local_energy, unsigned *suitable_components, unsigned nsuitable_components)
  150. {
  151. unsigned i;
  152. for(i = 0; i < nsuitable_components; i++)
  153. {
  154. unsigned icomponent = suitable_components[i];
  155. int nimpl = 0;
  156. local_energy[icomponent] = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl);
  157. for (nimpl = 1; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  158. {
  159. double e;
  160. e = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl);
  161. if (e < local_energy[icomponent])
  162. local_energy[icomponent] = e;
  163. }
  164. }
  165. }
  166. int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_of_task, double max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components)
  167. {
  168. double best_fitness = DBL_MAX;
  169. int best_icomponent = -1;
  170. unsigned i;
  171. for(i = 0; i < nsuitable_components; i++)
  172. {
  173. int icomponent = suitable_components[i];
  174. double tmp = starpu_mct_compute_fitness(d,
  175. estimated_ends_with_task[icomponent],
  176. min_exp_end_of_task,
  177. max_exp_end_of_workers,
  178. estimated_transfer_length[icomponent],
  179. local_energy[icomponent]);
  180. if(tmp < best_fitness)
  181. {
  182. best_fitness = tmp;
  183. best_icomponent = icomponent;
  184. }
  185. }
  186. if (best_icomponent != -1)
  187. {
  188. task->predicted = estimated_lengths[best_icomponent];
  189. task->predicted_transfer = estimated_transfer_length[best_icomponent];
  190. }
  191. return best_icomponent;
  192. }