modular_gemm.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2013-2015,2017 Inria
 * Copyright (C) 2017 CNRS
 * Copyright (C) 2014,2016-2019 Université de Bordeaux
 * Copyright (C) 2013 Simon Archipoff
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

/* This scheduler runs only GEMMs on GPUs, and tries to feed them with as many
 * GEMMs as possible. */

#include <string.h>	/* strcmp() on the performance model name */
#include <stdlib.h>	/* qsort() */

#include <starpu_sched_component.h>
#include <starpu_scheduler.h>

/* Optionally, it can take memory affinity into account, to avoid too many GPU
 * data transfers */
#define MEMORY_AFFINITY

struct child_data
{
	double expected_start;
	double predicted;
	double predicted_transfer;
	double expected_end;
	unsigned child;
};

static int compar(const void *_a, const void *_b)
{
	const struct child_data *a = _a;
	const struct child_data *b = _b;

	if (a->expected_end < b->expected_end)
		return -1;
	if (a->expected_end == b->expected_end)
		return 0;
	return 1;
}

static int gemm_push_task(struct starpu_sched_component *component, struct starpu_task *task)
{
	unsigned n = component->nchildren;
	unsigned i;

	/* See if it's a GEMM task */
	const char *name = starpu_task_get_model_name(task);
	//fprintf(stderr, "it's %s\n", name);
	if (name && (!strcmp(name, "gemm") ||
		     !strcmp(name, "dgemm") ||
		     !strcmp(name, "sgemm") ||
		     !strcmp(name, "chol_model_22") ||
		     !strcmp(name, "starpu_dlu_lu_model_22") ||
		     !strcmp(name, "starpu_slu_lu_model_22")))
	{
		/* It's a GEMM, try to push to GPUs */
		struct child_data child_data[n];

		for (i = 0; i < n; i++)
		{
			child_data[i].expected_end = -1;
			/* Keep this field defined even when MEMORY_AFFINITY is not set,
			 * since it is copied into task->predicted_transfer below. */
			child_data[i].predicted_transfer = 0.;
			child_data[i].child = i;
		}

		/* Look at GPU availability time */
		for (i = 0; i < n; i++)
		{
			struct starpu_sched_component *child = component->children[i];
			double predicted;

			if (starpu_sched_component_execute_preds(child, task, &predicted))
			{
				double expected_start;

				child_data[i].expected_start =
					expected_start = child->estimated_end(child);
				child_data[i].predicted = predicted;
				child_data[i].expected_end = expected_start + predicted;
#ifdef MEMORY_AFFINITY
				double predicted_transfer;

				child_data[i].predicted_transfer =
					predicted_transfer = starpu_sched_component_transfer_length(child, task);
				child_data[i].expected_end += predicted_transfer;
#endif
			}
		}

		/* Sort by increasing expected end */
		qsort(child_data, n, sizeof(*child_data), compar);

		/* Try to push to the GPU with minimum availability time, to balance the load. */
		for (i = 0; i < n; i++)
		{
			if (child_data[i].expected_end != -1)
			{
				struct starpu_sched_component *child = component->children[child_data[i].child];

				/* Note it in the task so that estimated_end() has it */
				task->predicted = child_data[i].predicted;
				task->predicted_transfer = child_data[i].predicted_transfer;

				int ret = starpu_sched_component_push_task(component, child, task);
				if (!ret)
					/* Ok, this GPU took it */
					return 0;
			}
		}
	}

	int workerid;

	/* It's not a GEMM, or no GPU wanted to take it, find somebody else */
	for (workerid = starpu_bitmap_first(component->workers_in_ctx);
	     workerid != -1;
	     workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
	{
		int nimpl;
		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
		{
			if (starpu_worker_can_execute_task(workerid, task, nimpl)
			    || starpu_combined_worker_can_execute_task(workerid, task, nimpl))
			{
				for (i = 0; i < n; i++)
				{
					struct starpu_sched_component *child = component->children[i];
					int idworker;
					for (idworker = starpu_bitmap_first(component->children[i]->workers);
					     idworker != -1;
					     idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
					{
						if (idworker == workerid)
						{
							if ((starpu_cpu_worker_get_count() == 0 ||
							     starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
							    && (starpu_worker_can_execute_task(workerid, task, nimpl)
								|| starpu_combined_worker_can_execute_task(workerid, task, nimpl)))
							{
								int ret = starpu_sched_component_push_task(component, child, task);
								if (!ret)
									return 0;
							}
						}
					}
				}
			}
		}
	}

	/* FIFOs are full */
	return 1;
}

struct starpu_sched_component *starpu_sched_component_gemm_create(struct starpu_sched_tree *tree, void *params STARPU_ATTRIBUTE_UNUSED)
{
	struct starpu_sched_component *component = starpu_sched_component_create(tree, "gemm");

	component->push_task = gemm_push_task;

	return component;
}

static void initialize_gemm_center_policy(unsigned sched_ctx_id)
{
	/* Let the generic helper build the tree around the gemm component:
	 * a priority FIFO above it and per-memory-node priority FIFOs below it,
	 * with the decision made per memory node (see the flags below). */
	starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_gemm_create, NULL,
							   STARPU_SCHED_SIMPLE_DECIDE_MEMNODES |
							   STARPU_SCHED_SIMPLE_FIFO_ABOVE |
							   STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO |
							   STARPU_SCHED_SIMPLE_FIFOS_BELOW |
							   STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO |
							   STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id);
}

static void deinitialize_gemm_center_policy(unsigned sched_ctx_id)
{
	struct starpu_sched_tree *tree = (struct starpu_sched_tree *) starpu_sched_ctx_get_policy_data(sched_ctx_id);

	starpu_sched_tree_destroy(tree);
}

struct starpu_sched_policy _starpu_sched_modular_gemm_policy =
{
	.init_sched = initialize_gemm_center_policy,
	.deinit_sched = deinitialize_gemm_center_policy,
	.add_workers = starpu_sched_tree_add_workers,
	.remove_workers = starpu_sched_tree_remove_workers,
	.push_task = starpu_sched_tree_push_task,
	.pop_task = starpu_sched_tree_pop_task,
	.pre_exec_hook = starpu_sched_component_worker_pre_exec_hook,
	.post_exec_hook = starpu_sched_component_worker_post_exec_hook,
	.pop_every_task = NULL,
	.policy_name = "modular-gemm",
	.policy_description = "gemm modular policy",
	.worker_type = STARPU_WORKER_LIST,
};
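
Usage note (not part of the file above): the policy is exported under the name
"modular-gemm" via policy_name, so an application can select it either through
the STARPU_SCHED=modular-gemm environment variable or by naming it in its
starpu_conf. The sketch below is a minimal, hypothetical driver illustrating
the latter; it relies only on the public entry points starpu_conf_init,
starpu_init and starpu_shutdown, and does not submit any real tasks.

/* Hypothetical driver sketch: select the modular-gemm policy by name. */
#include <starpu.h>

int main(void)
{
	struct starpu_conf conf;

	starpu_conf_init(&conf);
	conf.sched_policy_name = "modular-gemm";	/* same string as policy_name above */

	if (starpu_init(&conf) != 0)
		return 1;	/* e.g. no worker available */

	/* ... submit tasks whose performance models are named "gemm", "dgemm",
	 * "sgemm", etc., so that gemm_push_task routes them to the GPUs ... */

	starpu_shutdown();
	return 0;
}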