modular_ez.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2013-2015,2017,2018-2019 Université de Bordeaux
  4. * Copyright (C) 2013-2015,2017 Inria
  5. * Copyright (C) 2014,2015,2017 CNRS
  6. * Copyright (C) 2013 Simon Archipoff
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <starpu_sched_component.h>
  20. #include <starpu_scheduler.h>
  21. #include <limits.h>
  22. /* The scheduling strategy may look like this :
  23. *
  24. * |
  25. * fifo_above
  26. * |
  27. * decision_component <--push-- perfmodel_select_component --push--> eager_component
  28. * | | | |
  29. * fifo fifo fifo |
  30. * | | | |
  31. * eager eager eager |
  32. * | | | |
  33. * >--------------------------------------------------------------<
  34. * | |
  35. * best_impl_component best_impl_component
  36. * | |
  37. * worker_component worker_component
  38. */
  39. /* The two thresholds concern the fifo components below, which contain queues
  40. * that can handle the priority of StarPU tasks. You can tune your
  41. * scheduling by benchmarking those values and choosing whichever is
  42. * best for your current application.
  43. * The current value of the ntasks_threshold is the best we found
  44. * so far across several types of applications (cholesky, LU, stencil).
  45. */
  46. #define _STARPU_SCHED_NTASKS_THRESHOLD_HEFT 30 /* Default ntasks_threshold when the decision component is heft, mct or heteroprio; overridable through the STARPU_NTASKS_THRESHOLD environment variable */
  47. #define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 /* Default ntasks_threshold for every other decision component; overridable through the STARPU_NTASKS_THRESHOLD environment variable */
  48. #define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 /* Default exp_len_threshold given to the fifo/prio components below the decision; overridable through the STARPU_EXP_LEN_THRESHOLD environment variable */
  49. void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id)
  50. {
  51. struct starpu_sched_tree * t;
  52. struct starpu_sched_component *last = NULL; /* Stores the last created component, from top to bottom */
  53. unsigned i, j, n;
  54. struct starpu_sched_component *decision_component = NULL;
  55. struct starpu_sched_component *no_perfmodel_component = NULL;
  56. struct starpu_sched_component *calibrator_component = NULL;
  57. /* Start building the tree */
  58. t = starpu_sched_tree_create(sched_ctx_id);
  59. t->root = NULL;
  60. starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t);
  61. /* Create combined workers if requested */
  62. if (flags & STARPU_SCHED_SIMPLE_COMBINED_WORKERS)
  63. starpu_sched_find_all_worker_combinations();
  64. /* Components parameters */
  65. if (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO || flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO)
  66. {
  67. /* The application may use any integer */
  68. if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0)
  69. starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN);
  70. if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0)
  71. starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX);
  72. }
  73. /* See what the component will decide */
  74. unsigned nbelow;
  75. unsigned nummaxids;
  76. nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count();
  77. if (starpu_memory_nodes_get_count() > nummaxids)
  78. nummaxids = starpu_memory_nodes_get_count();
  79. if (STARPU_ANY_WORKER > nummaxids)
  80. nummaxids = STARPU_ANY_WORKER;
  81. unsigned below_id[nummaxids];
  82. switch (flags & STARPU_SCHED_SIMPLE_DECIDE_MASK)
  83. {
  84. case STARPU_SCHED_SIMPLE_DECIDE_WORKERS:
  85. /* Count workers */
  86. nbelow = starpu_worker_get_count() + starpu_combined_worker_get_count();
  87. /* and no need for IDs */
  88. break;
  89. case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES:
  90. {
  91. /* Count memory nodes */
  92. n = starpu_memory_nodes_get_count();
  93. nbelow = 0;
  94. for(i = 0; i < n; i++)
  95. {
  96. for(j = 0; j < starpu_worker_get_count() + starpu_combined_worker_get_count(); j++)
  97. if (starpu_worker_get_memory_node(j) == i)
  98. break;
  99. if (j >= starpu_worker_get_count() + starpu_combined_worker_get_count())
  100. /* Don't create a component string for this memory node with no worker */
  101. continue;
  102. below_id[nbelow] = i;
  103. nbelow++;
  104. }
  105. break;
  106. }
  107. case STARPU_SCHED_SIMPLE_DECIDE_ARCHS:
  108. {
  109. /* Count available architecture types */
  110. enum starpu_worker_archtype type;
  111. nbelow = 0;
  112. for (type = STARPU_CPU_WORKER; type < STARPU_ANY_WORKER; type++)
  113. {
  114. if (starpu_worker_get_count_by_type(type))
  115. {
  116. below_id[nbelow] = type;
  117. nbelow++;
  118. }
  119. }
  120. break;
  121. }
  122. default:
  123. STARPU_ABORT();
  124. }
  125. STARPU_ASSERT(nbelow > 0);
  126. if (nbelow == 1)
  127. {
  128. /* Oh, no choice, we don't actually need to decide, just
  129. * use an eager scheduler */
  130. decision_component = starpu_sched_component_eager_create(t, NULL);
  131. /* But make sure we have a fifo above it, fifos below it would
  132. * possibly refuse tasks out of available room */
  133. flags |= STARPU_SCHED_SIMPLE_FIFO_ABOVE;
  134. }
  135. else
  136. {
  137. decision_component = create_decision_component(t, data);
  138. }
  139. /* First, a fifo if requested */
  140. if (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE)
  141. {
  142. struct starpu_sched_component *fifo_above;
  143. if (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO)
  144. {
  145. fifo_above = starpu_sched_component_prio_create(t, NULL);
  146. }
  147. else
  148. {
  149. fifo_above = starpu_sched_component_fifo_create(t, NULL);
  150. }
  151. last = t->root = fifo_above;
  152. }
  153. /* Then, perfmodel calibration if requested, and plug the scheduling decision-making component to it */
  154. if (flags & STARPU_SCHED_SIMPLE_PERFMODEL)
  155. {
  156. no_perfmodel_component = starpu_sched_component_eager_create(t, NULL);
  157. calibrator_component = starpu_sched_component_eager_calibration_create(t, NULL);
  158. struct starpu_sched_component_perfmodel_select_data perfmodel_select_data =
  159. {
  160. .calibrator_component = calibrator_component,
  161. .no_perfmodel_component = no_perfmodel_component,
  162. .perfmodel_component = decision_component,
  163. };
  164. struct starpu_sched_component * perfmodel_select_component = starpu_sched_component_perfmodel_select_create(t, &perfmodel_select_data);
  165. if (!t->root)
  166. t->root = perfmodel_select_component;
  167. else
  168. starpu_sched_component_connect(last, perfmodel_select_component);
  169. starpu_sched_component_connect(perfmodel_select_component, decision_component);
  170. starpu_sched_component_connect(perfmodel_select_component, calibrator_component);
  171. starpu_sched_component_connect(perfmodel_select_component, no_perfmodel_component);
  172. }
  173. else
  174. {
  175. /* No perfmodel calibration */
  176. if (!t->root)
  177. /* Plug decision_component directly */
  178. t->root = decision_component;
  179. else
  180. /* Plug decision_component to fifo */
  181. starpu_sched_component_connect(last, decision_component);
  182. }
  183. /* Take default ntasks_threshold */
  184. unsigned ntasks_threshold;
  185. if (starpu_sched_component_is_heft(decision_component) ||
  186. starpu_sched_component_is_mct(decision_component) ||
  187. starpu_sched_component_is_heteroprio(decision_component)) {
  188. /* These need more queueing to allow CPUs to take some share of the work */
  189. ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_HEFT;
  190. } else {
  191. ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT;
  192. }
  193. /* But let user tune it */
  194. ntasks_threshold = starpu_get_env_number_default("STARPU_NTASKS_THRESHOLD", ntasks_threshold);
  195. double exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT;
  196. exp_len_threshold = starpu_get_env_float_default("STARPU_EXP_LEN_THRESHOLD", exp_len_threshold);
  197. int ready = starpu_get_env_number_default("STARPU_SCHED_READY", flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY ? 1 : 0);
  198. struct starpu_sched_component_prio_data prio_data =
  199. {
  200. .ntasks_threshold = ntasks_threshold,
  201. .exp_len_threshold = exp_len_threshold,
  202. .ready = ready,
  203. };
  204. struct starpu_sched_component_fifo_data fifo_data =
  205. {
  206. .ntasks_threshold = ntasks_threshold,
  207. .exp_len_threshold = exp_len_threshold,
  208. .ready = ready,
  209. };
  210. /* Create one fifo+eager component pair per choice, below scheduling decision */
  211. struct starpu_sched_component *last_below[nbelow];
  212. memset(&last_below, 0, sizeof(last_below));
  213. for(i = 0; i < nbelow; i++)
  214. {
  215. last = decision_component;
  216. if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW
  217. && !((flags & STARPU_SCHED_SIMPLE_DECIDE_MASK) == STARPU_SCHED_SIMPLE_DECIDE_WORKERS
  218. && i >= starpu_worker_get_count()))
  219. {
  220. struct starpu_sched_component *fifo_below;
  221. if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO)
  222. {
  223. fifo_below = starpu_sched_component_prio_create(t, &prio_data);
  224. }
  225. else
  226. {
  227. fifo_below = starpu_sched_component_fifo_create(t, &fifo_data);
  228. }
  229. starpu_sched_component_connect(last, fifo_below);
  230. last = fifo_below;
  231. }
  232. switch (flags & STARPU_SCHED_SIMPLE_DECIDE_MASK)
  233. {
  234. case STARPU_SCHED_SIMPLE_DECIDE_WORKERS:
  235. /* 1-1 mapping between choice and worker, no need for an eager component */
  236. n = 1;
  237. break;
  238. case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES:
  239. n = 0;
  240. for (j = 0; j < starpu_worker_get_count() + starpu_combined_worker_get_count(); j++)
  241. if (starpu_worker_get_memory_node(j) == below_id[i])
  242. n++;
  243. break;
  244. case STARPU_SCHED_SIMPLE_DECIDE_ARCHS:
  245. n = starpu_worker_get_count_by_type(i);
  246. break;
  247. default:
  248. STARPU_ABORT();
  249. }
  250. STARPU_ASSERT(n >= 1);
  251. if (n > 1) {
  252. /* Several workers for this choice, need to introduce
  253. * a component to distribute the work */
  254. struct starpu_sched_component *distribute;
  255. if (flags & STARPU_SCHED_SIMPLE_WS_BELOW)
  256. {
  257. distribute = starpu_sched_component_work_stealing_create(t, NULL);
  258. }
  259. else
  260. {
  261. distribute = starpu_sched_component_eager_create(t, NULL);
  262. }
  263. starpu_sched_component_connect(last, distribute);
  264. last = distribute;
  265. }
  266. last_below[i] = last;
  267. }
  268. /* Finish by creating components per worker */
  269. for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++)
  270. {
  271. /* Start from the bottom */
  272. struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(sched_ctx_id, i);
  273. struct starpu_sched_component * worker = worker_component;
  274. unsigned id;
  275. /* Create implementation chooser if requested */
  276. if (flags & STARPU_SCHED_SIMPLE_IMPL)
  277. {
  278. struct starpu_sched_component * impl_component = starpu_sched_component_best_implementation_create(t, NULL);
  279. starpu_sched_component_connect(impl_component, worker_component);
  280. /* Reroute components above through it */
  281. worker = impl_component;
  282. }
  283. switch (flags & STARPU_SCHED_SIMPLE_DECIDE_MASK)
  284. {
  285. case STARPU_SCHED_SIMPLE_DECIDE_WORKERS:
  286. id = i;
  287. break;
  288. case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES:
  289. for (id = 0; id < nbelow; id++)
  290. if (below_id[id] == starpu_worker_get_memory_node(i))
  291. break;
  292. break;
  293. case STARPU_SCHED_SIMPLE_DECIDE_ARCHS:
  294. for (id = 0; id < nbelow; id++)
  295. if (below_id[id] == starpu_worker_get_type(i))
  296. break;
  297. break;
  298. default:
  299. STARPU_ABORT();
  300. }
  301. STARPU_ASSERT(id < nbelow);
  302. last = last_below[id];
  303. if (!last)
  304. last = decision_component;
  305. starpu_sched_component_connect(last, worker);
  306. /* Plug perfmodel calibrator if requested */
  307. if (flags & STARPU_SCHED_SIMPLE_PERFMODEL)
  308. {
  309. starpu_sched_component_connect(no_perfmodel_component, worker);
  310. /* Calibrator needs to choose the implementation */
  311. starpu_sched_component_connect(calibrator_component, worker_component);
  312. }
  313. }
  314. starpu_sched_tree_update_workers(t);
  315. starpu_sched_tree_update_workers_in_ctx(t);
  316. }