graph.c
/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2016 Université de Bordeaux
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

/*
 * This stores the task graph structure, to be used by the schedulers which
 * need it. We do not always enable it since it is costly.
 */
#include <stdio.h>
#include <stdlib.h>

#include <starpu.h>
#include <core/jobs.h>
#include <common/graph.h>
/* Single lock protecting the whole task graph structure, i.e. the three
 * global lists below and the per-job adjacency arrays. */
static starpu_pthread_rwlock_t graph_lock;
/* Whether we should enable recording the task graph (costly, so only done
 * when a scheduler needs it — see the file header comment). */
int _starpu_graph_record;
/* All jobs without incoming dependency (the sources of the graph) */
struct _starpu_job_multilist_top top;
/* All jobs without outgoing dependency (the sinks of the graph) */
struct _starpu_job_multilist_bottom bottom;
/* Every job currently recorded in the graph */
struct _starpu_job_multilist_all all;
  33. void _starpu_graph_init(void)
  34. {
  35. STARPU_PTHREAD_RWLOCK_INIT(&graph_lock, NULL);
  36. _starpu_job_multilist_init_top(&top);
  37. _starpu_job_multilist_init_bottom(&bottom);
  38. _starpu_job_multilist_init_all(&all);
  39. }
  40. static void __starpu_graph_foreach(void (*func)(void *data, struct _starpu_job *job), void *data)
  41. {
  42. struct _starpu_job *job;
  43. for (job = _starpu_job_multilist_begin_all(&all);
  44. job != _starpu_job_multilist_end_all(&all);
  45. job = _starpu_job_multilist_next_all(job))
  46. func(data, job);
  47. }
  48. /* Add a job to the graph */
  49. void _starpu_graph_add_job(struct _starpu_job *job)
  50. {
  51. STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
  52. /* It does not have any dependency yet, add to all lists */
  53. _starpu_job_multilist_push_back_top(&top, job);
  54. _starpu_job_multilist_push_back_bottom(&bottom, job);
  55. _starpu_job_multilist_push_back_all(&all, job);
  56. STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
  57. }
  58. /* Add a job to an array of jobs */
  59. static unsigned add_job(struct _starpu_job *job, struct _starpu_job ***jobs, unsigned *n_jobs, unsigned *alloc_jobs, unsigned **slot)
  60. {
  61. unsigned ret;
  62. if (*n_jobs == *alloc_jobs)
  63. {
  64. if (*alloc_jobs)
  65. *alloc_jobs *= 2;
  66. else
  67. *alloc_jobs = 4;
  68. *jobs = realloc(*jobs, *alloc_jobs * sizeof(**jobs));
  69. if (slot)
  70. *slot = realloc(*slot, *alloc_jobs * sizeof(**slot));
  71. }
  72. ret = (*n_jobs)++;
  73. (*jobs)[ret] = job;
  74. return ret;
  75. }
  76. /* Add a dependency between jobs */
  77. void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job)
  78. {
  79. unsigned rank_incoming, rank_outgoing;
  80. STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
  81. if (_starpu_job_multilist_queued_bottom(prev_job))
  82. /* Previous job is not at bottom any more */
  83. _starpu_job_multilist_erase_bottom(&bottom, prev_job);
  84. if (_starpu_job_multilist_queued_top(job))
  85. /* Next job is not at top any more */
  86. _starpu_job_multilist_erase_top(&top, job);
  87. rank_incoming = add_job(prev_job, &job->incoming, &job->n_incoming, &job->alloc_incoming, NULL);
  88. rank_outgoing = add_job(job, &prev_job->outgoing, &prev_job->n_outgoing, &prev_job->alloc_outgoing, &prev_job->outgoing_slot);
  89. prev_job->outgoing_slot[rank_outgoing] = rank_incoming;
  90. STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
  91. }
  92. /* Drop a job, and thus its dependencies */
  93. void _starpu_graph_drop_job(struct _starpu_job *job)
  94. {
  95. unsigned i;
  96. STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
  97. if (_starpu_job_multilist_queued_bottom(job))
  98. _starpu_job_multilist_erase_bottom(&bottom, job);
  99. if (_starpu_job_multilist_queued_top(job))
  100. _starpu_job_multilist_erase_top(&top, job);
  101. if (_starpu_job_multilist_queued_all(job))
  102. _starpu_job_multilist_erase_all(&all, job);
  103. /* Drop ourself from the incoming part of the outgoing jobs */
  104. for (i = 0; i < job->n_outgoing; i++)
  105. {
  106. struct _starpu_job *next = job->outgoing[i];
  107. next->incoming[job->outgoing_slot[i]] = NULL;
  108. }
  109. job->n_outgoing = 0;
  110. free(job->outgoing);
  111. job->outgoing = NULL;
  112. free(job->outgoing_slot);
  113. job->outgoing_slot = NULL;
  114. job->alloc_outgoing = 0;
  115. job->n_incoming = 0;
  116. free(job->incoming);
  117. job->incoming = NULL;
  118. job->alloc_incoming = 0;
  119. STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
  120. }
  121. static void _starpu_graph_set_n(void *data, struct _starpu_job *job)
  122. {
  123. int value = (intptr_t) data;
  124. job->graph_n = value;
  125. }
/* Call FUNC once per dependency edge (next_job, prev_job), visiting the
 * graph from bottom (sinks) to top (sources) in topological order.
 * Caller must hold graph_lock. */
static void _starpu_graph_compute_bottom_up(void (*func)(struct _starpu_job *next_job, struct _starpu_job *prev_job, void *data), void *data)
{
	struct _starpu_job *job, *job2;
	/* Current and next BFS frontiers, grown on demand through add_job() */
	struct _starpu_job **current_set = NULL, **next_set = NULL, **swap_set;
	unsigned current_n, next_n, i, j;
	unsigned current_alloc = 0, next_alloc = 0, swap_alloc;
	/* Classical flow algorithm: start from bottom, and propagate depths to top */
	/* Set number of processed outgoing edges to 0 for each node; graph_n
	 * serves as that per-node counter during the traversal */
	__starpu_graph_foreach(_starpu_graph_set_n, (void*) 0);
	/* Start with the bottom of the graph (jobs without outgoing edges) */
	current_n = 0;
	for (job = _starpu_job_multilist_begin_bottom(&bottom);
	     job != _starpu_job_multilist_end_bottom(&bottom);
	     job = _starpu_job_multilist_next_bottom(job))
		add_job(job, &current_set, &current_n, &current_alloc, NULL);
	/* Now propagate to top as long as we have current nodes */
	while (current_n)
	{
		/* Next set is initially empty */
		next_n = 0;
		/* For each node in the current set */
		for (i = 0; i < current_n; i++)
		{
			job = current_set[i];
			/* For each parent of this job */
			for (j = 0; j < job->n_incoming; j++)
			{
				job2 = job->incoming[j];
				if (!job2)
					/* NULL hole left by _starpu_graph_drop_job() */
					continue;
				job2->graph_n++;
				func(job, job2, data);
				if ((unsigned) job2->graph_n == job2->n_outgoing)
					/* All outgoing edges were processed, can now add to next set:
					 * every successor of job2 has already been visited */
					add_job(job2, &next_set, &next_n, &next_alloc, NULL);
			}
		}
		/* Swap next set with current set, recycling the old array and its
		 * allocated capacity as the next round's scratch space */
		swap_set = next_set;
		swap_alloc = next_alloc;
		next_set = current_set;
		next_alloc = current_alloc;
		current_set = swap_set;
		current_alloc = swap_alloc;
		current_n = next_n;
	}
	free(current_set);
	free(next_set);
}
  176. static void compute_depth(struct _starpu_job *next_job, struct _starpu_job *prev_job, void *data STARPU_ATTRIBUTE_UNUSED)
  177. {
  178. if (prev_job->depth < next_job->depth + 1)
  179. prev_job->depth = next_job->depth + 1;
  180. }
  181. void _starpu_graph_compute_depths(void)
  182. {
  183. struct _starpu_job *job;
  184. STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
  185. /* The bottom of the graph has depth 0 */
  186. for (job = _starpu_job_multilist_begin_bottom(&bottom);
  187. job != _starpu_job_multilist_end_bottom(&bottom);
  188. job = _starpu_job_multilist_next_bottom(job))
  189. job->depth = 0;
  190. _starpu_graph_compute_bottom_up(compute_depth, NULL);
  191. STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
  192. }
/* Compute job->descendants — the number of distinct jobs reachable through
 * outgoing edges — for every job in the graph, by running one BFS per job.
 * Takes the graph lock. */
void _starpu_graph_compute_descendants(void)
{
	struct _starpu_job *job, *job2, *job3;
	/* BFS frontiers, reused (along with their capacity) across jobs */
	struct _starpu_job **current_set = NULL, **next_set = NULL, **swap_set;
	unsigned current_n, next_n, i, j;
	unsigned current_alloc = 0, next_alloc = 0, swap_alloc;
	unsigned descendants;
	STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
	/* Yes, this is O(|V|.(|V|+|E|)) :( */
	/* We could get O(|V|.|E|) by doing a topological sort first.
	 *
	 * |E| is usually O(|V|), though (bounded number of data dependencies,
	 * and we use synchronization tasks) */
	for (job = _starpu_job_multilist_begin_all(&all);
	     job != _starpu_job_multilist_end_all(&all);
	     job = _starpu_job_multilist_next_all(job))
	{
		/* Mark all nodes as unseen; graph_n doubles as the "visited" flag */
		for (job2 = _starpu_job_multilist_begin_all(&all);
		     job2 != _starpu_job_multilist_end_all(&all);
		     job2 = _starpu_job_multilist_next_all(job2))
			job2->graph_n = 0;
		/* Start with the node we want to compute the number of descendants of */
		current_n = 0;
		add_job(job, &current_set, &current_n, &current_alloc, NULL);
		job->graph_n = 1;
		descendants = 0;
		/* While we have descendants, count their descendants */
		while (current_n) {
			/* Next set is initially empty */
			next_n = 0;
			/* For each node in the current set */
			for (i = 0; i < current_n; i++)
			{
				job2 = current_set[i];
				/* For each child of this job2 */
				for (j = 0; j < job2->n_outgoing; j++)
				{
					job3 = job2->outgoing[j];
					if (!job3)
						/* NULL hole left by _starpu_graph_drop_job() */
						continue;
					if (job3->graph_n)
						/* Already seen */
						continue;
					/* First visit: count this node and queue it */
					job3->graph_n = 1;
					descendants++;
					add_job(job3, &next_set, &next_n, &next_alloc, NULL);
				}
			}
			/* Swap next set with current set, recycling the arrays */
			swap_set = next_set;
			swap_alloc = next_alloc;
			next_set = current_set;
			next_alloc = current_alloc;
			current_set = swap_set;
			current_alloc = swap_alloc;
			current_n = next_n;
		}
		job->descendants = descendants;
	}
	STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
	free(current_set);
	free(next_set);
}
/* Public wrapper around __starpu_graph_foreach(): apply FUNC to every job
 * recorded in the graph while holding graph_lock.
 * NOTE(review): the lock is taken in write mode, presumably because FUNC
 * callbacks (e.g. _starpu_graph_set_n) mutate job state — confirm before
 * downgrading to a read lock. */
void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_job *job), void *data)
{
	STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
	__starpu_graph_foreach(func, data);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
}