graph.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2016,2017 CNRS
  4. * Copyright (C) 2017 Inria
  5. * Copyright (C) 2016-2018 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*
* This stores the task graph structure, to be used by the schedulers which need
  20. * it. We do not always enable it since it is costly. To avoid interfering
  21. * too much with execution, it may be a bit outdated, i.e. still contain jobs
  22. * which have completed very recently.
  23. *
  24. * This is because we drop nodes lazily: when a job terminates, we just add the
  25. * node to the dropped list (to avoid having to take the mutex on the whole
  26. * graph). The graph gets updated whenever the graph mutex becomes available.
  27. */
  28. #include <starpu.h>
  29. #include <core/jobs.h>
  30. #include <common/graph.h>
  31. #include <core/workers.h>
/* Protects the whole task graph except the dropped list */
static starpu_pthread_rwlock_t graph_lock;

/* Whether we should enable recording the task graph */
int _starpu_graph_record;

/* This list contains all nodes without incoming dependency */
struct _starpu_graph_node_multilist_top top;

/* This list contains all nodes without outgoing dependency */
struct _starpu_graph_node_multilist_bottom bottom;

/* This list contains all nodes */
struct _starpu_graph_node_multilist_all all;

/* Protects the dropped list, always taken before graph lock */
static starpu_pthread_mutex_t dropped_lock;

/* This list contains all dropped nodes, i.e. the corresponding job has
 * terminated but the node is still in the graph */
struct _starpu_graph_node_multilist_dropped dropped;
  46. void _starpu_graph_init(void)
  47. {
  48. STARPU_PTHREAD_RWLOCK_INIT(&graph_lock, NULL);
  49. _starpu_graph_node_multilist_head_init_top(&top);
  50. _starpu_graph_node_multilist_head_init_bottom(&bottom);
  51. _starpu_graph_node_multilist_head_init_all(&all);
  52. STARPU_PTHREAD_MUTEX_INIT(&dropped_lock, NULL);
  53. _starpu_graph_node_multilist_head_init_dropped(&dropped);
  54. }
/* LockWR the graph lock */
void _starpu_graph_wrlock(void)
{
	/* Mark the calling worker as "relaxed" while it may block on the
	 * rwlock, so the rest of the runtime does not wait on it meanwhile. */
	starpu_worker_relax_on();
	STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock);
	starpu_worker_relax_off();
}
void _starpu_graph_drop_node(struct _starpu_graph_node *node);

/* This flushes the list of nodes to be dropped. Both the dropped_lock and
 * graph_lock mutexes have to be held on entry, and are released. */
void _starpu_graph_drop_dropped_nodes(void)
{
	struct _starpu_graph_node_multilist_dropped dropping;

	/* Pick up the list of dropped nodes */
	_starpu_graph_node_multilist_move_dropped(&dropped, &dropping);
	/* dropped_lock can be released right away: we now own the private
	 * "dropping" list, and graph_lock protects everything else. */
	STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock);

	/* And now process it if it's not empty. */
	if (!_starpu_graph_node_multilist_empty_dropped(&dropping))
	{
		struct _starpu_graph_node *node, *next;
		for (node = _starpu_graph_node_multilist_begin_dropped(&dropping);
		     node != _starpu_graph_node_multilist_end_dropped(&dropping);
		     node = next)
		{
			/* Read the next pointer before the node gets freed */
			next = _starpu_graph_node_multilist_next_dropped(node);
			_starpu_graph_drop_node(node);
		}
	}
	STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
}
/* UnlockWR the graph lock */
void _starpu_graph_wrunlock(void)
{
	/* Before actually releasing the graph wrlock, flush the pending
	 * dropped nodes: take dropped_lock, then let
	 * _starpu_graph_drop_dropped_nodes() release both locks. */
	starpu_worker_relax_on();
	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
	starpu_worker_relax_off();
	_starpu_graph_drop_dropped_nodes();
}
/* LockRD the graph lock */
void _starpu_graph_rdlock(void)
{
	/* Mark the calling worker as "relaxed" while it may block on the
	 * rwlock, so the rest of the runtime does not wait on it meanwhile. */
	starpu_worker_relax_on();
	STARPU_PTHREAD_RWLOCK_RDLOCK(&graph_lock);
	starpu_worker_relax_off();
}
/* UnlockRD the graph lock */
void _starpu_graph_rdunlock(void)
{
	STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock);
	/* Take the opportunity to try to take it WR */
	if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0)
		/* Good, flush dropped nodes; _starpu_graph_wrunlock() takes
		 * dropped_lock and releases both locks. */
		_starpu_graph_wrunlock();
}
  109. static void __starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data)
  110. {
  111. struct _starpu_graph_node *node;
  112. for (node = _starpu_graph_node_multilist_begin_all(&all);
  113. node != _starpu_graph_node_multilist_end_all(&all);
  114. node = _starpu_graph_node_multilist_next_all(node))
  115. func(data, node);
  116. }
  117. /* Add a node to the graph */
  118. void _starpu_graph_add_job(struct _starpu_job *job)
  119. {
  120. struct _starpu_graph_node *node;
  121. _STARPU_CALLOC(node, 1, sizeof(*node));
  122. node->job = job;
  123. job->graph_node = node;
  124. STARPU_PTHREAD_MUTEX_INIT(&node->mutex, NULL);
  125. _starpu_graph_wrlock();
  126. /* It does not have any dependency yet, add to all lists */
  127. _starpu_graph_node_multilist_push_back_top(&top, node);
  128. _starpu_graph_node_multilist_push_back_bottom(&bottom, node);
  129. _starpu_graph_node_multilist_push_back_all(&all, node);
  130. _starpu_graph_wrunlock();
  131. }
  132. /* Add a node to an array of nodes */
  133. static unsigned add_node(struct _starpu_graph_node *node, struct _starpu_graph_node ***nodes, unsigned *n_nodes, unsigned *alloc_nodes, unsigned **slot)
  134. {
  135. unsigned ret;
  136. if (*n_nodes == *alloc_nodes)
  137. {
  138. if (*alloc_nodes)
  139. *alloc_nodes *= 2;
  140. else
  141. *alloc_nodes = 4;
  142. _STARPU_REALLOC(*nodes, *alloc_nodes * sizeof(**nodes));
  143. if (slot)
  144. {
  145. _STARPU_REALLOC(*slot, *alloc_nodes * sizeof(**slot));
  146. }
  147. }
  148. ret = (*n_nodes)++;
  149. (*nodes)[ret] = node;
  150. return ret;
  151. }
/* Add a dependency between nodes: prev_job's node gains an outgoing edge
 * to job's node. Silently returns if either job's node is already gone. */
void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job)
{
	unsigned rank_incoming, rank_outgoing;
	_starpu_graph_wrlock();
	struct _starpu_graph_node *node = job->graph_node;
	struct _starpu_graph_node *prev_node = prev_job->graph_node;
	if (!node || !prev_node)
	{
		/* Already gone */
		_starpu_graph_wrunlock();
		return;
	}
	/* prev_node gains an outgoing edge and node an incoming one, so they
	 * respectively leave the bottom and top lists if still there. */
	if (_starpu_graph_node_multilist_queued_bottom(prev_node))
		/* Previous node is not at bottom any more */
		_starpu_graph_node_multilist_erase_bottom(&bottom, prev_node);
	if (_starpu_graph_node_multilist_queued_top(node))
		/* Next node is not at top any more */
		_starpu_graph_node_multilist_erase_top(&top, node);
	/* Record the edge on both endpoints, and cross-store each side's array
	 * index in the other side's slot array, so the edge can later be
	 * unlinked from either endpoint when a node is dropped. */
	rank_incoming = add_node(prev_node, &node->incoming, &node->n_incoming, &node->alloc_incoming, &node->incoming_slot);
	rank_outgoing = add_node(node, &prev_node->outgoing, &prev_node->n_outgoing, &prev_node->alloc_outgoing, &prev_node->outgoing_slot);
	prev_node->outgoing_slot[rank_outgoing] = rank_incoming;
	node->incoming_slot[rank_incoming] = rank_outgoing;
	_starpu_graph_wrunlock();
}
  177. /* Drop a node, and thus its dependencies */
  178. void _starpu_graph_drop_node(struct _starpu_graph_node *node)
  179. {
  180. unsigned i;
  181. STARPU_ASSERT(!node->job);
  182. if (_starpu_graph_node_multilist_queued_bottom(node))
  183. _starpu_graph_node_multilist_erase_bottom(&bottom, node);
  184. if (_starpu_graph_node_multilist_queued_top(node))
  185. _starpu_graph_node_multilist_erase_top(&top, node);
  186. if (_starpu_graph_node_multilist_queued_all(node))
  187. _starpu_graph_node_multilist_erase_all(&all, node);
  188. /* Drop ourself from the incoming part of the outgoing nodes. */
  189. for (i = 0; i < node->n_outgoing; i++)
  190. {
  191. struct _starpu_graph_node *next = node->outgoing[i];
  192. if (next)
  193. next->incoming[node->outgoing_slot[i]] = NULL;
  194. }
  195. /* Drop ourself from the outgoing part of the incoming nodes,
  196. * in case we happen to get dropped before it. */
  197. for (i = 0; i < node->n_incoming; i++)
  198. {
  199. struct _starpu_graph_node *prev = node->incoming[i];
  200. if (prev)
  201. prev->outgoing[node->incoming_slot[i]] = NULL;
  202. }
  203. node->n_outgoing = 0;
  204. free(node->outgoing);
  205. node->outgoing = NULL;
  206. free(node->outgoing_slot);
  207. node->outgoing_slot = NULL;
  208. node->alloc_outgoing = 0;
  209. node->n_incoming = 0;
  210. free(node->incoming);
  211. node->incoming = NULL;
  212. free(node->incoming_slot);
  213. node->incoming_slot = NULL;
  214. node->alloc_incoming = 0;
  215. free(node);
  216. }
/* Drop a job: detach it from its graph node and queue the node for lazy
 * removal from the graph (see the file header comment). */
void _starpu_graph_drop_job(struct _starpu_job *job)
{
	struct _starpu_graph_node *node = job->graph_node;
	job->graph_node = NULL;
	if (!node)
		return;
	/* Detach the job from the node under the node mutex, so concurrent
	 * readers of node->job see either the valid job or NULL. */
	starpu_worker_relax_on();
	STARPU_PTHREAD_MUTEX_LOCK(&node->mutex);
	starpu_worker_relax_off();
	/* Will not be able to use the job any more */
	node->job = NULL;
	STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex);
	starpu_worker_relax_on();
	STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock);
	starpu_worker_relax_off();
	/* Queue for removal when lock becomes available */
	_starpu_graph_node_multilist_push_back_dropped(&dropped, node);
	if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0)
	{
		/* Graph wrlock is available, drop nodes immediately.
		 * This releases both dropped_lock and graph_lock. */
		_starpu_graph_drop_dropped_nodes();
	}
	else
		/* Somebody else holds the graph lock; the dropped list will be
		 * flushed when that lock is released. */
		STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock);
}
  243. static void _starpu_graph_set_n(void *data, struct _starpu_graph_node *node)
  244. {
  245. int value = (intptr_t) data;
  246. node->graph_n = value;
  247. }
/* Call func for each vertex of the task graph, from bottom to top, in
 * topological order. func is called once per edge, with the child
 * (next_node) and parent (prev_node) of that edge. Caller must hold the
 * graph lock. */
static void _starpu_graph_compute_bottom_up(void (*func)(struct _starpu_graph_node *next_node, struct _starpu_graph_node *prev_node, void *data), void *data)
{
	struct _starpu_graph_node *node, *node2;
	struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set;
	unsigned current_n, next_n, i, j;
	unsigned current_alloc = 0, next_alloc = 0, swap_alloc;
	/* Classical flow algorithm: start from bottom, and propagate depths to top */
	/* Set number of processed outgoing edges to 0 for each node */
	__starpu_graph_foreach(_starpu_graph_set_n, (void*) 0);
	/* Start with the bottom of the graph */
	current_n = 0;
	for (node = _starpu_graph_node_multilist_begin_bottom(&bottom);
	     node != _starpu_graph_node_multilist_end_bottom(&bottom);
	     node = _starpu_graph_node_multilist_next_bottom(node))
		add_node(node, &current_set, &current_n, &current_alloc, NULL);
	/* Now propagate to top as long as we have current nodes */
	while (current_n)
	{
		/* Next set is initially empty */
		next_n = 0;
		/* For each node in the current set */
		for (i = 0; i < current_n; i++)
		{
			node = current_set[i];
			/* For each parent of this node */
			for (j = 0; j < node->n_incoming; j++)
			{
				node2 = node->incoming[j];
				if (!node2)
					continue;
				/* graph_n counts how many of node2's outgoing
				 * edges have been processed so far */
				node2->graph_n++;
				func(node, node2, data);
				if ((unsigned) node2->graph_n == node2->n_outgoing)
					/* All outgoing edges were processed, can now add to next set */
					add_node(node2, &next_set, &next_n, &next_alloc, NULL);
			}
		}
		/* Swap next set with current set (the arrays are reused to
		 * avoid reallocating at every level) */
		swap_set = next_set;
		swap_alloc = next_alloc;
		next_set = current_set;
		next_alloc = current_alloc;
		current_set = swap_set;
		current_alloc = swap_alloc;
		current_n = next_n;
	}
	free(current_set);
	free(next_set);
}
  298. static void compute_depth(struct _starpu_graph_node *next_node, struct _starpu_graph_node *prev_node, void *data)
  299. {
  300. (void)data;
  301. if (prev_node->depth < next_node->depth + 1)
  302. prev_node->depth = next_node->depth + 1;
  303. }
  304. void _starpu_graph_compute_depths(void)
  305. {
  306. struct _starpu_graph_node *node;
  307. _starpu_graph_wrlock();
  308. /* The bottom of the graph has depth 0 */
  309. for (node = _starpu_graph_node_multilist_begin_bottom(&bottom);
  310. node != _starpu_graph_node_multilist_end_bottom(&bottom);
  311. node = _starpu_graph_node_multilist_next_bottom(node))
  312. node->depth = 0;
  313. _starpu_graph_compute_bottom_up(compute_depth, NULL);
  314. _starpu_graph_wrunlock();
  315. }
/* Compute, for every node, the number of its descendants in the graph
 * (stored in node->descendants), by running one BFS per node. */
void _starpu_graph_compute_descendants(void)
{
	struct _starpu_graph_node *node, *node2, *node3;
	struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set;
	unsigned current_n, next_n, i, j;
	unsigned current_alloc = 0, next_alloc = 0, swap_alloc;
	_starpu_graph_wrlock();
	/* Yes, this is O(|V|.(|V|+|E|)) :( */
	/* We could get O(|V|.|E|) by doing a topological sort first.
	 *
	 * |E| is usually O(|V|), though (bounded number of data dependencies,
	 * and we use synchronization tasks) */
	for (node = _starpu_graph_node_multilist_begin_all(&all);
	     node != _starpu_graph_node_multilist_end_all(&all);
	     node = _starpu_graph_node_multilist_next_all(node))
	{
		unsigned descendants;
		/* Mark all nodes as unseen (graph_n doubles as the "visited"
		 * flag for this BFS) */
		for (node2 = _starpu_graph_node_multilist_begin_all(&all);
		     node2 != _starpu_graph_node_multilist_end_all(&all);
		     node2 = _starpu_graph_node_multilist_next_all(node2))
			node2->graph_n = 0;
		/* Start with the node we want to compute the number of descendants of */
		current_n = 0;
		add_node(node, &current_set, &current_n, &current_alloc, NULL);
		node->graph_n = 1;
		descendants = 0;
		/* While we have descendants, count their descendants */
		while (current_n)
		{
			/* Next set is initially empty */
			next_n = 0;
			/* For each node in the current set */
			for (i = 0; i < current_n; i++)
			{
				node2 = current_set[i];
				/* For each child of this node2 */
				for (j = 0; j < node2->n_outgoing; j++)
				{
					node3 = node2->outgoing[j];
					if (!node3)
						continue;
					if (node3->graph_n)
						/* Already seen */
						continue;
					/* Add this node */
					node3->graph_n = 1;
					descendants++;
					add_node(node3, &next_set, &next_n, &next_alloc, NULL);
				}
			}
			/* Swap next set with current set (arrays are reused
			 * across BFS levels and across nodes) */
			swap_set = next_set;
			swap_alloc = next_alloc;
			next_set = current_set;
			next_alloc = current_alloc;
			current_set = swap_set;
			current_alloc = swap_alloc;
			current_n = next_n;
		}
		node->descendants = descendants;
	}
	_starpu_graph_wrunlock();
	free(current_set);
	free(next_set);
}
/* Apply func to each node of the task graph, taking the graph lock around
 * the traversal (the write lock, presumably because func may modify nodes
 * — NOTE(review): confirm callers rely on this). */
void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data)
{
	_starpu_graph_wrlock();
	__starpu_graph_foreach(func, data);
	_starpu_graph_wrunlock();
}