stencil-tasks.c

/*
 * StarPU
 * Copyright (C) Université Bordeaux 1, CNRS 2009-2010 (see AUTHORS file)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include "stencil.h"

#define BIND_LAST 1
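/* Tasks of the first iterations (iter <= BIND_LAST) are pinned to the block's
 * preferred worker when get_bind_tasks() is set; later tasks are left to the
 * scheduler. */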

/*
 * Schedule tasks for updates and saves
 */

/*
 * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT
 *
 * dir is -1 or +1.
 */

#if 0
# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
#else
# define DEBUG(fmt, ...)
#endif

/*
 * SAVE
 */

/* R(z) = R(z+d) = local, just call the save kernel */
static void create_task_save_local(unsigned iter, unsigned z, int dir, unsigned local_rank)
{
    struct starpu_task *save_task = starpu_task_create();
    struct block_description *descr = get_block_description(z);
    save_task->cl = (dir == -1) ? &save_cl_bottom : &save_cl_top;
    save_task->cl_arg = descr;

    /* Saving our border... */
    save_task->buffers[0].handle = descr->layers_handle[0];
    save_task->buffers[0].mode = STARPU_R;
    save_task->buffers[1].handle = descr->layers_handle[1];
    save_task->buffers[1].mode = STARPU_R;

    /* ... to the neighbour's copy */
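    /* Since dir is -1 or +1, (1+dir)/2 maps it to 0 or 1, selecting the
     * bottom or top neighbour; (1-dir)/2 is the opposite side, i.e. the
     * boundary slot on the neighbour that faces this block. */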
    struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
    save_task->buffers[2].handle = neighbour->boundaries_handle[(1-dir)/2][0];
    save_task->buffers[2].mode = STARPU_W;
    save_task->buffers[3].handle = neighbour->boundaries_handle[(1-dir)/2][1];
    save_task->buffers[3].mode = STARPU_W;

    /* Bind */
    if (iter <= BIND_LAST)
        save_task->execute_on_a_specific_worker = get_bind_tasks();
    save_task->workerid = descr->preferred_worker;

    int ret = starpu_task_submit(save_task);
    if (ret)
    {
        fprintf(stderr, "Could not submit task save: %d\n", ret);
        STARPU_ASSERT(0);
    }
}

/* R(z) = local & R(z+d) != local */
/* We need to send our save over MPI */

static void send_done(void *arg)
{
    uintptr_t z = (uintptr_t) arg;
    DEBUG("DO SEND %d\n", (int)z);
}

#ifdef STARPU_USE_MPI
/* Post MPI send */
static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, unsigned local_rank)
{
    struct block_description *descr = get_block_description(z);
    STARPU_ASSERT(descr->mpi_node == local_rank);

    struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
    int dest = neighbour->mpi_node;
    STARPU_ASSERT(neighbour->mpi_node != local_rank);

    /* Send neighbour's border copy to the neighbour */
    starpu_data_handle handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
    starpu_data_handle handle1 = neighbour->boundaries_handle[(1-dir)/2][1];
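
    /* The detached calls return immediately; StarPU progresses the transfer
     * in the background and invokes the callback (send_done) on completion,
     * so no MPI request has to be kept and waited on. */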
    starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
    starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
}

/* R(z) != local & R(z+d) = local */
/* We need to receive over MPI */

static void recv_done(void *arg)
{
    uintptr_t z = (uintptr_t) arg;
    DEBUG("DO RECV %d\n", (int)z);
}

/* Post MPI recv */
static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, unsigned local_rank)
{
    struct block_description *descr = get_block_description(z);
    STARPU_ASSERT(descr->mpi_node != local_rank);

    struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
    int source = descr->mpi_node;
    STARPU_ASSERT(neighbour->mpi_node == local_rank);

    /* Receive our neighbour's border in our neighbour copy */
    starpu_data_handle handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
    starpu_data_handle handle1 = neighbour->boundaries_handle[(1-dir)/2][1];

    starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
    starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
}
#endif // STARPU_USE_MPI

/*
 * Schedule saving boundaries of blocks to communication buffers
 */
void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank)
{
    unsigned node_z = get_block_mpi_node(z);
    unsigned node_z_and_d = get_block_mpi_node(z+dir);

#ifdef STARPU_USE_MPI
    if (node_z == local_rank)
    {
        /* Save data from update */
        create_task_save_local(iter, z, dir, local_rank);
        if (node_z_and_d != local_rank)
        {
            /* R(z) = local & R(z+d) != local, we have to send the data */
            create_task_save_mpi_send(iter, z, dir, local_rank);
        }
    }
    else
    {
        /* node_z != local_rank, this MPI node doesn't have the saved data */
        if (node_z_and_d == local_rank)
        {
            create_task_save_mpi_recv(iter, z, dir, local_rank);
        }
        else
        {
            /* R(z) != local & R(z+d) != local: we don't have the saved data
             * and don't need it, we shouldn't even have been called! */
            STARPU_ASSERT(0);
        }
    }
#else // !STARPU_USE_MPI
    STARPU_ASSERT((node_z == local_rank) && (node_z_and_d == local_rank));
    create_task_save_local(iter, z, dir, local_rank);
#endif // STARPU_USE_MPI
}

/*
 * Schedule update computation in computation buffer
 */
void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
{
    STARPU_ASSERT(iter != 0);

    struct starpu_task *task = starpu_task_create();
    unsigned niter = get_niter();

    /* We are going to synchronize with the last tasks */
    if (iter == niter)
    {
        task->detach = 0;
        task->use_tag = 1;
        task->tag_id = TAG_FINISH(z);
    }
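
    /* The two layers form a double buffer: the parity of K*(iter-1) (K comes
     * from stencil.h) selects which layer holds the input of this update,
     * while the other one receives the result. */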
    unsigned old_layer = (K*(iter-1)) % 2;
    unsigned new_layer = (old_layer + 1) % 2;

    struct block_description *descr = get_block_description(z);
    task->buffers[0].handle = descr->layers_handle[new_layer];
    task->buffers[0].mode = STARPU_RW;
    task->buffers[1].handle = descr->layers_handle[old_layer];
    task->buffers[1].mode = STARPU_RW;
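
    /* The top (T) and bottom (B) boundary copies filled in by the save tasks
     * are read-only inputs of the update. */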
    task->buffers[2].handle = descr->boundaries_handle[T][new_layer];
    task->buffers[2].mode = STARPU_R;
    task->buffers[3].handle = descr->boundaries_handle[T][old_layer];
    task->buffers[3].mode = STARPU_R;
    task->buffers[4].handle = descr->boundaries_handle[B][new_layer];
    task->buffers[4].mode = STARPU_R;
    task->buffers[5].handle = descr->boundaries_handle[B][old_layer];
    task->buffers[5].mode = STARPU_R;

    task->cl = &cl_update;
    task->cl_arg = descr;
    if (iter <= BIND_LAST)
        task->execute_on_a_specific_worker = get_bind_tasks();
    task->workerid = descr->preferred_worker;

    int ret = starpu_task_submit(task);
    if (ret)
    {
        fprintf(stderr, "Could not submit task update block: %d\n", ret);
        STARPU_ASSERT(0);
    }
}

/* Dummy empty codelet taking two buffers */
static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }

static starpu_codelet null = {
    .where = STARPU_CPU|STARPU_CUDA,
    .cpu_func = null_func,
    .cuda_func = null_func,
    .nbuffers = 2
};

void create_start_task(int z, int dir)
{
    /* Dumb task depending on the init task and simulating writing the
     * neighbour buffers, to avoid communications and computation running
     * before we start measuring time */
    struct starpu_task *wait_init = starpu_task_create();
    struct block_description *descr = get_block_description(z);
    starpu_tag_t tag_init = TAG_INIT_TASK;

    wait_init->cl = &null;
    wait_init->use_tag = 1;
    wait_init->tag_id = TAG_START(z, dir);

    wait_init->buffers[0].handle = descr->boundaries_handle[(1+dir)/2][0];
    wait_init->buffers[0].mode = STARPU_W;
    wait_init->buffers[1].handle = descr->boundaries_handle[(1+dir)/2][1];
    wait_init->buffers[1].mode = STARPU_W;
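
    /* Make TAG_START(z, dir) depend on the init tag, so this task cannot run
     * before the initialization task has completed. */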
    starpu_tag_declare_deps_array(wait_init->tag_id, 1, &tag_init);

    int ret = starpu_task_submit(wait_init);
    if (ret)
    {
        fprintf(stderr, "Could not submit task initial wait: %d\n", ret);
        STARPU_ASSERT(0);
    }
}

/*
 * Create all the tasks
 */
void create_tasks(int rank)
{
    unsigned iter;
    unsigned bz;
    int niter = get_niter();
    int nbz = get_nbz();
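
    /* Create a start task for every (block, direction) pair this rank is
     * involved in, i.e. whenever it owns either the block itself or the
     * neighbour the boundary is saved to. */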
    for (bz = 0; bz < nbz; bz++)
    {
        if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
            create_start_task(bz, +1);
        if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
            create_start_task(bz, -1);
    }
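
    /* Iteration 0 only saves the initial boundaries; iterations 1..niter
     * update the blocks; the last iteration creates no save tasks since its
     * results are only read back (cf. TAG_FINISH in create_task_update). */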
    for (iter = 0; iter <= niter; iter++)
        for (bz = 0; bz < nbz; bz++)
        {
            if ((iter > 0) && (get_block_mpi_node(bz) == rank))
                create_task_update(iter, bz, rank);

            if (iter != niter)
            {
                if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
                    create_task_save(iter, bz, +1, rank);
                if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
                    create_task_save(iter, bz, -1, rank);
            }
        }
}

/*
 * Wait for termination
 */
void wait_end_tasks(int rank)
{
    unsigned bz;
    int nbz = get_nbz();

    for (bz = 0; bz < nbz; bz++)
    {
        if (get_block_mpi_node(bz) == rank)
        {
            /* Wait for the task producing block "bz" */
            starpu_tag_wait(TAG_FINISH(bz));

            /* Get the result back to memory */
            struct block_description *block = get_block_description(bz);
            starpu_data_acquire(block->layers_handle[0], STARPU_R);
            starpu_data_acquire(block->layers_handle[1], STARPU_R);
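            /* The handles are left acquired so the caller can read the
             * computed layers directly from main memory. */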
        }
    }
}
  257. }