stencil-tasks.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010, 2013-2014 Université de Bordeaux
 * Copyright (C) 2012, 2013, 2015 CNRS
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include "stencil.h"

#define BIND_LAST 1
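/* When explicit binding is enabled (get_bind_tasks()), tasks of the first
 * BIND_LAST iterations are pinned to their block's preferred worker; later
 * iterations are left entirely to the scheduler. */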

/*
 * Schedule tasks for updates and saves
 */

/*
 * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT
 *
 * dir is -1 or +1.
 */

#if 0
# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
#else
# define DEBUG(fmt, ...)
#endif

/*
 * SAVE
 */

/* R(z) = R(z+d) = local, just call the save kernel */
static void create_task_save_local(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct starpu_task *save_task = starpu_task_create();
	struct block_description *descr = get_block_description(z);

	save_task->cl = (dir == -1)?&save_cl_bottom:&save_cl_top;
	save_task->cl_arg = descr;

	/* Saving our border... */
	save_task->handles[0] = descr->layers_handle[0];
	save_task->handles[1] = descr->layers_handle[1];

	/* ... to the neighbour's copy */
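	/* (1+dir)/2 maps dir == -1 to 0 and dir == +1 to 1, selecting the
	 * neighbour on that side; (1-dir)/2 gives the opposite index, i.e. the
	 * neighbour's boundary copy on the side facing this block. */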
	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	save_task->handles[2] = neighbour->boundaries_handle[(1-dir)/2][0];
	save_task->handles[3] = neighbour->boundaries_handle[(1-dir)/2][1];

	/* Bind */
	if (iter <= BIND_LAST)
		save_task->execute_on_a_specific_worker = get_bind_tasks();
	save_task->workerid = descr->preferred_worker;

	int ret = starpu_task_submit(save_task);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task save: %d\n", ret);
		STARPU_ABORT();
	}
}

/* R(z) = local & R(z+d) != local */
/* We need to send our save over MPI */
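/* Completion callback for the detached MPI sends (when StarPU-MPI is used);
 * it only prints debug output. */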
static void send_done(void *arg)
{
	uintptr_t z = (uintptr_t) arg;
	DEBUG("DO SEND %d\n", (int)z);
}

#ifdef STARPU_USE_MPI
/* Post MPI send */
static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node == local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int dest = neighbour->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node != local_rank);

	/* Send neighbour's border copy to the neighbour */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];

	starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
	starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
}

/* R(z) != local & R(z+d) = local */
/* We need to receive over MPI */
static void recv_done(void *arg)
{
	uintptr_t z = (uintptr_t) arg;
	DEBUG("DO RECV %d\n", (int)z);
}

/* Post MPI recv */
static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node != local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int source = descr->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node == local_rank);

	/* Receive our neighbour's border into our neighbour copy */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];

	starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
	starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
}
#endif /* STARPU_USE_MPI */
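
/* Depending on where blocks z and z+dir live, saving a boundary means either
 * a purely local save task, a local save plus an MPI send, or just an MPI
 * receive on the node that owns the neighbour copy. */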

/*
 * Schedule saving boundaries of blocks to communication buffers
 */
void create_task_save(unsigned iter, unsigned z, int dir, int local_rank)
{
	int node_z = get_block_mpi_node(z);
	int node_z_and_d = get_block_mpi_node(z+dir);

#ifdef STARPU_USE_MPI
	if (node_z == local_rank)
	{
		/* Save data from update */
		create_task_save_local(iter, z, dir, local_rank);
		if (node_z_and_d != local_rank)
		{
			/* R(z) = local & R(z+d) != local: we have to send the data */
			create_task_save_mpi_send(iter, z, dir, local_rank);
		}
	}
	else
	{
		/* node_z != local_rank: this MPI node doesn't have the saved data */
		if (node_z_and_d == local_rank)
		{
			create_task_save_mpi_recv(iter, z, dir, local_rank);
		}
		else
		{
			/* R(z) != local & R(z+d) != local: we neither have the
			 * saved data nor need it, so we shouldn't even have
			 * been called! */
			STARPU_ABORT();
		}
	}
#else /* !STARPU_USE_MPI */
	STARPU_ASSERT((node_z == local_rank) && (node_z_and_d == local_rank));
	create_task_save_local(iter, z, dir, local_rank);
#endif /* STARPU_USE_MPI */
}

/*
 * Schedule update computation in computation buffer
 */
void create_task_update(unsigned iter, unsigned z, int local_rank)
{
	STARPU_ASSERT(iter != 0);

	struct starpu_task *task = starpu_task_create();
	unsigned niter = get_niter();

	/* We are going to synchronize with the last tasks */
	if (iter == niter)
	{
		task->use_tag = 1;
		task->tag_id = TAG_FINISH(z);
	}
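
	/* The two layers act as a double buffer: the parity of K*(iter-1)
	 * selects which layer holds the current data (old_layer) and which one
	 * gets overwritten (new_layer). K is presumably the number of stencil
	 * sweeps each update task performs (defined in stencil.h). */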
	unsigned old_layer = (K*(iter-1)) % 2;
	unsigned new_layer = (old_layer + 1) % 2;

	struct block_description *descr = get_block_description(z);
	task->handles[0] = descr->layers_handle[new_layer];
	task->handles[1] = descr->layers_handle[old_layer];
	task->handles[2] = descr->boundaries_handle[T][new_layer];
	task->handles[3] = descr->boundaries_handle[T][old_layer];
	task->handles[4] = descr->boundaries_handle[B][new_layer];
	task->handles[5] = descr->boundaries_handle[B][old_layer];

	task->cl = &cl_update;
	task->cl_arg = descr;

	if (iter <= BIND_LAST)
		task->execute_on_a_specific_worker = get_bind_tasks();
	task->workerid = descr->preferred_worker;

	int ret = starpu_task_submit(task);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task update block: %d\n", ret);
		STARPU_ABORT();
	}
}

/* Dummy empty codelet taking two buffers */
void null_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
{
}
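
/* A tiny but non-zero cost estimate: presumably so that performance-model
 * based schedulers have something sensible to work with for these dummy
 * tasks. */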
static double null_cost_function(struct starpu_task *task, unsigned nimpl)
{
	(void) task;
	(void) nimpl;
	return 0.000001;
}

static struct starpu_perfmodel null_model =
{
	.type = STARPU_COMMON,
	.cost_function = null_cost_function,
	.symbol = "null"
};

static struct starpu_codelet null =
{
	.modes = { STARPU_W, STARPU_W },
	.cpu_funcs = {null_func},
	.cpu_funcs_name = {"null_func"},
	.cuda_funcs = {null_func},
	.opencl_funcs = {null_func},
	.nbuffers = 2,
	.model = &null_model,
	.name = "start"
};

void create_start_task(int z, int dir)
{
	/* Dumb task depending on the init task and simulating writing the
	 * neighbour buffers, to prevent communications and computations from
	 * running before we start measuring time */
	struct starpu_task *wait_init = starpu_task_create();
	struct block_description *descr = get_block_description(z);
	starpu_tag_t tag_init = TAG_INIT_TASK;

	wait_init->cl = &null;
	wait_init->use_tag = 1;
	wait_init->tag_id = TAG_START(z, dir);
	wait_init->handles[0] = descr->boundaries_handle[(1 + dir) / 2][0];
	wait_init->handles[1] = descr->boundaries_handle[(1 + dir) / 2][1];
	starpu_tag_declare_deps_array(wait_init->tag_id, 1, &tag_init);

	int ret = starpu_task_submit(wait_init);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task initial wait: %d\n", ret);
		STARPU_ABORT();
	}
}

/*
 * Create all the tasks
 */
void create_tasks(int rank)
{
	int iter;
	int bz;
	int niter = get_niter();
	int nbz = get_nbz();

	for (bz = 0; bz < nbz; bz++)
	{
		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
			create_start_task(bz, +1);
		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
			create_start_task(bz, -1);
	}
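
	/* Iteration 0 is the initialization phase: it only saves the initial
	 * boundaries (no update), and the last iteration performs no save since
	 * no further update will consume it. */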
	for (iter = 0; iter <= niter; iter++)
	{
		for (bz = 0; bz < nbz; bz++)
		{
			if ((iter > 0) && (get_block_mpi_node(bz) == rank))
				create_task_update(iter, bz, rank);
		}

		for (bz = 0; bz < nbz; bz++)
		{
			if (iter != niter)
			{
				if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
					create_task_save(iter, bz, +1, rank);
				if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
					create_task_save(iter, bz, -1, rank);
			}
		}
	}
}

/*
 * Wait for termination
 */
void wait_end_tasks(int rank)
{
	int bz;
	int nbz = get_nbz();

	for (bz = 0; bz < nbz; bz++)
	{
		if (get_block_mpi_node(bz) == rank)
		{
			/* Wait for the task producing block "bz" */
			starpu_tag_wait(TAG_FINISH(bz));

			/* Get the result back to memory */
			struct block_description *block = get_block_description(bz);
			starpu_data_acquire(block->layers_handle[0], STARPU_R);
			starpu_data_acquire(block->layers_handle[1], STARPU_R);
			/* The data_acquire calls above make sure the data has been
			 * brought back to main memory; after that we can safely call
			 * data_release, so that the later data_unregister does not
			 * block. */
			starpu_data_release(block->layers_handle[0]);
			starpu_data_release(block->layers_handle[1]);
		}
	}
}