/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2012,2017  Inria
 * Copyright (C) 2011-2013,2015,2017  CNRS
 * Copyright (C) 2010,2011,2013-2015,2017  Université de Bordeaux
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include "stencil.h"

#define BIND_LAST 1
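/* Tasks of iterations up to BIND_LAST may be pinned to each block's
 * preferred worker, when task binding is enabled (see get_bind_tasks()
 * in the iter <= BIND_LAST tests below). */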

/*
 * Schedule tasks for updates and saves
 */

/*
 * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT
 *
 * dir is -1 or +1.
 */

#if 0
# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
#else
# define DEBUG(fmt, ...)
#endif

/*
 * SAVE
 */

/* R(z) = R(z+d) = local, just call the save kernel */
static void create_task_save_local(unsigned iter, unsigned z, int dir)
{
	struct starpu_task *save_task = starpu_task_create();
	struct block_description *descr = get_block_description(z);

	save_task->cl = (dir == -1)?&save_cl_bottom:&save_cl_top;
	save_task->cl_arg = descr;

	/* Saving our border... */
	save_task->handles[0] = descr->layers_handle[0];
	save_task->handles[1] = descr->layers_handle[1];

	/* ... to the neighbour's copy */
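	/* dir is -1 or +1, so (1+dir)/2 selects neighbour 0 or 1, and
	 * (1-dir)/2 the matching side in that neighbour's boundary copies. */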
	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	save_task->handles[2] = neighbour->boundaries_handle[(1-dir)/2][0];
	save_task->handles[3] = neighbour->boundaries_handle[(1-dir)/2][1];

	/* Bind */
	if (iter <= BIND_LAST)
		save_task->execute_on_a_specific_worker = get_bind_tasks();
	save_task->workerid = descr->preferred_worker;

	int ret = starpu_task_submit(save_task);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task save: %d\n", ret);
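		/* -ENODEV means no worker can execute this task; exit status
		 * 77 is the conventional "test skipped" code. */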
		if (ret == -ENODEV)
			exit(77);
		STARPU_ABORT();
	}
}

/* R(z) = local & R(z+d) != local */
/* We need to send our save over MPI */
static void send_done(void *arg)
{
	uintptr_t z = (uintptr_t) arg;
	(void) z;
	DEBUG("DO SEND %d\n", (int)z);
}

#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE)
/* Post MPI send */
static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node == local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int dest = neighbour->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node != local_rank);

	/* Send neighbour's border copy to the neighbour */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];
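
	/* These detached calls return immediately; StarPU invokes send_done()
	 * once each transfer has completed, so no explicit wait is needed. */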
	starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
	starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z);
}

/* R(z) != local & R(z+d) = local */
/* We need to receive over MPI */
static void recv_done(void *arg)
{
	uintptr_t z = (uintptr_t) arg;
	(void) z;
	DEBUG("DO RECV %d\n", (int)z);
}

/* Post MPI recv */
static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank)
{
	struct block_description *descr = get_block_description(z);
	STARPU_ASSERT(descr->mpi_node != local_rank);

	struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2];
	int source = descr->mpi_node;
	STARPU_ASSERT(neighbour->mpi_node == local_rank);

	/* Receive our neighbour's border in our neighbour copy */
	starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0];
	starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1];
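
	/* As for the sends above, the receives are detached: recv_done()
	 * fires once the data has arrived. */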
	starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
	starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z);
}
#endif /* STARPU_USE_MPI */

/*
 * Schedule saving boundaries of blocks to communication buffers
 */
void create_task_save(unsigned iter, unsigned z, int dir, int local_rank)
{
	int node_z = get_block_mpi_node(z);
	int node_z_and_d = get_block_mpi_node(z+dir);

#if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE)
	if (node_z == local_rank)
	{
		/* Save data from update */
		create_task_save_local(iter, z, dir);
		if (node_z_and_d != local_rank)
		{
			/* R(z) = local & R(z+d) != local: we have to send the data */
			create_task_save_mpi_send(iter, z, dir, local_rank);
		}
	}
	else
	{
		/* node_z != local_rank, this MPI node doesn't have the saved data */
		if (node_z_and_d == local_rank)
		{
			create_task_save_mpi_recv(iter, z, dir, local_rank);
		}
		else
		{
			/* R(z) != local & R(z+d) != local: we neither have the
			   saved data nor need it, so we shouldn't even have
			   been called! */
			STARPU_ABORT();
		}
	}
#else /* !STARPU_USE_MPI */
	STARPU_ASSERT((node_z == local_rank) && (node_z_and_d == local_rank));
	create_task_save_local(iter, z, dir);
#endif /* STARPU_USE_MPI */
}

/*
 * Schedule update computation in computation buffer
 */
void create_task_update(unsigned iter, unsigned z, int local_rank)
{
	(void)local_rank; /* unused; kept so the prototype matches the implicit-dependencies version */
	STARPU_ASSERT(iter != 0);

	struct starpu_task *task = starpu_task_create();
	unsigned niter = get_niter();

	/* We are going to synchronize with the last tasks */
	if (iter == niter)
	{
		task->use_tag = 1;
		task->tag_id = TAG_FINISH(z);
	}
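
	/* Each update task performs K stencil sub-iterations (K is defined in
	 * stencil.h), ping-ponging between the two layers, so the parity of
	 * K*(iter-1) gives the layer holding the data of iteration iter-1. */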
	unsigned old_layer = (K*(iter-1)) % 2;
	unsigned new_layer = (old_layer + 1) % 2;

	struct block_description *descr = get_block_description(z);
	task->handles[0] = descr->layers_handle[new_layer];
	task->handles[1] = descr->layers_handle[old_layer];

	task->handles[2] = descr->boundaries_handle[T][new_layer];
	task->handles[3] = descr->boundaries_handle[T][old_layer];

	task->handles[4] = descr->boundaries_handle[B][new_layer];
	task->handles[5] = descr->boundaries_handle[B][old_layer];

	task->cl = &cl_update;
	task->cl_arg = descr;

	if (iter <= BIND_LAST)
		task->execute_on_a_specific_worker = get_bind_tasks();
	task->workerid = descr->preferred_worker;

	int ret = starpu_task_submit(task);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task update block: %d\n", ret);
		if (ret == -ENODEV)
			exit(77);
		STARPU_ABORT();
	}
}

/* Dummy empty codelet taking two buffers */
void null_func(void *descr[], void *arg)
{
	(void)descr;
	(void)arg;
}
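
/* Negligible but nonzero cost estimate for the dummy codelet below, so
 * model-based schedulers still get a valid prediction for these tasks. */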
static double null_cost_function(struct starpu_task *task, unsigned nimpl)
{
	(void) task;
	(void) nimpl;
	return 0.000001;
}

static struct starpu_perfmodel null_model =
{
	.type = STARPU_COMMON,
	.cost_function = null_cost_function,
	.symbol = "null"
};

static struct starpu_codelet null =
{
	.modes = { STARPU_W, STARPU_W },
	.cpu_funcs = {null_func},
	.cpu_funcs_name = {"null_func"},
	.cuda_funcs = {null_func},
	.opencl_funcs = {null_func},
	.nbuffers = 2,
	.model = &null_model,
	.name = "start"
};

void create_start_task(int z, int dir)
{
	/* Dumb task depending on the init task and simulating writing the
	   neighbour buffers, to avoid communications and computation running
	   before we start measuring time */
	struct starpu_task *wait_init = starpu_task_create();
	struct block_description *descr = get_block_description(z);
	starpu_tag_t tag_init = TAG_INIT_TASK;

	wait_init->cl = &null;
	wait_init->use_tag = 1;
	wait_init->tag_id = TAG_START(z, dir);
	wait_init->handles[0] = descr->boundaries_handle[(1 + dir) / 2][0];
	wait_init->handles[1] = descr->boundaries_handle[(1 + dir) / 2][1];
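
	/* Make TAG_START(z, dir) depend on the init task's tag, so this task
	 * cannot run before initialization has completed. */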
	starpu_tag_declare_deps_array(wait_init->tag_id, 1, &tag_init);

	int ret = starpu_task_submit(wait_init);
	if (ret)
	{
		FPRINTF(stderr, "Could not submit task initial wait: %d\n", ret);
		if (ret == -ENODEV)
			exit(77);
		STARPU_ABORT();
	}
}

/*
 * Create all the tasks
 */
void create_tasks(int rank)
{
	int iter;
	int bz;
	int niter = get_niter();
	int nbz = get_nbz();
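
	/* Start tasks: one per direction for every boundary that involves a
	 * block owned by this rank. */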
	for (bz = 0; bz < nbz; bz++)
	{
		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
			create_start_task(bz, +1);
		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
			create_start_task(bz, -1);
	}
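
	/* For each iteration, first schedule the update tasks for the blocks
	 * owned by this rank, then the save tasks that propagate the fresh
	 * boundaries to the neighbours (over MPI when a neighbour is remote). */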
	for (iter = 0; iter <= niter; iter++)
	{
		starpu_iteration_push(iter);
		for (bz = 0; bz < nbz; bz++)
		{
			if ((iter > 0) && (get_block_mpi_node(bz) == rank))
				create_task_update(iter, bz, rank);
		}
		for (bz = 0; bz < nbz; bz++)
		{
			if (iter != niter)
			{
				if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
					create_task_save(iter, bz, +1, rank);
				if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
					create_task_save(iter, bz, -1, rank);
			}
		}
		starpu_iteration_pop();
	}
}

/*
 * Wait for termination
 */
void wait_end_tasks(int rank)
{
	int bz;
	int nbz = get_nbz();

	for (bz = 0; bz < nbz; bz++)
	{
		if (get_block_mpi_node(bz) == rank)
		{
			/* Wait for the task producing block "bz" */
			starpu_tag_wait(TAG_FINISH(bz));

			/* Get the result back to memory */
			struct block_description *block = get_block_description(bz);
			starpu_data_acquire(block->layers_handle[0], STARPU_R);
			starpu_data_acquire(block->layers_handle[1], STARPU_R);
			/* The acquires above make sure the data has been brought
			 * back to main memory; we can then release it right away,
			 * so that the later data_unregister does not block. */
			starpu_data_release(block->layers_handle[0]);
			starpu_data_release(block->layers_handle[1]);
		}
	}
}