copy_faces.c.svn-base 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. //---------------------------------------------------------------------
  2. //
  3. // Copyright 2010 Intel Corporation
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. //
  17. //---------------------------------------------------------------------
  18. #include "header.h"
  19. #include "mpinpb.h"
  20. void copy_faces() {
  21. //---------------------------------------------------------------------
  22. //---------------------------------------------------------------------
  23. //---------------------------------------------------------------------
  24. //
  25. // This function copies the face values of a variable defined on a set
  26. // of cells to the overlap locations of the adjacent sets of cells.
  27. // Because a set of cells interfaces in each direction with exactly one
  28. // other set, we only need to fill six different buffers. We could try to
  29. // overlap communication with computation, by computing
  30. // some internal values while communicating boundary values, but this
  31. // adds so much overhead that it's not clearly useful.
  32. //---------------------------------------------------------------------
  33. int i, j, k, c, m, p0, p1, phase,
  34. p2, p3, p4, p5, b_size[6], ss[6],
  35. sr[6], error;
  36. #define b_size(m) b_size[m]
  37. #define ss(m) ss[m]
  38. #define sr(m) sr[m]
  39. //---------------------------------------------------------------------
  40. // exit immediately if there are no faces to be copied
  41. //---------------------------------------------------------------------
  42. if (no_nodes == 1) {
  43. compute_rhs();
  44. return;
  45. }
  46. ss(0) = start_send_east;
  47. ss(1) = start_send_west;
  48. ss(2) = start_send_north;
  49. ss(3) = start_send_south;
  50. ss(4) = start_send_top;
  51. ss(5) = start_send_bottom;
  52. sr(0) = start_recv_east;
  53. sr(1) = start_recv_west;
  54. sr(2) = start_recv_north;
  55. sr(3) = start_recv_south;
  56. sr(4) = start_recv_top;
  57. sr(5) = start_recv_bottom;
  58. b_size(0) = east_size ;
  59. b_size(1) = west_size ;
  60. b_size(2) = north_size ;
  61. b_size(3) = south_size ;
  62. b_size(4) = top_size ;
  63. b_size(5) = bottom_size ;
  64. //---------------------------------------------------------------------
  65. // because the difference stencil for the diagonalized scheme is
  66. // orthogonal, we do not have to perform the staged copying of faces,
  67. // but can send all face information simultaneously to the neighboring
  68. // cells in all directions
  69. //---------------------------------------------------------------------
  70. p0 = 0;
  71. p1 = 0;
  72. p2 = 0;
  73. p3 = 0;
  74. p4 = 0;
  75. p5 = 0;
  76. for (c = 1; c <= ncells; c++) {
  77. //---------------------------------------------------------------------
  78. // fill the buffer to be sent to eastern neighbors (i-dir)
  79. //---------------------------------------------------------------------
  80. if (cell_coord(1,c) != ncells) {
  81. for (k = 0; k <= cell_size(3,c)-1; k++) {
  82. for (j = 0; j <= cell_size(2,c)-1; j++) {
  83. for (i = cell_size(1,c)-2; i <= cell_size(1,c)-1; i++) {
  84. for (m = 1; m <= 5; m++) {
  85. out_buffer(ss(0)+p0) = u(m,i,j,k,c);
  86. p0 = p0 + 1;
  87. }
  88. }
  89. }
  90. }
  91. }
  92. //---------------------------------------------------------------------
  93. // fill the buffer to be sent to western neighbors
  94. //---------------------------------------------------------------------
  95. if (cell_coord(1,c) != 1) {
  96. for (k = 0; k <= cell_size(3,c)-1; k++) {
  97. for (j = 0; j <= cell_size(2,c)-1; j++) {
  98. for (i = 0; i <= 1; i++) {
  99. for (m = 1; m <= 5; m++) {
  100. out_buffer(ss(1)+p1) = u(m,i,j,k,c);
  101. p1 = p1 + 1;
  102. }
  103. }
  104. }
  105. }
  106. }
  107. //---------------------------------------------------------------------
  108. // fill the buffer to be sent to northern neighbors (j_dir)
  109. //---------------------------------------------------------------------
  110. if (cell_coord(2,c) != ncells) {
  111. for (k = 0; k <= cell_size(3,c)-1; k++) {
  112. for (j = cell_size(2,c)-2; j <= cell_size(2,c)-1; j++) {
  113. for (i = 0; i <= cell_size(1,c)-1; i++) {
  114. for (m = 1; m <= 5; m++) {
  115. out_buffer(ss(2)+p2) = u(m,i,j,k,c);
  116. p2 = p2 + 1;
  117. }
  118. }
  119. }
  120. }
  121. }
  122. //---------------------------------------------------------------------
  123. // fill the buffer to be sent to southern neighbors
  124. //---------------------------------------------------------------------
  125. if (cell_coord(2,c)!= 1) {
  126. for (k = 0; k <= cell_size(3,c)-1; k++) {
  127. for (j = 0; j <= 1; j++) {
  128. for (i = 0; i <= cell_size(1,c)-1; i++) {
  129. for (m = 1; m <= 5; m++) {
  130. out_buffer(ss(3)+p3) = u(m,i,j,k,c);
  131. p3 = p3 + 1;
  132. }
  133. }
  134. }
  135. }
  136. }
  137. //---------------------------------------------------------------------
  138. // fill the buffer to be sent to top neighbors (k-dir)
  139. //---------------------------------------------------------------------
  140. if (cell_coord(3,c) != ncells) {
  141. for (k = cell_size(3,c)-2; k <= cell_size(3,c)-1; k++) {
  142. for (j = 0; j <= cell_size(2,c)-1; j++) {
  143. for (i = 0; i <= cell_size(1,c)-1; i++) {
  144. for (m = 1; m <= 5; m++) {
  145. out_buffer(ss(4)+p4) = u(m,i,j,k,c);
  146. p4 = p4 + 1;
  147. }
  148. }
  149. }
  150. }
  151. }
  152. //---------------------------------------------------------------------
  153. // fill the buffer to be sent to bottom neighbors
  154. //---------------------------------------------------------------------
  155. if (cell_coord(3,c)!= 1) {
  156. for (k = 0; k <= 1; k++) {
  157. for (j = 0; j <= cell_size(2,c)-1; j++) {
  158. for (i = 0; i <= cell_size(1,c)-1; i++) {
  159. for (m = 1; m <= 5; m++) {
  160. out_buffer(ss(5)+p5) = u(m,i,j,k,c);
  161. p5 = p5 + 1;
  162. }
  163. }
  164. }
  165. }
  166. }
  167. //---------------------------------------------------------------------
  168. // cell loop
  169. //---------------------------------------------------------------------
  170. }
  171. for (phase = 0; phase < 3; phase++) {
  172. if (send_color[WESTDIR]==phase) {
  173. RCCE_send((char*)(&out_buffer(ss(1))), b_size(1)*sizeof(double), predecessor(1));
  174. }
  175. if (recv_color[WESTDIR]==phase) {
  176. RCCE_recv((char*)(&in_buffer(sr(0))), b_size(0)*sizeof(double), successor(1));
  177. }
  178. if (send_color[EASTDIR]==phase) {
  179. RCCE_send((char*)(&out_buffer(ss(0))), b_size(0)*sizeof(double), successor(1));
  180. }
  181. if (recv_color[EASTDIR]==phase) {
  182. RCCE_recv((char*)(&in_buffer(sr(1))), b_size(1)*sizeof(double), predecessor(1));
  183. }
  184. if (send_color[SOUTHDIR]==phase) {
  185. RCCE_send((char*)(&out_buffer(ss(3))), b_size(3)*sizeof(double), predecessor(2));
  186. }
  187. if (recv_color[SOUTHDIR]==phase) {
  188. RCCE_recv((char*)(&in_buffer(sr(2))), b_size(2)*sizeof(double), successor(2));
  189. }
  190. if (send_color[NORTHDIR]==phase) {
  191. RCCE_send((char*)(&out_buffer(ss(2))), b_size(2)*sizeof(double),successor(2));
  192. }
  193. if (recv_color[NORTHDIR]==phase) {
  194. RCCE_recv((char*)(&in_buffer(sr(3))), b_size(3)*sizeof(double), predecessor(2));
  195. }
  196. if (send_color[BOTTOMDIR]==phase) {
  197. RCCE_send((char*)(&out_buffer(ss(5))), b_size(5)*sizeof(double),predecessor(3));
  198. }
  199. if (recv_color[BOTTOMDIR]==phase) {
  200. RCCE_recv((char*)(&in_buffer(sr(4))), b_size(4)*sizeof(double), successor(3));
  201. }
  202. if (send_color[TOPDIR]==phase) {
  203. RCCE_send((char*)(&out_buffer(ss(4))), b_size(4)*sizeof(double),successor(3));
  204. }
  205. if (recv_color[TOPDIR]==phase) {
  206. RCCE_recv((char*)(&in_buffer(sr(5))), b_size(5)*sizeof(double), predecessor(3));
  207. }
  208. }
  209. //---------------------------------------------------------------------
  210. // unpack the data that has just been received;
  211. //---------------------------------------------------------------------
  212. p0 = 0;
  213. p1 = 0;
  214. p2 = 0;
  215. p3 = 0;
  216. p4 = 0;
  217. p5 = 0;
  218. for (c = 1; c <= ncells; c++) {
  219. if (cell_coord(1,c) != 1) {
  220. for (k = 0; k <= cell_size(3,c)-1; k++) {
  221. for (j = 0; j <= cell_size(2,c)-1; j++) {
  222. for (i = -2; i <= -1; i++) {
  223. for (m = 1; m <= 5; m++) {
  224. u(m,i,j,k,c) = in_buffer(sr(1)+p0);
  225. p0 = p0 + 1;
  226. }
  227. }
  228. }
  229. }
  230. }
  231. if (cell_coord(1,c) != ncells) {
  232. for (k = 0; k <= cell_size(3,c)-1; k++) {
  233. for (j = 0; j <= cell_size(2,c)-1; j++) {
  234. for (i = cell_size(1,c); i <= cell_size(1,c)+1; i++) {
  235. for (m = 1; m <= 5; m++) {
  236. u(m,i,j,k,c) = in_buffer(sr(0)+p1);
  237. p1 = p1 + 1;
  238. }
  239. }
  240. }
  241. }
  242. }
  243. if (cell_coord(2,c) != 1) {
  244. for (k = 0; k <= cell_size(3,c)-1; k++) {
  245. for (j = -2; j <= -1; j++) {
  246. for (i = 0; i <= cell_size(1,c)-1; i++) {
  247. for (m = 1; m <= 5; m++) {
  248. u(m,i,j,k,c) = in_buffer(sr(3)+p2);
  249. p2 = p2 + 1;
  250. }
  251. }
  252. }
  253. }
  254. }
  255. if (cell_coord(2,c) != ncells) {
  256. for (k = 0; k <= cell_size(3,c)-1; k++) {
  257. for (j = cell_size(2,c); j <= cell_size(2,c)+1; j++) {
  258. for (i = 0; i <= cell_size(1,c)-1; i++) {
  259. for (m = 1; m <= 5; m++) {
  260. u(m,i,j,k,c) = in_buffer(sr(2)+p3);
  261. p3 = p3 + 1;
  262. }
  263. }
  264. }
  265. }
  266. }
  267. if (cell_coord(3,c) != 1) {
  268. for (k = -2; k <= -1; k++) {
  269. for (j = 0; j <= cell_size(2,c)-1; j++) {
  270. for (i = 0; i <= cell_size(1,c)-1; i++) {
  271. for (m = 1; m <= 5; m++) {
  272. u(m,i,j,k,c) = in_buffer(sr(5)+p4);
  273. p4 = p4 + 1;
  274. }
  275. }
  276. }
  277. }
  278. }
  279. if (cell_coord(3,c) != ncells) {
  280. for (k = cell_size(3,c); k <= cell_size(3,c)+1; k++) {
  281. for (j = 0; j <= cell_size(2,c)-1; j++) {
  282. for (i = 0; i <= cell_size(1,c)-1; i++) {
  283. for (m = 1; m <= 5; m++) {
  284. u(m,i,j,k,c) = in_buffer(sr(4)+p5);
  285. p5 = p5 + 1;
  286. }
  287. }
  288. }
  289. }
  290. }
  291. //---------------------------------------------------------------------
  292. // cells loop
  293. //---------------------------------------------------------------------
  294. }
  295. //---------------------------------------------------------------------
  296. // do the rest of the rhs that uses the copied face values
  297. //---------------------------------------------------------------------
  298. compute_rhs();
  299. return;
  300. }