header.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. //---------------------------------------------------------------------
  2. //---------------------------------------------------------------------
  3. //
  4. // header.h
  5. //
  6. //---------------------------------------------------------------------
  7. //---------------------------------------------------------------------
  8. #ifndef __HEADER_H
  9. #define __HEADER_H
  10. //---------------------------------------------------------------------
  11. // The following include file is generated automatically by the
  12. // "setparams" utility. It defines
  13. // maxcells: the square root of the maximum number of processors
  14. // problem_size: 12, 64, 102, 162 (for class T, A, B, C)
  15. // dt_default: default time step for this problem size if no
  16. // config file
  17. // niter_default: default number of iterations for this problem size
  18. //---------------------------------------------------------------------
  19. #include "npbparams.h"
  20. #include "RCCE.h"
  21. //we introduce the next definition to avoid confusing the compiler, which
  22. //sometimes thinks the variable class is a reserved word
  23. #define class _class_
  24. #include "../common/common.h"
  25. #define AA 0
  26. #define BB 1
  27. #define CC 2
  28. #define BLOCK_SIZE 5
  29. #define EAST 2000
  30. #define WEST 3000
  31. #define NORTH 4000
  32. #define SOUTH 5000
  33. #define BOTTOM 6000
  34. #define TOP 7000
  35. #define WESTDIR 0
  36. #define EASTDIR 1
  37. #define SOUTHDIR 2
  38. #define NORTHDIR 3
  39. #define BOTTOMDIR 4
  40. #define TOPDIR 5
  41. #define MAX_CELL_DIM ((PROBLEM_SIZE/MAXCELLS)+1)
  42. #define IMAX MAX_CELL_DIM
  43. #define JMAX MAX_CELL_DIM
  44. #define KMAX MAX_CELL_DIM
  45. #define BUF_SIZE (MAX_CELL_DIM*MAX_CELL_DIM*(MAXCELLS-1)*60+1)
  46. #define SQR(x) (x)*(x)
  47. #define grid_points(m) grid_points[m-1]
  48. #define ce(m,n) ce[(m-1)+5*(n-1)]
  49. #define cell_coord(m,n) cell_coord[(m-1)+3*(n-1)]
  50. #define cell_low(m,n) cell_low[(m-1)+3*(n-1)]
  51. #define cell_high(m,n) cell_high[(m-1)+3*(n-1)]
  52. #define cell_size(m,n) cell_size[(m-1)+3*(n-1)]
  53. #define predecessor(m) predecessor[m-1]
  54. #define slice(m,n) slice[(m-1)+3*(n-1)]
  55. #define grid_size(m) grid_size[m-1]
  56. #define successor(m) successor[m-1]
  57. #define start(m,n) start[(m-1)+3*(n-1)]
  58. #define end(m,n) end[(m-1)+3*(n-1)]
  59. #define us(i,j,k,c) us[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  60. #define vs(i,j,k,c) vs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  61. #define ws(i,j,k,c) ws[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  62. #define qs(i,j,k,c) qs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  63. #define rho_i(i,j,k,c) rho_i[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  64. #define square(i,j,k,c) square[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
  65. #define forcing(m,i,j,k,c) forcing[(m-1)+5*(i+IMAX*(j+JMAX*(k+KMAX*(c-1))))]
  66. #define u(m,i,j,k,c) u[(m-1)+5*((i+2)+(IMAX+4)*((j+2)+(JMAX+4)*((k+2)+(KMAX+4)*(c-1))))]
  67. #define rhs(m,i,j,k,c) rhs[(m-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1))))]
  68. #define lhsc(m,n,i,j,k,c) lhsc[(m-1)+5*((n-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1)))))]
  69. #define backsub_info(m,i,j,c) backsub_info[(m-1)+5*((i)+(IMAX+1)*((j)+(JMAX+1)*(c-1)))]
  70. #define in_buffer(i) in_buffer[i-1]
  71. #define out_buffer(i) out_buffer[i-1]
  72. #define cv(m) cv[m+2]
  73. #define rhon(m) rhon[m+2]
  74. #define rhos(m) rhos[m+2]
  75. #define rhoq(m) rhoq[m+2]
  76. #define cuf(m) cuf[m+2]
  77. #define q(m) q[m+2]
  78. #define ue(m,n) ue[(m+2)+(MAX_CELL_DIM+4)*(n-1)]
  79. #define buf(m,n) buf[(m+2)+(MAX_CELL_DIM+4)*(n-1)]
  80. #define sum(m) sum[m-1]
  81. #define xce_sub(m) xce_sub[m-1]
  82. #ifdef G_MAIN
  83. int ncells, grid_points[3];
  84. double elapsed_time;
  85. double tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
  86. dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
  87. dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
  88. ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2,
  89. xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
  90. dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
  91. yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
  92. zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
  93. dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
  94. dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
  95. c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
  96. dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
  97. c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
  98. c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
  99. int cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3],
  100. cell_high[MAXCELLS*3], cell_size[MAXCELLS*3],
  101. predecessor[3], slice[MAXCELLS*3],
  102. grid_size[3], successor[3],
  103. start[MAXCELLS*3], end[MAXCELLS*3];
  104. double
  105. us [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  106. vs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  107. ws [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  108. qs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  109. rho_i [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  110. square [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  111. forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
  112. u [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
  113. rhs [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
  114. lhsc [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
  115. backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
  116. in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
  117. double cv[MAX_CELL_DIM+4], rhon[MAX_CELL_DIM+4],
  118. rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
  119. cuf[MAX_CELL_DIM+4], q[MAX_CELL_DIM+4],
  120. ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
  121. int west_size, east_size, bottom_size, top_size,
  122. north_size, south_size, start_send_west,
  123. start_send_east, start_send_south, start_send_north,
  124. start_send_bottom, start_send_top, start_recv_west,
  125. start_recv_east, start_recv_south, start_recv_north,
  126. start_recv_bottom, start_recv_top;
  127. //
  128. // These are used by btio
  129. //
  130. int collbuf_nodes, collbuf_size, iosize,
  131. idump, record_length,
  132. idump_sub, rd_interval;
  133. double sum[NITER_DEFAULT], xce_sub[5];
  134. long int iseek;
  135. int send_color[6], recv_color[6];
  136. #else
  137. extern int ncells, grid_points[3];
  138. extern double elapsed_time;
  139. extern double tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
  140. dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
  141. dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
  142. ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2,
  143. xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
  144. dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
  145. yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
  146. zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
  147. dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
  148. dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
  149. c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
  150. dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
  151. c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
  152. c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
  153. extern int cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3],
  154. cell_high[MAXCELLS*3], cell_size[MAXCELLS*3],
  155. predecessor[3], slice[MAXCELLS*3],
  156. grid_size[3], successor[3],
  157. start[MAXCELLS*3], end[MAXCELLS*3];
  158. extern double
  159. us [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  160. vs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  161. ws [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  162. qs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  163. rho_i [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  164. square [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
  165. forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
  166. u [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
  167. rhs [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
  168. lhsc [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
  169. backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
  170. in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
  171. extern double cv[MAX_CELL_DIM+4], rhon[MAX_CELL_DIM+4],
  172. rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
  173. cuf[MAX_CELL_DIM+4], q[MAX_CELL_DIM+4],
  174. ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
  175. extern int west_size, east_size, bottom_size, top_size,
  176. north_size, south_size, start_send_west,
  177. start_send_east, start_send_south, start_send_north,
  178. start_send_bottom, start_send_top, start_recv_west,
  179. start_recv_east, start_recv_south, start_recv_north,
  180. start_recv_bottom, start_recv_top;
  181. //
  182. // These are used by btio
  183. //
  184. extern int collbuf_nodes, collbuf_size, iosize,
  185. idump, record_length,
  186. idump_sub, rd_interval;
  187. extern double sum[NITER_DEFAULT], xce_sub[5];
  188. extern long int iseek;
  189. extern int send_color[6], recv_color[6];
  190. #endif /*G_MAIN*/
  191. extern void matvec_sub(double ablock[], double avec[], double bvec[]);
  192. extern void matmul_sub(double ablock[], double bblock[], double cblock[]);
  193. extern void binvcrhs( double lhs[], double c[], double r[] );
  194. extern void binvrhs( double lhs[], double r[] );
  195. extern void exact_solution(double xi,double eta,double zeta,double dtemp[]);
  196. extern int setup_mpi(int *argc, char ***argv);
  197. extern void make_set(void);
  198. extern void set_constants(void);
  199. extern void lhsinit(void);
  200. extern void lhsabinit(double lhsa[], double lhsb[], int size);
  201. extern void initialize(void);
  202. extern void exact_rhs(void);
  203. extern void compute_buffer_size(int c);
  204. extern void adi(void);
  205. extern void compute_rhs(void);
  206. extern void copy_faces(void);
  207. extern void x_solve(void);
  208. extern void y_solve(void);
  209. extern void z_solve(void);
  210. extern void add(void);
  211. extern void verify(int niter, char *class, int *verified);
  212. extern void error_norm(double rms[]);
  213. extern void rhs_norm(double rms[]);
  214. extern void setup_btio(void);
  215. extern void output_timestep(void);
  216. extern void btio_cleanup(void);
  217. extern void btio_verify(int *verified);
  218. extern void accumulate_norms(double xce[]);
  219. extern void clear_timestep(void);
  220. #endif
  221. #ifdef _OPENMP
  222. #pragma omp threadprivate (cell_coord, cell_low, cell_high, cell_size)
  223. #pragma omp threadprivate (predecessor, slice, grid_size, successor)
  224. #pragma omp threadprivate (start, end)
  225. #pragma omp threadprivate (ncells, grid_points, elapsed_time)
  226. #pragma omp threadprivate (tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, \
  227. dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, \
  228. dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, \
  229. ce, dxmax, dymax, dzmax, xxcon1, xxcon2, \
  230. xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, \
  231. dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, \
  232. yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, \
  233. zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, \
  234. dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, \
  235. dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, \
  236. c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, \
  237. dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, \
  238. c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, \
  239. c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16)
  240. #pragma omp threadprivate (us, vs, ws, qs, rho_i, square, forcing, \
  241. u, rhs, lhsc, backsub_info, in_buffer, out_buffer)
  242. #pragma omp threadprivate (cv, rhon, rhos, rhoq, cuf, q, ue, buf)
  243. #pragma omp threadprivate (west_size, east_size, bottom_size, top_size, \
  244. north_size, south_size, start_send_west, \
  245. start_send_east, start_send_south, start_send_north, \
  246. start_send_bottom, start_send_top, start_recv_west, \
  247. start_recv_east, start_recv_south, start_recv_north, \
  248. start_recv_bottom, start_recv_top, send_color, recv_color)
  249. //
  250. // These are used by btio
  251. //
  252. #pragma omp threadprivate (collbuf_nodes, collbuf_size, iosize, idump,\
  253. record_length, idump_sub, rd_interval, \
  254. sum, xce_sub, iseek)
  255. #endif