z_solve.c.svn-base 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690
  1. //---------------------------------------------------------------------
  2. //
  3. // Copyright 2010 Intel Corporation
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. //
  17. //---------------------------------------------------------------------
  18. #include "header.h"
  19. #include "mpinpb.h"
  20. #include "work_lhs.h"
  21. extern void z_sendrecv_solve(int c, int cprev);
  22. extern void z_sendrecv_back(int c, int cprev);
  23. extern void z_backsubstitute(int first, int last, int c);
  24. extern void z_solve_cell(int first, int last, int c);
  25. void z_solve() {
  26. //---------------------------------------------------------------------
  27. //---------------------------------------------------------------------
  28. //---------------------------------------------------------------------
  29. // Performs line solves in Z direction by first factoring
  30. // the block-tridiagonal matrix into an upper triangular matrix,
  31. // and then performing back substitution to solve for the unknow
  32. // vectors of each line.
  33. //
  34. // Make sure we treat elements zero to cell_size in the direction
  35. // of the sweep.
  36. //---------------------------------------------------------------------
  37. int c, cprev, stage, first, last, error;
  38. //---------------------------------------------------------------------
  39. // in our terminology stage is the number of the cell in the y-direction
  40. // i.e. stage = 1 means the start of the line stage=ncells means end
  41. //---------------------------------------------------------------------
  42. for (stage = 1; stage <= ncells; stage++) {
  43. c = slice(3,stage);
  44. //---------------------------------------------------------------------
  45. // set last-cell flag
  46. //---------------------------------------------------------------------
  47. first = (stage == 1);
  48. last = (stage == ncells);
  49. if (stage >1) {
  50. cprev = slice(3,stage-1);
  51. z_sendrecv_solve(c, cprev);
  52. }
  53. z_solve_cell(first,last,c);
  54. }
  55. //---------------------------------------------------------------------
  56. // now perform backsubstitution in reverse direction
  57. //---------------------------------------------------------------------
  58. for (stage = ncells; stage >= 1; stage--) {
  59. c = slice(3,stage);
  60. first = (stage == 1);
  61. last = (stage == ncells);
  62. if (stage <ncells) {
  63. cprev = slice(3,stage+1);
  64. z_sendrecv_back(c, cprev);
  65. }
  66. z_backsubstitute(first,last,c);
  67. }
  68. return;
  69. }
  70. //---------------------------------------------------------------------
  71. //---------------------------------------------------------------------
  72. void z_unpack_solve_info(int c) {
  73. //---------------------------------------------------------------------
  74. //---------------------------------------------------------------------
  75. //---------------------------------------------------------------------
  76. // unpack C'(-1) and rhs'(-1) for
  77. // all i and j
  78. //---------------------------------------------------------------------
  79. int i,j,m,n,ptr,kstart ;
  80. kstart = 0;
  81. ptr = 0;
  82. for (j = 0; j <= JMAX-1; j++) {
  83. for (i = 0; i <= IMAX-1; i++) {
  84. for (m = 1; m <= BLOCK_SIZE; m++) {
  85. for (n = 1; n <= BLOCK_SIZE; n++) {
  86. lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n);
  87. }
  88. ptr = ptr+BLOCK_SIZE;
  89. }
  90. for (n = 1; n <= BLOCK_SIZE; n++) {
  91. rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n);
  92. }
  93. ptr = ptr+BLOCK_SIZE;
  94. }
  95. }
  96. return;
  97. }
  98. void z_sendrecv_solve(int c, int cprev) {
  99. //---------------------------------------------------------------------
  100. // pack up and send C'(kend) and rhs'(kend) for
  101. // all i and j
  102. //---------------------------------------------------------------------
  103. int i,j,m,n,ksize,ptr,kstart;
  104. int phase;
  105. int error,buffer_size;
  106. ksize = cell_size(3,cprev)-1;
  107. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
  108. (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
  109. //---------------------------------------------------------------------
  110. // pack up buffer
  111. //---------------------------------------------------------------------
  112. ptr = 0;
  113. for (j = 0; j <= JMAX-1; j++) {
  114. for (i = 0; i <= IMAX-1; i++) {
  115. for (m = 1; m <= BLOCK_SIZE; m++) {
  116. for (n = 1; n <= BLOCK_SIZE; n++) {
  117. in_buffer(ptr+n) = lhsc(m,n,i,j,ksize,cprev);
  118. }
  119. ptr = ptr+BLOCK_SIZE;
  120. }
  121. for (n = 1; n <= BLOCK_SIZE; n++) {
  122. in_buffer(ptr+n) = rhs(n,i,j,ksize,cprev);
  123. }
  124. ptr = ptr+BLOCK_SIZE;
  125. }
  126. }
  127. //---------------------------------------------------------------------
  128. // send and receive buffer
  129. //---------------------------------------------------------------------
  130. for (phase = 0; phase < 3; phase++) {
  131. if (send_color[TOPDIR]==phase)
  132. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(3));
  133. if (recv_color[TOPDIR]==phase)
  134. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(3));
  135. }
  136. //---------------------------------------------------------------------
  137. // unpack buffer
  138. //---------------------------------------------------------------------
  139. kstart = 0;
  140. ptr = 0;
  141. for (j = 0; j <= JMAX-1; j++) {
  142. for (i = 0; i <= IMAX-1; i++) {
  143. for (m = 1; m <= BLOCK_SIZE; m++) {
  144. for (n = 1; n <= BLOCK_SIZE; n++) {
  145. lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n);
  146. }
  147. ptr = ptr+BLOCK_SIZE;
  148. }
  149. for (n = 1; n <= BLOCK_SIZE; n++) {
  150. rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n);
  151. }
  152. ptr = ptr+BLOCK_SIZE;
  153. }
  154. }
  155. return;
  156. }
  157. //---------------------------------------------------------------------
  158. //---------------------------------------------------------------------
  159. void z_sendrecv_back(int c, int cprev) {
  160. //---------------------------------------------------------------------
  161. // pack up and send U(jstart) for all i and j
  162. //---------------------------------------------------------------------
  163. int i,j,n,ptr,kstart;
  164. int phase;
  165. int error,buffer_size;
  166. //---------------------------------------------------------------------
  167. // Send element 0 to previous processor
  168. //---------------------------------------------------------------------
  169. kstart = 0;
  170. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
  171. ptr = 0;
  172. for (j = 0; j <= JMAX-1; j++) {
  173. for (i = 0; i <= IMAX-1; i++) {
  174. for (n = 1; n <= BLOCK_SIZE; n++) {
  175. in_buffer(ptr+n) = rhs(n,i,j,kstart,cprev);
  176. }
  177. ptr = ptr+BLOCK_SIZE;
  178. }
  179. }
  180. //---------------------------------------------------------------------
  181. // send and receive buffer
  182. //---------------------------------------------------------------------
  183. for (phase = 0; phase < 3; phase++) {
  184. if (send_color[BOTTOMDIR]==phase)
  185. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(3));
  186. if (recv_color[BOTTOMDIR]==phase)
  187. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(3));
  188. }
  189. //---------------------------------------------------------------------
  190. // unpack U(ksize) for all i and j
  191. //---------------------------------------------------------------------
  192. ptr = 0;
  193. for (j = 0; j <= JMAX-1; j++) {
  194. for (i = 0; i <= IMAX-1; i++) {
  195. for (n = 1; n <= BLOCK_SIZE; n++) {
  196. backsub_info(n,i,j,c) = out_buffer(ptr+n);
  197. }
  198. ptr = ptr+BLOCK_SIZE;
  199. }
  200. }
  201. return;
  202. }
  203. //---------------------------------------------------------------------
  204. //---------------------------------------------------------------------
  205. void z_backsubstitute(int first, int last, int c) {
  206. //---------------------------------------------------------------------
  207. //---------------------------------------------------------------------
  208. //---------------------------------------------------------------------
  209. // back solve: if last cell, then generate U(ksize)=rhs(ksize)
  210. // else assume U(ksize) is loaded in un pack backsub_info
  211. // so just use it
  212. // after call u(kstart) will be sent to next cell
  213. //---------------------------------------------------------------------
  214. int i, k;
  215. int m,n,j,jsize,isize,ksize,kstart;
  216. kstart = 0;
  217. isize = cell_size(1,c)-end(1,c)-1 ;
  218. jsize = cell_size(2,c)-end(2,c)-1;
  219. ksize = cell_size(3,c)-1;
  220. if (last == 0) {
  221. for (j = start(2,c); j <= jsize; j++) {
  222. for (i = start(1,c); i <= isize; i++) {
  223. //---------------------------------------------------------------------
  224. // U(jsize) uses info from previous cell if not last cell
  225. //---------------------------------------------------------------------
  226. for (m = 1; m <= BLOCK_SIZE; m++) {
  227. for (n = 1; n <= BLOCK_SIZE; n++) {
  228. rhs(m,i,j,ksize,c) = rhs(m,i,j,ksize,c)
  229. - lhsc(m,n,i,j,ksize,c)*
  230. backsub_info(n,i,j,c);
  231. }
  232. }
  233. }
  234. }
  235. }
  236. for (k = ksize-1; k >= kstart; k--) {
  237. for (j = start(2,c); j <= jsize; j++) {
  238. for (i = start(1,c); i <= isize; i++) {
  239. for (m = 1; m <= BLOCK_SIZE; m++) {
  240. for (n = 1; n <= BLOCK_SIZE; n++) {
  241. rhs(m,i,j,k,c) = rhs(m,i,j,k,c)
  242. - lhsc(m,n,i,j,k,c)*rhs(n,i,j,k+1,c);
  243. }
  244. }
  245. }
  246. }
  247. }
  248. return;
  249. }
  250. //---------------------------------------------------------------------
  251. //---------------------------------------------------------------------
  252. void z_solve_cell(int first,int last,int c) {
  253. //---------------------------------------------------------------------
  254. //---------------------------------------------------------------------
  255. //---------------------------------------------------------------------
  256. // performs guaussian elimination on this cell.
  257. //
  258. // assumes that unpacking routines for non-first cells
  259. // preload C' and rhs' from previous cell.
  260. //
  261. // assumed send happens outside this routine, but that
  262. // c'(KMAX) and rhs'(KMAX) will be sent to next cell.
  263. //---------------------------------------------------------------------
  264. int i,j,k,isize,ksize,jsize,kstart;
  265. double utmp[6*(KMAX+4)];
  266. #define utmp(m,i) utmp[(m-1)+6*(i+2)]
  267. kstart = 0;
  268. isize = cell_size(1,c)-end(1,c)-1;
  269. jsize = cell_size(2,c)-end(2,c)-1;
  270. ksize = cell_size(3,c)-1;
  271. lhsabinit(lhsa, lhsb, ksize);
  272. for (j = start(2,c); j <= jsize; j++) {
  273. for (i = start(1,c); i <= isize; i++) {
  274. //---------------------------------------------------------------------
  275. // This function computes the left hand side for the three z-factors
  276. //---------------------------------------------------------------------
  277. //---------------------------------------------------------------------
  278. // Compute the indices for storing the block-diagonal matrix;
  279. // determine c (labeled f) and s jacobians for cell c
  280. //---------------------------------------------------------------------
  281. for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
  282. utmp(1,k) = 1.0e0 / u(1,i,j,k,c);
  283. utmp(2,k) = u(2,i,j,k,c);
  284. utmp(3,k) = u(3,i,j,k,c);
  285. utmp(4,k) = u(4,i,j,k,c);
  286. utmp(5,k) = u(5,i,j,k,c);
  287. utmp(6,k) = qs(i,j,k,c);
  288. }
  289. for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
  290. tmp1 = utmp(1,k);
  291. tmp2 = tmp1 * tmp1;
  292. tmp3 = tmp1 * tmp2;
  293. fjac(1,1,k) = 0.0e+00;
  294. fjac(1,2,k) = 0.0e+00;
  295. fjac(1,3,k) = 0.0e+00;
  296. fjac(1,4,k) = 1.0e+00;
  297. fjac(1,5,k) = 0.0e+00;
  298. fjac(2,1,k) = - ( utmp(2,k)*utmp(4,k) )
  299. * tmp2 ;
  300. fjac(2,2,k) = utmp(4,k) * tmp1;
  301. fjac(2,3,k) = 0.0e+00;
  302. fjac(2,4,k) = utmp(2,k) * tmp1;
  303. fjac(2,5,k) = 0.0e+00;
  304. fjac(3,1,k) = - ( utmp(3,k)*utmp(4,k) )
  305. * tmp2 ;
  306. fjac(3,2,k) = 0.0e+00;
  307. fjac(3,3,k) = utmp(4,k) * tmp1;
  308. fjac(3,4,k) = utmp(3,k) * tmp1;
  309. fjac(3,5,k) = 0.0e+00;
  310. fjac(4,1,k) = - (utmp(4,k)*utmp(4,k) * tmp2 )
  311. + c2 * utmp(6,k);
  312. fjac(4,2,k) = - c2 * utmp(2,k) * tmp1 ;
  313. fjac(4,3,k) = - c2 * utmp(3,k) * tmp1;
  314. fjac(4,4,k) = ( 2.0e+00 - c2 )
  315. * utmp(4,k) * tmp1 ;
  316. fjac(4,5,k) = c2;
  317. fjac(5,1,k) = ( c2 * 2.0e0 * utmp(6,k)
  318. - c1 * ( utmp(5,k) * tmp1 ) )
  319. * ( utmp(4,k) * tmp1 );
  320. fjac(5,2,k) = - c2 * ( utmp(2,k)*utmp(4,k) )
  321. * tmp2 ;
  322. fjac(5,3,k) = - c2 * ( utmp(3,k)*utmp(4,k) )
  323. * tmp2;
  324. fjac(5,4,k) = c1 * ( utmp(5,k) * tmp1 )
  325. - c2 * ( utmp(6,k)
  326. + utmp(4,k)*utmp(4,k) * tmp2 );
  327. fjac(5,5,k) = c1 * utmp(4,k) * tmp1;
  328. njac(1,1,k) = 0.0e+00;
  329. njac(1,2,k) = 0.0e+00;
  330. njac(1,3,k) = 0.0e+00;
  331. njac(1,4,k) = 0.0e+00;
  332. njac(1,5,k) = 0.0e+00;
  333. njac(2,1,k) = - c3c4 * tmp2 * utmp(2,k);
  334. njac(2,2,k) = c3c4 * tmp1;
  335. njac(2,3,k) = 0.0e+00;
  336. njac(2,4,k) = 0.0e+00;
  337. njac(2,5,k) = 0.0e+00;
  338. njac(3,1,k) = - c3c4 * tmp2 * utmp(3,k);
  339. njac(3,2,k) = 0.0e+00;
  340. njac(3,3,k) = c3c4 * tmp1;
  341. njac(3,4,k) = 0.0e+00;
  342. njac(3,5,k) = 0.0e+00;
  343. njac(4,1,k) = - con43 * c3c4 * tmp2 * utmp(4,k);
  344. njac(4,2,k) = 0.0e+00;
  345. njac(4,3,k) = 0.0e+00;
  346. njac(4,4,k) = con43 * c3 * c4 * tmp1;
  347. njac(4,5,k) = 0.0e+00;
  348. njac(5,1,k) = - ( c3c4
  349. - c1345 ) * tmp3 * SQR(utmp(2,k))
  350. - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(3,k))
  351. - ( con43 * c3c4
  352. - c1345 ) * tmp3 * SQR(utmp(4,k))
  353. - c1345 * tmp2 * utmp(5,k);
  354. njac(5,2,k) = ( c3c4 - c1345 ) * tmp2 * utmp(2,k);
  355. njac(5,3,k) = ( c3c4 - c1345 ) * tmp2 * utmp(3,k);
  356. njac(5,4,k) = ( con43 * c3c4
  357. - c1345 ) * tmp2 * utmp(4,k);
  358. njac(5,5,k) = ( c1345 )* tmp1;
  359. }
  360. //---------------------------------------------------------------------
  361. // now joacobians set, so form left hand side in z direction
  362. //---------------------------------------------------------------------
  363. for (k = start(3,c); k <= ksize-end(3,c); k++) {
  364. tmp1 = dt * tz1;
  365. tmp2 = dt * tz2;
  366. lhsa(1,1,k) = - tmp2 * fjac(1,1,k-1)
  367. - tmp1 * njac(1,1,k-1)
  368. - tmp1 * dz1 ;
  369. lhsa(1,2,k) = - tmp2 * fjac(1,2,k-1)
  370. - tmp1 * njac(1,2,k-1);
  371. lhsa(1,3,k) = - tmp2 * fjac(1,3,k-1)
  372. - tmp1 * njac(1,3,k-1);
  373. lhsa(1,4,k) = - tmp2 * fjac(1,4,k-1)
  374. - tmp1 * njac(1,4,k-1);
  375. lhsa(1,5,k) = - tmp2 * fjac(1,5,k-1)
  376. - tmp1 * njac(1,5,k-1);
  377. lhsa(2,1,k) = - tmp2 * fjac(2,1,k-1)
  378. - tmp1 * njac(2,1,k-1);
  379. lhsa(2,2,k) = - tmp2 * fjac(2,2,k-1)
  380. - tmp1 * njac(2,2,k-1)
  381. - tmp1 * dz2;
  382. lhsa(2,3,k) = - tmp2 * fjac(2,3,k-1)
  383. - tmp1 * njac(2,3,k-1);
  384. lhsa(2,4,k) = - tmp2 * fjac(2,4,k-1)
  385. - tmp1 * njac(2,4,k-1);
  386. lhsa(2,5,k) = - tmp2 * fjac(2,5,k-1)
  387. - tmp1 * njac(2,5,k-1);
  388. lhsa(3,1,k) = - tmp2 * fjac(3,1,k-1)
  389. - tmp1 * njac(3,1,k-1);
  390. lhsa(3,2,k) = - tmp2 * fjac(3,2,k-1)
  391. - tmp1 * njac(3,2,k-1);
  392. lhsa(3,3,k) = - tmp2 * fjac(3,3,k-1)
  393. - tmp1 * njac(3,3,k-1)
  394. - tmp1 * dz3 ;
  395. lhsa(3,4,k) = - tmp2 * fjac(3,4,k-1)
  396. - tmp1 * njac(3,4,k-1);
  397. lhsa(3,5,k) = - tmp2 * fjac(3,5,k-1)
  398. - tmp1 * njac(3,5,k-1);
  399. lhsa(4,1,k) = - tmp2 * fjac(4,1,k-1)
  400. - tmp1 * njac(4,1,k-1);
  401. lhsa(4,2,k) = - tmp2 * fjac(4,2,k-1)
  402. - tmp1 * njac(4,2,k-1);
  403. lhsa(4,3,k) = - tmp2 * fjac(4,3,k-1)
  404. - tmp1 * njac(4,3,k-1);
  405. lhsa(4,4,k) = - tmp2 * fjac(4,4,k-1)
  406. - tmp1 * njac(4,4,k-1)
  407. - tmp1 * dz4;
  408. lhsa(4,5,k) = - tmp2 * fjac(4,5,k-1)
  409. - tmp1 * njac(4,5,k-1);
  410. lhsa(5,1,k) = - tmp2 * fjac(5,1,k-1)
  411. - tmp1 * njac(5,1,k-1);
  412. lhsa(5,2,k) = - tmp2 * fjac(5,2,k-1)
  413. - tmp1 * njac(5,2,k-1);
  414. lhsa(5,3,k) = - tmp2 * fjac(5,3,k-1)
  415. - tmp1 * njac(5,3,k-1);
  416. lhsa(5,4,k) = - tmp2 * fjac(5,4,k-1)
  417. - tmp1 * njac(5,4,k-1);
  418. lhsa(5,5,k) = - tmp2 * fjac(5,5,k-1)
  419. - tmp1 * njac(5,5,k-1)
  420. - tmp1 * dz5;
  421. lhsb(1,1,k) = 1.0e+00
  422. + tmp1 * 2.0e+00 * njac(1,1,k)
  423. + tmp1 * 2.0e+00 * dz1;
  424. lhsb(1,2,k) = tmp1 * 2.0e+00 * njac(1,2,k);
  425. lhsb(1,3,k) = tmp1 * 2.0e+00 * njac(1,3,k);
  426. lhsb(1,4,k) = tmp1 * 2.0e+00 * njac(1,4,k);
  427. lhsb(1,5,k) = tmp1 * 2.0e+00 * njac(1,5,k);
  428. lhsb(2,1,k) = tmp1 * 2.0e+00 * njac(2,1,k);
  429. lhsb(2,2,k) = 1.0e+00
  430. + tmp1 * 2.0e+00 * njac(2,2,k)
  431. + tmp1 * 2.0e+00 * dz2;
  432. lhsb(2,3,k) = tmp1 * 2.0e+00 * njac(2,3,k);
  433. lhsb(2,4,k) = tmp1 * 2.0e+00 * njac(2,4,k);
  434. lhsb(2,5,k) = tmp1 * 2.0e+00 * njac(2,5,k);
  435. lhsb(3,1,k) = tmp1 * 2.0e+00 * njac(3,1,k);
  436. lhsb(3,2,k) = tmp1 * 2.0e+00 * njac(3,2,k);
  437. lhsb(3,3,k) = 1.0e+00
  438. + tmp1 * 2.0e+00 * njac(3,3,k)
  439. + tmp1 * 2.0e+00 * dz3;
  440. lhsb(3,4,k) = tmp1 * 2.0e+00 * njac(3,4,k);
  441. lhsb(3,5,k) = tmp1 * 2.0e+00 * njac(3,5,k);
  442. lhsb(4,1,k) = tmp1 * 2.0e+00 * njac(4,1,k);
  443. lhsb(4,2,k) = tmp1 * 2.0e+00 * njac(4,2,k);
  444. lhsb(4,3,k) = tmp1 * 2.0e+00 * njac(4,3,k);
  445. lhsb(4,4,k) = 1.0e+00
  446. + tmp1 * 2.0e+00 * njac(4,4,k)
  447. + tmp1 * 2.0e+00 * dz4;
  448. lhsb(4,5,k) = tmp1 * 2.0e+00 * njac(4,5,k);
  449. lhsb(5,1,k) = tmp1 * 2.0e+00 * njac(5,1,k);
  450. lhsb(5,2,k) = tmp1 * 2.0e+00 * njac(5,2,k);
  451. lhsb(5,3,k) = tmp1 * 2.0e+00 * njac(5,3,k);
  452. lhsb(5,4,k) = tmp1 * 2.0e+00 * njac(5,4,k);
  453. lhsb(5,5,k) = 1.0e+00
  454. + tmp1 * 2.0e+00 * njac(5,5,k)
  455. + tmp1 * 2.0e+00 * dz5;
  456. lhsc(1,1,i,j,k,c) = tmp2 * fjac(1,1,k+1)
  457. - tmp1 * njac(1,1,k+1)
  458. - tmp1 * dz1;
  459. lhsc(1,2,i,j,k,c) = tmp2 * fjac(1,2,k+1)
  460. - tmp1 * njac(1,2,k+1);
  461. lhsc(1,3,i,j,k,c) = tmp2 * fjac(1,3,k+1)
  462. - tmp1 * njac(1,3,k+1);
  463. lhsc(1,4,i,j,k,c) = tmp2 * fjac(1,4,k+1)
  464. - tmp1 * njac(1,4,k+1);
  465. lhsc(1,5,i,j,k,c) = tmp2 * fjac(1,5,k+1)
  466. - tmp1 * njac(1,5,k+1);
  467. lhsc(2,1,i,j,k,c) = tmp2 * fjac(2,1,k+1)
  468. - tmp1 * njac(2,1,k+1);
  469. lhsc(2,2,i,j,k,c) = tmp2 * fjac(2,2,k+1)
  470. - tmp1 * njac(2,2,k+1)
  471. - tmp1 * dz2;
  472. lhsc(2,3,i,j,k,c) = tmp2 * fjac(2,3,k+1)
  473. - tmp1 * njac(2,3,k+1);
  474. lhsc(2,4,i,j,k,c) = tmp2 * fjac(2,4,k+1)
  475. - tmp1 * njac(2,4,k+1);
  476. lhsc(2,5,i,j,k,c) = tmp2 * fjac(2,5,k+1)
  477. - tmp1 * njac(2,5,k+1);
  478. lhsc(3,1,i,j,k,c) = tmp2 * fjac(3,1,k+1)
  479. - tmp1 * njac(3,1,k+1);
  480. lhsc(3,2,i,j,k,c) = tmp2 * fjac(3,2,k+1)
  481. - tmp1 * njac(3,2,k+1);
  482. lhsc(3,3,i,j,k,c) = tmp2 * fjac(3,3,k+1)
  483. - tmp1 * njac(3,3,k+1)
  484. - tmp1 * dz3;
  485. lhsc(3,4,i,j,k,c) = tmp2 * fjac(3,4,k+1)
  486. - tmp1 * njac(3,4,k+1);
  487. lhsc(3,5,i,j,k,c) = tmp2 * fjac(3,5,k+1)
  488. - tmp1 * njac(3,5,k+1);
  489. lhsc(4,1,i,j,k,c) = tmp2 * fjac(4,1,k+1)
  490. - tmp1 * njac(4,1,k+1);
  491. lhsc(4,2,i,j,k,c) = tmp2 * fjac(4,2,k+1)
  492. - tmp1 * njac(4,2,k+1);
  493. lhsc(4,3,i,j,k,c) = tmp2 * fjac(4,3,k+1)
  494. - tmp1 * njac(4,3,k+1);
  495. lhsc(4,4,i,j,k,c) = tmp2 * fjac(4,4,k+1)
  496. - tmp1 * njac(4,4,k+1)
  497. - tmp1 * dz4;
  498. lhsc(4,5,i,j,k,c) = tmp2 * fjac(4,5,k+1)
  499. - tmp1 * njac(4,5,k+1);
  500. lhsc(5,1,i,j,k,c) = tmp2 * fjac(5,1,k+1)
  501. - tmp1 * njac(5,1,k+1);
  502. lhsc(5,2,i,j,k,c) = tmp2 * fjac(5,2,k+1)
  503. - tmp1 * njac(5,2,k+1);
  504. lhsc(5,3,i,j,k,c) = tmp2 * fjac(5,3,k+1)
  505. - tmp1 * njac(5,3,k+1);
  506. lhsc(5,4,i,j,k,c) = tmp2 * fjac(5,4,k+1)
  507. - tmp1 * njac(5,4,k+1);
  508. lhsc(5,5,i,j,k,c) = tmp2 * fjac(5,5,k+1)
  509. - tmp1 * njac(5,5,k+1)
  510. - tmp1 * dz5;
  511. }
  512. //---------------------------------------------------------------------
  513. // outer most do loops - sweeping in i direction
  514. //---------------------------------------------------------------------
  515. if (first == 1) {
  516. //---------------------------------------------------------------------
  517. // multiply c(i,j,kstart) by b_inverse and copy back to c
  518. // multiply rhs(kstart) by b_inverse(kstart) and copy to rhs
  519. //---------------------------------------------------------------------
  520. binvcrhs( &lhsb(1,1,kstart),
  521. &lhsc(1,1,i,j,kstart,c),
  522. &rhs(1,i,j,kstart,c) );
  523. }
  524. //---------------------------------------------------------------------
  525. // begin inner most do loop
  526. // do all the elements of the cell unless last
  527. //---------------------------------------------------------------------
  528. for (k = kstart+first; k <= ksize-last; k++) {
  529. //---------------------------------------------------------------------
  530. // subtract A*lhs_vector(k-1) from lhs_vector(k)
  531. //
  532. // rhs(k) = rhs(k) - A*rhs(k-1)
  533. //---------------------------------------------------------------------
  534. matvec_sub(&lhsa(1,1,k),
  535. &rhs(1,i,j,k-1,c),&rhs(1,i,j,k,c));
  536. //---------------------------------------------------------------------
  537. // B(k) = B(k) - C(k-1)*A(k)
  538. // call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k,c)
  539. //---------------------------------------------------------------------
  540. matmul_sub(&lhsa(1,1,k),
  541. &lhsc(1,1,i,j,k-1,c),
  542. &lhsb(1,1,k));
  543. //---------------------------------------------------------------------
  544. // multiply c(i,j,k) by b_inverse and copy back to c
  545. // multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs
  546. //---------------------------------------------------------------------
  547. binvcrhs( &lhsb(1,1,k),
  548. &lhsc(1,1,i,j,k,c),
  549. &rhs(1,i,j,k,c) );
  550. }
  551. //---------------------------------------------------------------------
  552. // Now finish up special cases for last cell
  553. //---------------------------------------------------------------------
  554. if (last == 1) {
  555. //---------------------------------------------------------------------
  556. // rhs(ksize) = rhs(ksize) - A*rhs(ksize-1)
  557. //---------------------------------------------------------------------
  558. matvec_sub(&lhsa(1,1,ksize),
  559. &rhs(1,i,j,ksize-1,c),&rhs(1,i,j,ksize,c));
  560. //---------------------------------------------------------------------
  561. // B(ksize) = B(ksize) - C(ksize-1)*A(ksize)
  562. // call matmul_sub(aa,i,j,ksize,c,
  563. // $ cc,i,j,ksize-1,c,bb,i,j,ksize,c)
  564. //---------------------------------------------------------------------
  565. matmul_sub(&lhsa(1,1,ksize),
  566. &lhsc(1,1,i,j,ksize-1,c),
  567. &lhsb(1,1,ksize));
  568. //---------------------------------------------------------------------
  569. // multiply rhs(ksize) by b_inverse(ksize) and copy to rhs
  570. //---------------------------------------------------------------------
  571. binvrhs( &lhsb(1,1,ksize),
  572. &rhs(1,i,j,ksize,c) );
  573. }
  574. }
  575. }
  576. return;
  577. }