y_solve.c.svn-base 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. //---------------------------------------------------------------------
  2. //
  3. // Copyright 2010 Intel Corporation
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. //
  17. //---------------------------------------------------------------------
  18. #include "header.h"
  19. #include "mpinpb.h"
  20. #include "work_lhs.h"
  21. extern void y_sendrecv_solve(int c, int cprev);
  22. extern void y_sendrecv_back(int c, int cprev);
  23. extern void y_backsubstitute(int first, int last, int c);
  24. extern void y_solve_cell(int first, int last, int c);
  25. void y_solve() {
  26. //---------------------------------------------------------------------
  27. //---------------------------------------------------------------------
  28. //---------------------------------------------------------------------
  29. // Performs line solves in Y direction by first factoring
  30. // the block-tridiagonal matrix into an upper triangular matrix,
  31. // and then performing back substitution to solve for the unknow
  32. // vectors of each line.
  33. //
  34. // Make sure we treat elements zero to cell_size in the direction
  35. // of the sweep.
  36. //---------------------------------------------------------------------
  37. int c, cprev, stage, first, last, error;
  38. //---------------------------------------------------------------------
  39. // in our terminology stage is the number of the cell in the y-direction
  40. // i.e. stage = 1 means the start of the line stage=ncells means end
  41. //---------------------------------------------------------------------
  42. for (stage = 1; stage <= ncells; stage++) {
  43. c = slice(2,stage);
  44. //---------------------------------------------------------------------
  45. // set last-cell flag
  46. //---------------------------------------------------------------------
  47. first = (stage == 1);
  48. last = (stage == ncells);
  49. if (stage >1) {
  50. cprev = slice(2,stage-1);
  51. y_sendrecv_solve(c, cprev);
  52. }
  53. y_solve_cell(first,last,c);
  54. }
  55. //---------------------------------------------------------------------
  56. // now perform backsubstitution in reverse direction
  57. //---------------------------------------------------------------------
  58. for (stage = ncells; stage >= 1; stage--) {
  59. c = slice(2,stage);
  60. first = (stage == 1);
  61. last = (stage == ncells);
  62. if (stage <ncells) {
  63. cprev = slice(2,stage+1);
  64. y_sendrecv_back(c, cprev);
  65. }
  66. y_backsubstitute(first,last,c);
  67. }
  68. return;
  69. }
  70. void y_sendrecv_solve(int c, int cprev) {
  71. //---------------------------------------------------------------------
  72. // pack up and send C'(jend) and rhs'(jend) for
  73. // all i and k
  74. //---------------------------------------------------------------------
  75. int i,k,m,n,jsize,ptr,jstart;
  76. int phase;
  77. int error,buffer_size;
  78. jsize = cell_size(2,cprev)-1;
  79. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
  80. (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
  81. //---------------------------------------------------------------------
  82. // pack up buffer
  83. //---------------------------------------------------------------------
  84. ptr = 0;
  85. for (k = 0; k <= KMAX-1; k++) {
  86. for (i = 0; i <= IMAX-1; i++) {
  87. for (m = 1; m <= BLOCK_SIZE; m++) {
  88. for (n = 1; n <= BLOCK_SIZE; n++) {
  89. in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,cprev);
  90. }
  91. ptr = ptr+BLOCK_SIZE;
  92. }
  93. for (n = 1; n <= BLOCK_SIZE; n++) {
  94. in_buffer(ptr+n) = rhs(n,i,jsize,k,cprev);
  95. }
  96. ptr = ptr+BLOCK_SIZE;
  97. }
  98. }
  99. //---------------------------------------------------------------------
  100. // send and receive buffer
  101. //---------------------------------------------------------------------
  102. for (phase = 0; phase < 3; phase++) {
  103. if (send_color[NORTHDIR]==phase)
  104. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(2));
  105. if (recv_color[NORTHDIR]==phase)
  106. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(2));
  107. }
  108. //---------------------------------------------------------------------
  109. // unpack buffer
  110. //---------------------------------------------------------------------
  111. jstart = 0;
  112. ptr = 0;
  113. for (k = 0; k <= KMAX-1; k++) {
  114. for (i = 0; i <= IMAX-1; i++) {
  115. for (m = 1; m <= BLOCK_SIZE; m++) {
  116. for (n = 1; n <= BLOCK_SIZE; n++) {
  117. lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n);
  118. }
  119. ptr = ptr+BLOCK_SIZE;
  120. }
  121. for (n = 1; n <= BLOCK_SIZE; n++) {
  122. rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n);
  123. }
  124. ptr = ptr+BLOCK_SIZE;
  125. }
  126. }
  127. return;
  128. }
  129. //---------------------------------------------------------------------
  130. //---------------------------------------------------------------------
  131. void y_sendrecv_back(int c, int cprev) {
  132. //---------------------------------------------------------------------
  133. // pack up and send U(jstart) for all i and k
  134. //---------------------------------------------------------------------
  135. int i,k,n,ptr,jstart;
  136. int phase;
  137. int error,buffer_size;
  138. //---------------------------------------------------------------------
  139. // Send element 0 to previous processor
  140. //---------------------------------------------------------------------
  141. jstart = 0;
  142. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
  143. ptr = 0;
  144. for (k = 0; k <= KMAX-1; k++) {
  145. for (i = 0; i <= IMAX-1; i++) {
  146. for (n = 1; n <= BLOCK_SIZE; n++) {
  147. in_buffer(ptr+n) = rhs(n,i,jstart,k,cprev);
  148. }
  149. ptr = ptr+BLOCK_SIZE;
  150. }
  151. }
  152. //---------------------------------------------------------------------
  153. // send and receive buffer
  154. //---------------------------------------------------------------------
  155. for (phase = 0; phase < 3; phase++) {
  156. if (send_color[SOUTHDIR]==phase)
  157. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(2));
  158. if (recv_color[SOUTHDIR]==phase)
  159. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(2));
  160. }
  161. //---------------------------------------------------------------------
  162. // unpack U(jsize) for all i and k
  163. //---------------------------------------------------------------------
  164. ptr = 0;
  165. for (k = 0; k <= KMAX-1; k++) {
  166. for (i = 0; i <= IMAX-1; i++) {
  167. for (n = 1; n <= BLOCK_SIZE; n++) {
  168. backsub_info(n,i,k,c) = out_buffer(ptr+n);
  169. }
  170. ptr = ptr+BLOCK_SIZE;
  171. }
  172. }
  173. return;
  174. }
  175. void y_backsubstitute(int first, int last, int c) {
  176. //---------------------------------------------------------------------
  177. //---------------------------------------------------------------------
  178. //---------------------------------------------------------------------
  179. // back solve: if last cell, then generate U(jsize)=rhs(jsize)
  180. // else assume U(jsize) is loaded in un pack backsub_info
  181. // so just use it
  182. // after call u(jstart) will be sent to next cell
  183. //---------------------------------------------------------------------
  184. int i, k;
  185. int m,n,j,jsize,isize,ksize,jstart;
  186. jstart = 0;
  187. isize = cell_size(1,c)-end(1,c)-1 ;
  188. jsize = cell_size(2,c)-1;
  189. ksize = cell_size(3,c)-end(3,c)-1;
  190. if (last == 0) {
  191. for (k = start(3,c); k <= ksize; k++) {
  192. for (i = start(1,c); i <= isize; i++) {
  193. //---------------------------------------------------------------------
  194. // U(jsize) uses info from previous cell if not last cell
  195. //---------------------------------------------------------------------
  196. for (m = 1; m <= BLOCK_SIZE; m++) {
  197. for (n = 1; n <= BLOCK_SIZE; n++) {
  198. rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c)
  199. - lhsc(m,n,i,jsize,k,c)*
  200. backsub_info(n,i,k,c);
  201. }
  202. }
  203. }
  204. }
  205. }
  206. for (k = start(3,c); k <= ksize; k++) {
  207. for (j = jsize-1; j >= jstart; j--) {
  208. for (i = start(1,c); i <= isize; i++) {
  209. for (m = 1; m <= BLOCK_SIZE; m++) {
  210. for (n = 1; n <= BLOCK_SIZE; n++) {
  211. rhs(m,i,j,k,c) = rhs(m,i,j,k,c)
  212. - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c);
  213. }
  214. }
  215. }
  216. }
  217. }
  218. return;
  219. }
  220. //---------------------------------------------------------------------
  221. //---------------------------------------------------------------------
  222. void y_solve_cell(int first,int last,int c) {
  223. //---------------------------------------------------------------------
  224. //---------------------------------------------------------------------
  225. //---------------------------------------------------------------------
  226. // performs guaussian elimination on this cell.
  227. //
  228. // assumes that unpacking routines for non-first cells
  229. // preload C' and rhs' from previous cell.
  230. //
  231. // assumed send happens outside this routine, but that
  232. // c'(JMAX) and rhs'(JMAX) will be sent to next cell
  233. //---------------------------------------------------------------------
  234. int i,j,k,isize,ksize,jsize,jstart;
  235. double utmp[6*(JMAX+4)];
  236. #define utmp(m,i) utmp[(m-1)+6*(i+2)]
  237. jstart = 0;
  238. isize = cell_size(1,c)-end(1,c)-1;
  239. jsize = cell_size(2,c)-1;
  240. ksize = cell_size(3,c)-end(3,c)-1;
  241. lhsabinit(lhsa, lhsb, jsize);
  242. for (k = start(3,c); k <= ksize; k++) {
  243. for (i = start(1,c); i <= isize; i++) {
  244. //---------------------------------------------------------------------
  245. // This function computes the left hand side for the three y-factors
  246. //---------------------------------------------------------------------
  247. //---------------------------------------------------------------------
  248. // Compute the indices for storing the tri-diagonal matrix;
  249. // determine a (labeled f) and n jacobians for cell c
  250. //---------------------------------------------------------------------
  251. for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
  252. utmp(1,j) = 1.0e0 / u(1,i,j,k,c);
  253. utmp(2,j) = u(2,i,j,k,c);
  254. utmp(3,j) = u(3,i,j,k,c);
  255. utmp(4,j) = u(4,i,j,k,c);
  256. utmp(5,j) = u(5,i,j,k,c);
  257. utmp(6,j) = qs(i,j,k,c);
  258. }
  259. for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
  260. tmp1 = utmp(1,j);
  261. tmp2 = tmp1 * tmp1;
  262. tmp3 = tmp1 * tmp2;
  263. fjac(1,1,j) = 0.0e+00;
  264. fjac(1,2,j) = 0.0e+00;
  265. fjac(1,3,j) = 1.0e+00;
  266. fjac(1,4,j) = 0.0e+00;
  267. fjac(1,5,j) = 0.0e+00;
  268. fjac(2,1,j) = - ( utmp(2,j)*utmp(3,j) )
  269. * tmp2;
  270. fjac(2,2,j) = utmp(3,j) * tmp1;
  271. fjac(2,3,j) = utmp(2,j) * tmp1;
  272. fjac(2,4,j) = 0.0e+00;
  273. fjac(2,5,j) = 0.0e+00;
  274. fjac(3,1,j) = - ( utmp(3,j)*utmp(3,j)*tmp2)
  275. + c2 * utmp(6,j);
  276. fjac(3,2,j) = - c2 * utmp(2,j) * tmp1;
  277. fjac(3,3,j) = ( 2.0e+00 - c2 )
  278. * utmp(3,j) * tmp1 ;
  279. fjac(3,4,j) = - c2 * utmp(4,j) * tmp1 ;
  280. fjac(3,5,j) = c2;
  281. fjac(4,1,j) = - ( utmp(3,j)*utmp(4,j) )
  282. * tmp2;
  283. fjac(4,2,j) = 0.0e+00;
  284. fjac(4,3,j) = utmp(4,j) * tmp1;
  285. fjac(4,4,j) = utmp(3,j) * tmp1;
  286. fjac(4,5,j) = 0.0e+00;
  287. fjac(5,1,j) = ( c2 * 2.0e0 * utmp(6,j)
  288. - c1 * utmp(5,j) * tmp1 )
  289. * utmp(3,j) * tmp1 ;
  290. fjac(5,2,j) = - c2 * utmp(2,j)*utmp(3,j)
  291. * tmp2;
  292. fjac(5,3,j) = c1 * utmp(5,j) * tmp1
  293. - c2 * ( utmp(6,j)
  294. + utmp(3,j)*utmp(3,j) * tmp2 );
  295. fjac(5,4,j) = - c2 * ( utmp(3,j)*utmp(4,j) )
  296. * tmp2;
  297. fjac(5,5,j) = c1 * utmp(3,j) * tmp1 ;
  298. njac(1,1,j) = 0.0e+00;
  299. njac(1,2,j) = 0.0e+00;
  300. njac(1,3,j) = 0.0e+00;
  301. njac(1,4,j) = 0.0e+00;
  302. njac(1,5,j) = 0.0e+00;
  303. njac(2,1,j) = - c3c4 * tmp2 * utmp(2,j);
  304. njac(2,2,j) = c3c4 * tmp1;
  305. njac(2,3,j) = 0.0e+00;
  306. njac(2,4,j) = 0.0e+00;
  307. njac(2,5,j) = 0.0e+00;
  308. njac(3,1,j) = - con43 * c3c4 * tmp2 * utmp(3,j);
  309. njac(3,2,j) = 0.0e+00;
  310. njac(3,3,j) = con43 * c3c4 * tmp1;
  311. njac(3,4,j) = 0.0e+00;
  312. njac(3,5,j) = 0.0e+00;
  313. njac(4,1,j) = - c3c4 * tmp2 * utmp(4,j);
  314. njac(4,2,j) = 0.0e+00;
  315. njac(4,3,j) = 0.0e+00;
  316. njac(4,4,j) = c3c4 * tmp1;
  317. njac(4,5,j) = 0.0e+00;
  318. njac(5,1,j) = - ( c3c4
  319. - c1345 ) * tmp3 * SQR(utmp(2,j))
  320. - ( con43 * c3c4
  321. - c1345 ) * tmp3 * SQR(utmp(3,j))
  322. - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(4,j))
  323. - c1345 * tmp2 * utmp(5,j);
  324. njac(5,2,j) = ( c3c4 - c1345 ) * tmp2 * utmp(2,j);
  325. njac(5,3,j) = ( con43 * c3c4
  326. - c1345 ) * tmp2 * utmp(3,j);
  327. njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * utmp(4,j);
  328. njac(5,5,j) = ( c1345 ) * tmp1;
  329. }
  330. //---------------------------------------------------------------------
  331. // now joacobians set, so form left hand side in y direction
  332. //---------------------------------------------------------------------
  333. for (j = start(2,c); j <= jsize-end(2,c); j++) {
  334. tmp1 = dt * ty1;
  335. tmp2 = dt * ty2;
  336. lhsa(1,1,j) = - tmp2 * fjac(1,1,j-1)
  337. - tmp1 * njac(1,1,j-1)
  338. - tmp1 * dy1 ;
  339. lhsa(1,2,j) = - tmp2 * fjac(1,2,j-1)
  340. - tmp1 * njac(1,2,j-1);
  341. lhsa(1,3,j) = - tmp2 * fjac(1,3,j-1)
  342. - tmp1 * njac(1,3,j-1);
  343. lhsa(1,4,j) = - tmp2 * fjac(1,4,j-1)
  344. - tmp1 * njac(1,4,j-1);
  345. lhsa(1,5,j) = - tmp2 * fjac(1,5,j-1)
  346. - tmp1 * njac(1,5,j-1);
  347. lhsa(2,1,j) = - tmp2 * fjac(2,1,j-1)
  348. - tmp1 * njac(2,1,j-1);
  349. lhsa(2,2,j) = - tmp2 * fjac(2,2,j-1)
  350. - tmp1 * njac(2,2,j-1)
  351. - tmp1 * dy2;
  352. lhsa(2,3,j) = - tmp2 * fjac(2,3,j-1)
  353. - tmp1 * njac(2,3,j-1);
  354. lhsa(2,4,j) = - tmp2 * fjac(2,4,j-1)
  355. - tmp1 * njac(2,4,j-1);
  356. lhsa(2,5,j) = - tmp2 * fjac(2,5,j-1)
  357. - tmp1 * njac(2,5,j-1);
  358. lhsa(3,1,j) = - tmp2 * fjac(3,1,j-1)
  359. - tmp1 * njac(3,1,j-1);
  360. lhsa(3,2,j) = - tmp2 * fjac(3,2,j-1)
  361. - tmp1 * njac(3,2,j-1);
  362. lhsa(3,3,j) = - tmp2 * fjac(3,3,j-1)
  363. - tmp1 * njac(3,3,j-1)
  364. - tmp1 * dy3 ;
  365. lhsa(3,4,j) = - tmp2 * fjac(3,4,j-1)
  366. - tmp1 * njac(3,4,j-1);
  367. lhsa(3,5,j) = - tmp2 * fjac(3,5,j-1)
  368. - tmp1 * njac(3,5,j-1);
  369. lhsa(4,1,j) = - tmp2 * fjac(4,1,j-1)
  370. - tmp1 * njac(4,1,j-1);
  371. lhsa(4,2,j) = - tmp2 * fjac(4,2,j-1)
  372. - tmp1 * njac(4,2,j-1);
  373. lhsa(4,3,j) = - tmp2 * fjac(4,3,j-1)
  374. - tmp1 * njac(4,3,j-1);
  375. lhsa(4,4,j) = - tmp2 * fjac(4,4,j-1)
  376. - tmp1 * njac(4,4,j-1)
  377. - tmp1 * dy4;
  378. lhsa(4,5,j) = - tmp2 * fjac(4,5,j-1)
  379. - tmp1 * njac(4,5,j-1);
  380. lhsa(5,1,j) = - tmp2 * fjac(5,1,j-1)
  381. - tmp1 * njac(5,1,j-1);
  382. lhsa(5,2,j) = - tmp2 * fjac(5,2,j-1)
  383. - tmp1 * njac(5,2,j-1);
  384. lhsa(5,3,j) = - tmp2 * fjac(5,3,j-1)
  385. - tmp1 * njac(5,3,j-1);
  386. lhsa(5,4,j) = - tmp2 * fjac(5,4,j-1)
  387. - tmp1 * njac(5,4,j-1);
  388. lhsa(5,5,j) = - tmp2 * fjac(5,5,j-1)
  389. - tmp1 * njac(5,5,j-1)
  390. - tmp1 * dy5;
  391. lhsb(1,1,j) = 1.0e+00
  392. + tmp1 * 2.0e+00 * njac(1,1,j)
  393. + tmp1 * 2.0e+00 * dy1;
  394. lhsb(1,2,j) = tmp1 * 2.0e+00 * njac(1,2,j);
  395. lhsb(1,3,j) = tmp1 * 2.0e+00 * njac(1,3,j);
  396. lhsb(1,4,j) = tmp1 * 2.0e+00 * njac(1,4,j);
  397. lhsb(1,5,j) = tmp1 * 2.0e+00 * njac(1,5,j);
  398. lhsb(2,1,j) = tmp1 * 2.0e+00 * njac(2,1,j);
  399. lhsb(2,2,j) = 1.0e+00
  400. + tmp1 * 2.0e+00 * njac(2,2,j)
  401. + tmp1 * 2.0e+00 * dy2;
  402. lhsb(2,3,j) = tmp1 * 2.0e+00 * njac(2,3,j);
  403. lhsb(2,4,j) = tmp1 * 2.0e+00 * njac(2,4,j);
  404. lhsb(2,5,j) = tmp1 * 2.0e+00 * njac(2,5,j);
  405. lhsb(3,1,j) = tmp1 * 2.0e+00 * njac(3,1,j);
  406. lhsb(3,2,j) = tmp1 * 2.0e+00 * njac(3,2,j);
  407. lhsb(3,3,j) = 1.0e+00
  408. + tmp1 * 2.0e+00 * njac(3,3,j)
  409. + tmp1 * 2.0e+00 * dy3;
  410. lhsb(3,4,j) = tmp1 * 2.0e+00 * njac(3,4,j);
  411. lhsb(3,5,j) = tmp1 * 2.0e+00 * njac(3,5,j);
  412. lhsb(4,1,j) = tmp1 * 2.0e+00 * njac(4,1,j);
  413. lhsb(4,2,j) = tmp1 * 2.0e+00 * njac(4,2,j);
  414. lhsb(4,3,j) = tmp1 * 2.0e+00 * njac(4,3,j);
  415. lhsb(4,4,j) = 1.0e+00
  416. + tmp1 * 2.0e+00 * njac(4,4,j)
  417. + tmp1 * 2.0e+00 * dy4;
  418. lhsb(4,5,j) = tmp1 * 2.0e+00 * njac(4,5,j);
  419. lhsb(5,1,j) = tmp1 * 2.0e+00 * njac(5,1,j);
  420. lhsb(5,2,j) = tmp1 * 2.0e+00 * njac(5,2,j);
  421. lhsb(5,3,j) = tmp1 * 2.0e+00 * njac(5,3,j);
  422. lhsb(5,4,j) = tmp1 * 2.0e+00 * njac(5,4,j);
  423. lhsb(5,5,j) = 1.0e+00
  424. + tmp1 * 2.0e+00 * njac(5,5,j)
  425. + tmp1 * 2.0e+00 * dy5;
  426. lhsc(1,1,i,j,k,c) = tmp2 * fjac(1,1,j+1)
  427. - tmp1 * njac(1,1,j+1)
  428. - tmp1 * dy1;
  429. lhsc(1,2,i,j,k,c) = tmp2 * fjac(1,2,j+1)
  430. - tmp1 * njac(1,2,j+1);
  431. lhsc(1,3,i,j,k,c) = tmp2 * fjac(1,3,j+1)
  432. - tmp1 * njac(1,3,j+1);
  433. lhsc(1,4,i,j,k,c) = tmp2 * fjac(1,4,j+1)
  434. - tmp1 * njac(1,4,j+1);
  435. lhsc(1,5,i,j,k,c) = tmp2 * fjac(1,5,j+1)
  436. - tmp1 * njac(1,5,j+1);
  437. lhsc(2,1,i,j,k,c) = tmp2 * fjac(2,1,j+1)
  438. - tmp1 * njac(2,1,j+1);
  439. lhsc(2,2,i,j,k,c) = tmp2 * fjac(2,2,j+1)
  440. - tmp1 * njac(2,2,j+1)
  441. - tmp1 * dy2;
  442. lhsc(2,3,i,j,k,c) = tmp2 * fjac(2,3,j+1)
  443. - tmp1 * njac(2,3,j+1);
  444. lhsc(2,4,i,j,k,c) = tmp2 * fjac(2,4,j+1)
  445. - tmp1 * njac(2,4,j+1);
  446. lhsc(2,5,i,j,k,c) = tmp2 * fjac(2,5,j+1)
  447. - tmp1 * njac(2,5,j+1);
  448. lhsc(3,1,i,j,k,c) = tmp2 * fjac(3,1,j+1)
  449. - tmp1 * njac(3,1,j+1);
  450. lhsc(3,2,i,j,k,c) = tmp2 * fjac(3,2,j+1)
  451. - tmp1 * njac(3,2,j+1);
  452. lhsc(3,3,i,j,k,c) = tmp2 * fjac(3,3,j+1)
  453. - tmp1 * njac(3,3,j+1)
  454. - tmp1 * dy3;
  455. lhsc(3,4,i,j,k,c) = tmp2 * fjac(3,4,j+1)
  456. - tmp1 * njac(3,4,j+1);
  457. lhsc(3,5,i,j,k,c) = tmp2 * fjac(3,5,j+1)
  458. - tmp1 * njac(3,5,j+1);
  459. lhsc(4,1,i,j,k,c) = tmp2 * fjac(4,1,j+1)
  460. - tmp1 * njac(4,1,j+1);
  461. lhsc(4,2,i,j,k,c) = tmp2 * fjac(4,2,j+1)
  462. - tmp1 * njac(4,2,j+1);
  463. lhsc(4,3,i,j,k,c) = tmp2 * fjac(4,3,j+1)
  464. - tmp1 * njac(4,3,j+1);
  465. lhsc(4,4,i,j,k,c) = tmp2 * fjac(4,4,j+1)
  466. - tmp1 * njac(4,4,j+1)
  467. - tmp1 * dy4;
  468. lhsc(4,5,i,j,k,c) = tmp2 * fjac(4,5,j+1)
  469. - tmp1 * njac(4,5,j+1);
  470. lhsc(5,1,i,j,k,c) = tmp2 * fjac(5,1,j+1)
  471. - tmp1 * njac(5,1,j+1);
  472. lhsc(5,2,i,j,k,c) = tmp2 * fjac(5,2,j+1)
  473. - tmp1 * njac(5,2,j+1);
  474. lhsc(5,3,i,j,k,c) = tmp2 * fjac(5,3,j+1)
  475. - tmp1 * njac(5,3,j+1);
  476. lhsc(5,4,i,j,k,c) = tmp2 * fjac(5,4,j+1)
  477. - tmp1 * njac(5,4,j+1);
  478. lhsc(5,5,i,j,k,c) = tmp2 * fjac(5,5,j+1)
  479. - tmp1 * njac(5,5,j+1)
  480. - tmp1 * dy5;
  481. }
  482. //---------------------------------------------------------------------
  483. // outer most do loops - sweeping in i direction
  484. //---------------------------------------------------------------------
  485. if (first == 1) {
  486. //---------------------------------------------------------------------
  487. // multiply c(i,jstart,k) by b_inverse and copy back to c
  488. // multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
  489. //---------------------------------------------------------------------
  490. binvcrhs( &lhsb(1,1,jstart),
  491. &lhsc(1,1,i,jstart,k,c),
  492. &rhs(1,i,jstart,k,c) );
  493. }
  494. //---------------------------------------------------------------------
  495. // begin inner most do loop
  496. // do all the elements of the cell unless last
  497. //---------------------------------------------------------------------
  498. for (j = jstart+first; j <= jsize-last; j++) {
  499. //---------------------------------------------------------------------
  500. // subtract A*lhs_vector(j-1) from lhs_vector(j)
  501. //
  502. // rhs(j) = rhs(j) - A*rhs(j-1)
  503. //---------------------------------------------------------------------
  504. matvec_sub(&lhsa(1,1,j),
  505. &rhs(1,i,j-1,k,c),&rhs(1,i,j,k,c));
  506. //---------------------------------------------------------------------
  507. // B(j) = B(j) - C(j-1)*A(j)
  508. //---------------------------------------------------------------------
  509. matmul_sub(&lhsa(1,1,j),
  510. &lhsc(1,1,i,j-1,k,c),
  511. &lhsb(1,1,j));
  512. //---------------------------------------------------------------------
  513. // multiply c(i,j,k) by b_inverse and copy back to c
  514. // multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
  515. //---------------------------------------------------------------------
  516. binvcrhs( &lhsb(1,1,j),
  517. &lhsc(1,1,i,j,k,c),
  518. &rhs(1,i,j,k,c) );
  519. }
  520. //---------------------------------------------------------------------
  521. // Now finish up special cases for last cell
  522. //---------------------------------------------------------------------
  523. if (last == 1) {
  524. //---------------------------------------------------------------------
  525. // rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
  526. //---------------------------------------------------------------------
  527. matvec_sub(&lhsa(1,1,jsize),
  528. &rhs(1,i,jsize-1,k,c),&rhs(1,i,jsize,k,c));
  529. //---------------------------------------------------------------------
  530. // B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
  531. // call matmul_sub(aa,i,jsize,k,c,
  532. // $ cc,i,jsize-1,k,c,bb,i,jsize,k,c)
  533. //---------------------------------------------------------------------
  534. matmul_sub(&lhsa(1,1,jsize),
  535. &lhsc(1,1,i,jsize-1,k,c),
  536. &lhsb(1,1,jsize));
  537. //---------------------------------------------------------------------
  538. // multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
  539. //---------------------------------------------------------------------
  540. binvrhs( &lhsb(1,1,jsize),
  541. &rhs(1,i,jsize,k,c) );
  542. }
  543. }
  544. }
  545. return;
  546. }