x_solve.c.svn-base 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. //---------------------------------------------------------------------
  2. //
  3. // Copyright 2010 Intel Corporation
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. //
  17. //---------------------------------------------------------------------
  18. #include "header.h"
  19. #include "mpinpb.h"
  20. #define G_MAIN
  21. #include "work_lhs.h"
  22. #undef G_MAIN
  23. extern void x_sendrecv_solve(int c, int cprev);
  24. extern void x_sendrecv_back(int c, int cprev);
  25. extern void x_backsubstitute(int first, int last, int c);
  26. extern void x_solve_cell(int first, int last, int c);
  27. void x_solve() {
  28. //---------------------------------------------------------------------
  29. //---------------------------------------------------------------------
  30. //---------------------------------------------------------------------
  31. // Performs line solves in X direction by first factoring
  32. // the block-tridiagonal matrix into an upper triangular matrix,
  33. // and then performing back substitution to solve for the unknown
  34. // vectors of each line.
  35. //
  36. // Make sure we treat elements zero to cell_size in the direction
  37. // of the sweep.
  38. //---------------------------------------------------------------------
  39. int c, cprev, stage, first, last, error;
  40. //---------------------------------------------------------------------
  41. // in our terminology stage is the number of the cell in the x-direction
  42. // i.e. stage = 1 means the start of the line stage=ncells means end
  43. //---------------------------------------------------------------------
  44. for (stage = 1; stage <= ncells; stage++) {
  45. c = slice(1,stage);
  46. //---------------------------------------------------------------------
  47. // set first/last-cell flags
  48. //---------------------------------------------------------------------
  49. first = (stage == 1);
  50. last = (stage == ncells);
  51. if (stage >1) {
  52. cprev = slice(1,stage-1);
  53. x_sendrecv_solve(c, cprev);
  54. }
  55. x_solve_cell(first,last,c);
  56. }
  57. //---------------------------------------------------------------------
  58. // now perform backsubstitution in reverse direction
  59. //---------------------------------------------------------------------
  60. for (stage = ncells; stage >= 1; stage--) {
  61. c = slice(1,stage);
  62. first = (stage == 1);
  63. last = (stage == ncells);
  64. if (stage <ncells) {
  65. cprev = slice(1,stage+1);
  66. x_sendrecv_back(c, cprev);
  67. }
  68. x_backsubstitute(first,last,c);
  69. }
  70. return;
  71. }
  72. void x_sendrecv_solve(int c, int cprev) {
  73. //---------------------------------------------------------------------
  74. // pack up and send C'(iend) and rhs'(iend) for
  75. // all j and k of previous cell
  76. //---------------------------------------------------------------------
  77. int j,k,m,n,isize,ptr, istart;
  78. int phase;
  79. int error, buffer_size;
  80. isize = cell_size(1,cprev)-1;
  81. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
  82. (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
  83. //---------------------------------------------------------------------
  84. // pack up buffer
  85. //---------------------------------------------------------------------
  86. ptr = 0;
  87. for (k = 0; k <= KMAX-1; k++) {
  88. for (j = 0; j <= JMAX-1; j++) {
  89. for (m = 1; m <= BLOCK_SIZE; m++) {
  90. for (n = 1; n <= BLOCK_SIZE; n++) {
  91. in_buffer(ptr+n) = lhsc(m,n,isize,j,k,cprev);
  92. }
  93. ptr = ptr+BLOCK_SIZE;
  94. }
  95. for (n = 1; n <= BLOCK_SIZE; n++) {
  96. in_buffer(ptr+n) = rhs(n,isize,j,k,cprev);
  97. }
  98. ptr = ptr+BLOCK_SIZE;
  99. }
  100. }
  101. //---------------------------------------------------------------------
  102. // send and receive buffer
  103. //---------------------------------------------------------------------
  104. for (phase = 0; phase < 3; phase++) {
  105. if (send_color[EASTDIR]==phase)
  106. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(1));
  107. if (recv_color[EASTDIR]==phase)
  108. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(1));
  109. }
  110. //---------------------------------------------------------------------
  111. // unpack buffer
  112. //---------------------------------------------------------------------
  113. istart = 0;
  114. ptr = 0;
  115. for (k = 0; k <= KMAX-1; k++) {
  116. for (j = 0; j <= JMAX-1; j++) {
  117. for (m = 1; m <= BLOCK_SIZE; m++) {
  118. for (n = 1; n <= BLOCK_SIZE; n++) {
  119. lhsc(m,n,istart-1,j,k,c) = out_buffer(ptr+n);
  120. }
  121. ptr = ptr+BLOCK_SIZE;
  122. }
  123. for (n = 1; n <= BLOCK_SIZE; n++) {
  124. rhs(n,istart-1,j,k,c) = out_buffer(ptr+n);
  125. }
  126. ptr = ptr+BLOCK_SIZE;
  127. }
  128. }
  129. return;
  130. }
  131. //---------------------------------------------------------------------
  132. //---------------------------------------------------------------------
  133. void x_sendrecv_back(int c, int cprev) {
  134. //---------------------------------------------------------------------
  135. // pack up and send U(istart) for all j and k
  136. //---------------------------------------------------------------------
  137. int j,k,n,ptr,istart,jp,kp;
  138. int phase;
  139. int error, buffer_size;
  140. //---------------------------------------------------------------------
  141. // Send element 0 to previous processor
  142. //---------------------------------------------------------------------
  143. istart = 0;
  144. buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
  145. ptr = 0;
  146. for (k = 0; k <= KMAX-1; k++) {
  147. for (j = 0; j <= JMAX-1; j++) {
  148. for (n = 1; n <= BLOCK_SIZE; n++) {
  149. in_buffer(ptr+n) = rhs(n,istart,j,k,cprev);
  150. }
  151. ptr = ptr+BLOCK_SIZE;
  152. }
  153. }
  154. //---------------------------------------------------------------------
  155. // send and receive buffer
  156. //---------------------------------------------------------------------
  157. for (phase = 0; phase < 3; phase++) {
  158. if (send_color[WESTDIR]==phase)
  159. RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(1));
  160. if (recv_color[WESTDIR]==phase)
  161. RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(1));
  162. }
  163. //---------------------------------------------------------------------
  164. // unpack U(isize) for all j and k
  165. //---------------------------------------------------------------------
  166. ptr = 0;
  167. for (k = 0; k <= KMAX-1; k++) {
  168. for (j = 0; j <= JMAX-1; j++) {
  169. for (n = 1; n <= BLOCK_SIZE; n++) {
  170. backsub_info(n,j,k,c) = out_buffer(ptr+n);
  171. }
  172. ptr = ptr+BLOCK_SIZE;
  173. }
  174. }
  175. return;
  176. }
  177. void x_backsubstitute(int first, int last, int c) {
  178. //---------------------------------------------------------------------
  179. //---------------------------------------------------------------------
  180. //---------------------------------------------------------------------
  181. // back solve: if last cell, then generate U(isize)=rhs(isize)
  182. // else assume U(isize) is loaded in un pack backsub_info
  183. // so just use it
  184. // after call u(istart) will be sent to next cell
  185. //---------------------------------------------------------------------
  186. int i, j, k;
  187. int m,n,isize,jsize,ksize,istart;
  188. istart = 0;
  189. isize = cell_size(1,c)-1;
  190. jsize = cell_size(2,c)-end(2,c)-1 ;
  191. ksize = cell_size(3,c)-end(3,c)-1;
  192. if (last == 0) {
  193. for (k = start(3,c); k <= ksize; k++) {
  194. for (j = start(2,c); j <= jsize; j++) {
  195. //---------------------------------------------------------------------
  196. // U(isize) uses info from previous cell if not last cell
  197. //---------------------------------------------------------------------
  198. for (m = 1; m <= BLOCK_SIZE; m++) {
  199. for (n = 1; n <= BLOCK_SIZE; n++) {
  200. rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c)
  201. - lhsc(m,n,isize,j,k,c)*
  202. backsub_info(n,j,k,c);
  203. }
  204. }
  205. }
  206. }
  207. }
  208. for (k = start(3,c); k <= ksize; k++) {
  209. for (j = start(2,c); j <= jsize; j++) {
  210. for (i = isize-1; i >= istart; i--) {
  211. for (m = 1; m <= BLOCK_SIZE; m++) {
  212. for (n = 1; n <= BLOCK_SIZE; n++) {
  213. rhs(m,i,j,k,c) = rhs(m,i,j,k,c)
  214. - lhsc(m,n,i,j,k,c)*rhs(n,i+1,j,k,c);
  215. }
  216. }
  217. }
  218. }
  219. }
  220. return;
  221. }
  222. //---------------------------------------------------------------------
  223. //---------------------------------------------------------------------
  224. void x_solve_cell(int first, int last, int c) {
  225. //---------------------------------------------------------------------
  226. //---------------------------------------------------------------------
  227. //---------------------------------------------------------------------
  228. // performs guaussian elimination on this cell.
  229. //
  230. // assumes that unpacking routines for non-first cells
  231. // preload C' and rhs' from previous cell.
  232. //
  233. // assumed send happens outside this routine, but that
  234. // c'(IMAX) and rhs'(IMAX) will be sent to next cell
  235. //---------------------------------------------------------------------
  236. int i,j,k,isize,ksize,jsize,istart;
  237. istart = 0;
  238. isize = cell_size(1,c)-1;
  239. jsize = cell_size(2,c)-end(2,c)-1;
  240. ksize = cell_size(3,c)-end(3,c)-1;
  241. lhsabinit(lhsa, lhsb, isize);
  242. for (k = start(3,c); k <= ksize; k++) {
  243. for (j = start(2,c); j <= jsize; j++) {
  244. //---------------------------------------------------------------------
  245. // This function computes the left hand side in the xi-direction
  246. //---------------------------------------------------------------------
  247. //---------------------------------------------------------------------
  248. // determine a (labeled f) and n jacobians for cell c
  249. //---------------------------------------------------------------------
  250. for (i = start(1,c)-1; i <= cell_size(1,c) - end(1,c); i++) {
  251. tmp1 = rho_i(i,j,k,c);
  252. tmp2 = tmp1 * tmp1;
  253. tmp3 = tmp1 * tmp2;
  254. //---------------------------------------------------------------------
  255. //
  256. //---------------------------------------------------------------------
  257. fjac(1,1,i) = 0.0e+00;
  258. fjac(1,2,i) = 1.0e+00;
  259. fjac(1,3,i) = 0.0e+00;
  260. fjac(1,4,i) = 0.0e+00;
  261. fjac(1,5,i) = 0.0e+00;
  262. fjac(2,1,i) = -(u(2,i,j,k,c) * tmp2 *
  263. u(2,i,j,k,c))
  264. + c2 * qs(i,j,k,c);
  265. fjac(2,2,i) = ( 2.0e+00 - c2 )
  266. * ( u(2,i,j,k,c) * tmp1 );
  267. fjac(2,3,i) = - c2 * ( u(3,i,j,k,c) * tmp1 );
  268. fjac(2,4,i) = - c2 * ( u(4,i,j,k,c) * tmp1 );
  269. fjac(2,5,i) = c2;
  270. fjac(3,1,i) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) ) * tmp2;
  271. fjac(3,2,i) = u(3,i,j,k,c) * tmp1;
  272. fjac(3,3,i) = u(2,i,j,k,c) * tmp1;
  273. fjac(3,4,i) = 0.0e+00;
  274. fjac(3,5,i) = 0.0e+00;
  275. fjac(4,1,i) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) * tmp2;
  276. fjac(4,2,i) = u(4,i,j,k,c) * tmp1;
  277. fjac(4,3,i) = 0.0e+00;
  278. fjac(4,4,i) = u(2,i,j,k,c) * tmp1;
  279. fjac(4,5,i) = 0.0e+00;
  280. fjac(5,1,i) = ( c2 * 2.0e0 * qs(i,j,k,c)
  281. - c1 * ( u(5,i,j,k,c) * tmp1 ) )
  282. * ( u(2,i,j,k,c) * tmp1 );
  283. fjac(5,2,i) = c1 * u(5,i,j,k,c) * tmp1
  284. - c2
  285. * ( u(2,i,j,k,c)*u(2,i,j,k,c) * tmp2
  286. + qs(i,j,k,c) );
  287. fjac(5,3,i) = - c2 * ( u(3,i,j,k,c)*u(2,i,j,k,c) )
  288. * tmp2;
  289. fjac(5,4,i) = - c2 * ( u(4,i,j,k,c)*u(2,i,j,k,c) )
  290. * tmp2;
  291. fjac(5,5,i) = c1 * ( u(2,i,j,k,c) * tmp1 );
  292. njac(1,1,i) = 0.0e+00;
  293. njac(1,2,i) = 0.0e+00;
  294. njac(1,3,i) = 0.0e+00;
  295. njac(1,4,i) = 0.0e+00;
  296. njac(1,5,i) = 0.0e+00;
  297. njac(2,1,i) = - con43 * c3c4 * tmp2 * u(2,i,j,k,c);
  298. njac(2,2,i) = con43 * c3c4 * tmp1;
  299. njac(2,3,i) = 0.0e+00;
  300. njac(2,4,i) = 0.0e+00;
  301. njac(2,5,i) = 0.0e+00;
  302. njac(3,1,i) = - c3c4 * tmp2 * u(3,i,j,k,c);
  303. njac(3,2,i) = 0.0e+00;
  304. njac(3,3,i) = c3c4 * tmp1;
  305. njac(3,4,i) = 0.0e+00;
  306. njac(3,5,i) = 0.0e+00;
  307. njac(4,1,i) = - c3c4 * tmp2 * u(4,i,j,k,c);
  308. njac(4,2,i) = 0.0e+00 ;
  309. njac(4,3,i) = 0.0e+00;
  310. njac(4,4,i) = c3c4 * tmp1;
  311. njac(4,5,i) = 0.0e+00;
  312. njac(5,1,i) = - ( con43 * c3c4
  313. - c1345 ) * tmp3 * SQR(u(2,i,j,k,c))
  314. - ( c3c4 - c1345 ) * tmp3 * SQR(u(3,i,j,k,c))
  315. - ( c3c4 - c1345 ) * tmp3 * SQR(u(4,i,j,k,c))
  316. - c1345 * tmp2 * u(5,i,j,k,c);
  317. njac(5,2,i) = ( con43 * c3c4
  318. - c1345 ) * tmp2 * u(2,i,j,k,c);
  319. njac(5,3,i) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c);
  320. njac(5,4,i) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c);
  321. njac(5,5,i) = ( c1345 ) * tmp1;
  322. }
  323. //---------------------------------------------------------------------
  324. // now jacobians set, so form left hand side in x direction
  325. //---------------------------------------------------------------------
  326. for (i = start(1,c); i <= isize - end(1,c); i++) {
  327. tmp1 = dt * tx1;
  328. tmp2 = dt * tx2;
  329. lhsa(1,1,i) = - tmp2 * fjac(1,1,i-1)
  330. - tmp1 * njac(1,1,i-1)
  331. - tmp1 * dx1 ;
  332. lhsa(1,2,i) = - tmp2 * fjac(1,2,i-1)
  333. - tmp1 * njac(1,2,i-1);
  334. lhsa(1,3,i) = - tmp2 * fjac(1,3,i-1)
  335. - tmp1 * njac(1,3,i-1);
  336. lhsa(1,4,i) = - tmp2 * fjac(1,4,i-1)
  337. - tmp1 * njac(1,4,i-1);
  338. lhsa(1,5,i) = - tmp2 * fjac(1,5,i-1)
  339. - tmp1 * njac(1,5,i-1);
  340. lhsa(2,1,i) = - tmp2 * fjac(2,1,i-1)
  341. - tmp1 * njac(2,1,i-1);
  342. lhsa(2,2,i) = - tmp2 * fjac(2,2,i-1)
  343. - tmp1 * njac(2,2,i-1)
  344. - tmp1 * dx2;
  345. lhsa(2,3,i) = - tmp2 * fjac(2,3,i-1)
  346. - tmp1 * njac(2,3,i-1);
  347. lhsa(2,4,i) = - tmp2 * fjac(2,4,i-1)
  348. - tmp1 * njac(2,4,i-1);
  349. lhsa(2,5,i) = - tmp2 * fjac(2,5,i-1)
  350. - tmp1 * njac(2,5,i-1);
  351. lhsa(3,1,i) = - tmp2 * fjac(3,1,i-1)
  352. - tmp1 * njac(3,1,i-1);
  353. lhsa(3,2,i) = - tmp2 * fjac(3,2,i-1)
  354. - tmp1 * njac(3,2,i-1);
  355. lhsa(3,3,i) = - tmp2 * fjac(3,3,i-1)
  356. - tmp1 * njac(3,3,i-1)
  357. - tmp1 * dx3 ;
  358. lhsa(3,4,i) = - tmp2 * fjac(3,4,i-1)
  359. - tmp1 * njac(3,4,i-1);
  360. lhsa(3,5,i) = - tmp2 * fjac(3,5,i-1)
  361. - tmp1 * njac(3,5,i-1);
  362. lhsa(4,1,i) = - tmp2 * fjac(4,1,i-1)
  363. - tmp1 * njac(4,1,i-1);
  364. lhsa(4,2,i) = - tmp2 * fjac(4,2,i-1)
  365. - tmp1 * njac(4,2,i-1);
  366. lhsa(4,3,i) = - tmp2 * fjac(4,3,i-1)
  367. - tmp1 * njac(4,3,i-1);
  368. lhsa(4,4,i) = - tmp2 * fjac(4,4,i-1)
  369. - tmp1 * njac(4,4,i-1)
  370. - tmp1 * dx4;
  371. lhsa(4,5,i) = - tmp2 * fjac(4,5,i-1)
  372. - tmp1 * njac(4,5,i-1);
  373. lhsa(5,1,i) = - tmp2 * fjac(5,1,i-1)
  374. - tmp1 * njac(5,1,i-1);
  375. lhsa(5,2,i) = - tmp2 * fjac(5,2,i-1)
  376. - tmp1 * njac(5,2,i-1);
  377. lhsa(5,3,i) = - tmp2 * fjac(5,3,i-1)
  378. - tmp1 * njac(5,3,i-1);
  379. lhsa(5,4,i) = - tmp2 * fjac(5,4,i-1)
  380. - tmp1 * njac(5,4,i-1);
  381. lhsa(5,5,i) = - tmp2 * fjac(5,5,i-1)
  382. - tmp1 * njac(5,5,i-1)
  383. - tmp1 * dx5;
  384. lhsb(1,1,i) = 1.0e+00
  385. + tmp1 * 2.0e+00 * njac(1,1,i)
  386. + tmp1 * 2.0e+00 * dx1;
  387. lhsb(1,2,i) = tmp1 * 2.0e+00 * njac(1,2,i);
  388. lhsb(1,3,i) = tmp1 * 2.0e+00 * njac(1,3,i);
  389. lhsb(1,4,i) = tmp1 * 2.0e+00 * njac(1,4,i);
  390. lhsb(1,5,i) = tmp1 * 2.0e+00 * njac(1,5,i);
  391. lhsb(2,1,i) = tmp1 * 2.0e+00 * njac(2,1,i);
  392. lhsb(2,2,i) = 1.0e+00
  393. + tmp1 * 2.0e+00 * njac(2,2,i)
  394. + tmp1 * 2.0e+00 * dx2;
  395. lhsb(2,3,i) = tmp1 * 2.0e+00 * njac(2,3,i);
  396. lhsb(2,4,i) = tmp1 * 2.0e+00 * njac(2,4,i);
  397. lhsb(2,5,i) = tmp1 * 2.0e+00 * njac(2,5,i);
  398. lhsb(3,1,i) = tmp1 * 2.0e+00 * njac(3,1,i);
  399. lhsb(3,2,i) = tmp1 * 2.0e+00 * njac(3,2,i);
  400. lhsb(3,3,i) = 1.0e+00
  401. + tmp1 * 2.0e+00 * njac(3,3,i)
  402. + tmp1 * 2.0e+00 * dx3;
  403. lhsb(3,4,i) = tmp1 * 2.0e+00 * njac(3,4,i);
  404. lhsb(3,5,i) = tmp1 * 2.0e+00 * njac(3,5,i);
  405. lhsb(4,1,i) = tmp1 * 2.0e+00 * njac(4,1,i);
  406. lhsb(4,2,i) = tmp1 * 2.0e+00 * njac(4,2,i);
  407. lhsb(4,3,i) = tmp1 * 2.0e+00 * njac(4,3,i);
  408. lhsb(4,4,i) = 1.0e+00
  409. + tmp1 * 2.0e+00 * njac(4,4,i)
  410. + tmp1 * 2.0e+00 * dx4;
  411. lhsb(4,5,i) = tmp1 * 2.0e+00 * njac(4,5,i);
  412. lhsb(5,1,i) = tmp1 * 2.0e+00 * njac(5,1,i);
  413. lhsb(5,2,i) = tmp1 * 2.0e+00 * njac(5,2,i);
  414. lhsb(5,3,i) = tmp1 * 2.0e+00 * njac(5,3,i);
  415. lhsb(5,4,i) = tmp1 * 2.0e+00 * njac(5,4,i);
  416. lhsb(5,5,i) = 1.0e+00
  417. + tmp1 * 2.0e+00 * njac(5,5,i)
  418. + tmp1 * 2.0e+00 * dx5;
  419. lhsc(1,1,i,j,k,c) = tmp2 * fjac(1,1,i+1)
  420. - tmp1 * njac(1,1,i+1)
  421. - tmp1 * dx1;
  422. lhsc(1,2,i,j,k,c) = tmp2 * fjac(1,2,i+1)
  423. - tmp1 * njac(1,2,i+1);
  424. lhsc(1,3,i,j,k,c) = tmp2 * fjac(1,3,i+1)
  425. - tmp1 * njac(1,3,i+1);
  426. lhsc(1,4,i,j,k,c) = tmp2 * fjac(1,4,i+1)
  427. - tmp1 * njac(1,4,i+1);
  428. lhsc(1,5,i,j,k,c) = tmp2 * fjac(1,5,i+1)
  429. - tmp1 * njac(1,5,i+1);
  430. lhsc(2,1,i,j,k,c) = tmp2 * fjac(2,1,i+1)
  431. - tmp1 * njac(2,1,i+1);
  432. lhsc(2,2,i,j,k,c) = tmp2 * fjac(2,2,i+1)
  433. - tmp1 * njac(2,2,i+1)
  434. - tmp1 * dx2;
  435. lhsc(2,3,i,j,k,c) = tmp2 * fjac(2,3,i+1)
  436. - tmp1 * njac(2,3,i+1);
  437. lhsc(2,4,i,j,k,c) = tmp2 * fjac(2,4,i+1)
  438. - tmp1 * njac(2,4,i+1);
  439. lhsc(2,5,i,j,k,c) = tmp2 * fjac(2,5,i+1)
  440. - tmp1 * njac(2,5,i+1);
  441. lhsc(3,1,i,j,k,c) = tmp2 * fjac(3,1,i+1)
  442. - tmp1 * njac(3,1,i+1);
  443. lhsc(3,2,i,j,k,c) = tmp2 * fjac(3,2,i+1)
  444. - tmp1 * njac(3,2,i+1);
  445. lhsc(3,3,i,j,k,c) = tmp2 * fjac(3,3,i+1)
  446. - tmp1 * njac(3,3,i+1)
  447. - tmp1 * dx3;
  448. lhsc(3,4,i,j,k,c) = tmp2 * fjac(3,4,i+1)
  449. - tmp1 * njac(3,4,i+1);
  450. lhsc(3,5,i,j,k,c) = tmp2 * fjac(3,5,i+1)
  451. - tmp1 * njac(3,5,i+1);
  452. lhsc(4,1,i,j,k,c) = tmp2 * fjac(4,1,i+1)
  453. - tmp1 * njac(4,1,i+1);
  454. lhsc(4,2,i,j,k,c) = tmp2 * fjac(4,2,i+1)
  455. - tmp1 * njac(4,2,i+1);
  456. lhsc(4,3,i,j,k,c) = tmp2 * fjac(4,3,i+1)
  457. - tmp1 * njac(4,3,i+1);
  458. lhsc(4,4,i,j,k,c) = tmp2 * fjac(4,4,i+1)
  459. - tmp1 * njac(4,4,i+1)
  460. - tmp1 * dx4;
  461. lhsc(4,5,i,j,k,c) = tmp2 * fjac(4,5,i+1)
  462. - tmp1 * njac(4,5,i+1);
  463. lhsc(5,1,i,j,k,c) = tmp2 * fjac(5,1,i+1)
  464. - tmp1 * njac(5,1,i+1);
  465. lhsc(5,2,i,j,k,c) = tmp2 * fjac(5,2,i+1)
  466. - tmp1 * njac(5,2,i+1);
  467. lhsc(5,3,i,j,k,c) = tmp2 * fjac(5,3,i+1)
  468. - tmp1 * njac(5,3,i+1);
  469. lhsc(5,4,i,j,k,c) = tmp2 * fjac(5,4,i+1)
  470. - tmp1 * njac(5,4,i+1);
  471. lhsc(5,5,i,j,k,c) = tmp2 * fjac(5,5,i+1)
  472. - tmp1 * njac(5,5,i+1)
  473. - tmp1 * dx5;
  474. }
  475. //---------------------------------------------------------------------
  476. // outer most do loops - sweeping in i direction
  477. //---------------------------------------------------------------------
  478. if (first == 1) {
  479. //---------------------------------------------------------------------
  480. // multiply c(istart,j,k) by b_inverse and copy back to c
  481. // multiply rhs(istart) by b_inverse(istart) and copy to rhs
  482. //---------------------------------------------------------------------
  483. binvcrhs( &lhsb(1,1,istart),
  484. &lhsc(1,1,istart,j,k,c),
  485. &rhs(1,istart,j,k,c) );
  486. }
  487. //---------------------------------------------------------------------
  488. // begin inner most do loop
  489. // do all the elements of the cell unless last
  490. //---------------------------------------------------------------------
  491. for (i = istart+first; i <= isize-last; i++) {
  492. //---------------------------------------------------------------------
  493. // rhs(i) = rhs(i) - A*rhs(i-1)
  494. //---------------------------------------------------------------------
  495. matvec_sub(&lhsa(1,1,i),
  496. &rhs(1,i-1,j,k,c),&rhs(1,i,j,k,c));
  497. //---------------------------------------------------------------------
  498. // B(i) = B(i) - C(i-1)*A(i)
  499. //---------------------------------------------------------------------
  500. matmul_sub(&lhsa(1,1,i),
  501. &lhsc(1,1,i-1,j,k,c),
  502. &lhsb(1,1,i));
  503. //---------------------------------------------------------------------
  504. // multiply c(i,j,k) by b_inverse and copy back to c
  505. // multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs
  506. //---------------------------------------------------------------------
  507. binvcrhs( &lhsb(1,1,i),
  508. &lhsc(1,1,i,j,k,c),
  509. &rhs(1,i,j,k,c) );
  510. }
  511. //---------------------------------------------------------------------
  512. // Now finish up special cases for last cell
  513. //---------------------------------------------------------------------
  514. if (last == 1) {
  515. //---------------------------------------------------------------------
  516. // rhs(isize) = rhs(isize) - A*rhs(isize-1)
  517. //---------------------------------------------------------------------
  518. matvec_sub(&lhsa(1,1,isize),
  519. &rhs(1,isize-1,j,k,c),&rhs(1,isize,j,k,c));
  520. //---------------------------------------------------------------------
  521. // B(isize) = B(isize) - C(isize-1)*A(isize)
  522. //---------------------------------------------------------------------
  523. matmul_sub(&lhsa(1,1,isize),
  524. &lhsc(1,1,isize-1,j,k,c),
  525. &lhsb(1,1,isize));
  526. //---------------------------------------------------------------------
  527. // multiply rhs() by b_inverse() and copy to rhs
  528. //---------------------------------------------------------------------
  529. binvrhs( &lhsb(1,1,isize),
  530. &rhs(1,isize,j,k,c) );
  531. }
  532. }
  533. }
  534. return;
  535. }