dla_gbrfsx_extended.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. /* _starpu_dla_gbrfsx_extended.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b6 = -1.;
  16. static doublereal c_b8 = 1.;
  17. /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer *
  18. trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs,
  19. doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb,
  20. integer *ipiv, logical *colequ, doublereal *c__, doublereal *b,
  21. integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__,
  22. integer *n_norms__, doublereal *err_bnds_norm__, doublereal *
  23. err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy,
  24. doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal
  25. *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info)
  26. {
  27. /* System generated locals */
  28. integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset,
  29. y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset,
  30. err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3;
  31. doublereal d__1, d__2;
  32. char ch__1[1];
  33. /* Local variables */
  34. doublereal dxratmax, dzratmax;
  35. integer i__, j, m;
  36. extern /* Subroutine */ int _starpu_dla_gbamv__(integer *, integer *, integer *,
  37. integer *, integer *, doublereal *, doublereal *, integer *,
  38. doublereal *, integer *, doublereal *, doublereal *, integer *);
  39. logical incr_prec__;
  40. doublereal prev_dz_z__, yk, final_dx_x__;
  41. extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *,
  42. doublereal *, doublereal *);
  43. doublereal final_dz_z__, prevnormdx;
  44. integer cnt;
  45. doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__;
  46. extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer *
  47. , doublereal *, doublereal *, doublereal *);
  48. doublereal ymin;
  49. extern /* Subroutine */ int _starpu_blas_dgbmv_x__(integer *, integer *, integer *
  50. , integer *, integer *, doublereal *, doublereal *, integer *,
  51. doublereal *, integer *, doublereal *, doublereal *, integer *,
  52. integer *);
  53. integer y_prec_state__;
  54. extern /* Subroutine */ int blas_dgbmv2_x__(integer *, integer *, integer
  55. *, integer *, integer *, doublereal *, doublereal *, integer *,
  56. doublereal *, doublereal *, integer *, doublereal *, doublereal *,
  57. integer *, integer *), _starpu_dgbmv_(char *, integer *, integer *,
  58. integer *, integer *, doublereal *, doublereal *, integer *,
  59. doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *,
  60. integer *);
  61. doublereal dxrat, dzrat;
  62. extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *,
  63. integer *, doublereal *, integer *);
  64. char trans[1];
  65. doublereal normx, normy;
  66. extern doublereal _starpu_dlamch_(char *);
  67. extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer
  68. *, integer *, doublereal *, integer *, integer *, doublereal *,
  69. integer *, integer *);
  70. doublereal normdx;
  71. extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *);
  72. doublereal hugeval;
  73. integer x_state__, z_state__;
  74. /* -- LAPACK routine (version 3.2.1) -- */
  75. /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
  76. /* -- Jason Riedy of Univ. of California Berkeley. -- */
  77. /* -- April 2009 -- */
  78. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  79. /* -- Univ. of California Berkeley and NAG Ltd. -- */
  80. /* .. */
  81. /* .. Scalar Arguments .. */
  82. /* .. */
  83. /* .. Array Arguments .. */
  84. /* .. */
  85. /* Purpose */
  86. /* ======= */
  87. /* DLA_GBRFSX_EXTENDED improves the computed solution to a system of */
  88. /* linear equations by performing extra-precise iterative refinement */
  89. /* and provides error bounds and backward error estimates for the solution. */
  90. /* This subroutine is called by DGBRFSX to perform iterative refinement. */
  91. /* In addition to normwise error bound, the code provides maximum */
  92. /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */
  93. /* and ERR_BNDS_COMP for details of the error bounds. Note that this */
  94. /* subroutine is only resonsible for setting the second fields of */
  95. /* ERR_BNDS_NORM and ERR_BNDS_COMP. */
  96. /* Arguments */
  97. /* ========= */
  98. /* PREC_TYPE (input) INTEGER */
  99. /* Specifies the intermediate precision to be used in refinement. */
  100. /* The value is defined by ILAPREC(P) where P is a CHARACTER and */
  101. /* P = 'S': Single */
  102. /* = 'D': Double */
  103. /* = 'I': Indigenous */
  104. /* = 'X', 'E': Extra */
  105. /* TRANS_TYPE (input) INTEGER */
  106. /* Specifies the transposition operation on A. */
  107. /* The value is defined by ILATRANS(T) where T is a CHARACTER and */
  108. /* T = 'N': No transpose */
  109. /* = 'T': Transpose */
  110. /* = 'C': Conjugate transpose */
  111. /* N (input) INTEGER */
  112. /* The number of linear equations, i.e., the order of the */
  113. /* matrix A. N >= 0. */
  114. /* KL (input) INTEGER */
  115. /* The number of subdiagonals within the band of A. KL >= 0. */
  116. /* KU (input) INTEGER */
  117. /* The number of superdiagonals within the band of A. KU >= 0 */
  118. /* NRHS (input) INTEGER */
  119. /* The number of right-hand-sides, i.e., the number of columns of the */
  120. /* matrix B. */
  121. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  122. /* On entry, the N-by-N matrix A. */
  123. /* LDA (input) INTEGER */
  124. /* The leading dimension of the array A. LDA >= max(1,N). */
  125. /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */
  126. /* The factors L and U from the factorization */
  127. /* A = P*L*U as computed by DGBTRF. */
  128. /* LDAF (input) INTEGER */
  129. /* The leading dimension of the array AF. LDAF >= max(1,N). */
  130. /* IPIV (input) INTEGER array, dimension (N) */
  131. /* The pivot indices from the factorization A = P*L*U */
  132. /* as computed by DGBTRF; row i of the matrix was interchanged */
  133. /* with row IPIV(i). */
  134. /* COLEQU (input) LOGICAL */
  135. /* If .TRUE. then column equilibration was done to A before calling */
  136. /* this routine. This is needed to compute the solution and error */
  137. /* bounds correctly. */
  138. /* C (input) DOUBLE PRECISION array, dimension (N) */
  139. /* The column scale factors for A. If COLEQU = .FALSE., C */
  140. /* is not accessed. If C is input, each element of C should be a power */
  141. /* of the radix to ensure a reliable solution and error estimates. */
  142. /* Scaling by powers of the radix does not cause rounding errors unless */
  143. /* the result underflows or overflows. Rounding errors during scaling */
  144. /* lead to refining with a matrix that is not equivalent to the */
  145. /* input matrix, producing error estimates that may not be */
  146. /* reliable. */
  147. /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  148. /* The right-hand-side matrix B. */
  149. /* LDB (input) INTEGER */
  150. /* The leading dimension of the array B. LDB >= max(1,N). */
  151. /* Y (input/output) DOUBLE PRECISION array, dimension */
  152. /* (LDY,NRHS) */
  153. /* On entry, the solution matrix X, as computed by DGBTRS. */
  154. /* On exit, the improved solution matrix Y. */
  155. /* LDY (input) INTEGER */
  156. /* The leading dimension of the array Y. LDY >= max(1,N). */
  157. /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */
  158. /* On exit, BERR_OUT(j) contains the componentwise relative backward */
  159. /* error for right-hand-side j from the formula */
  160. /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  161. /* where abs(Z) is the componentwise absolute value of the matrix */
  162. /* or vector Z. This is computed by DLA_LIN_BERR. */
  163. /* N_NORMS (input) INTEGER */
  164. /* Determines which error bounds to return (see ERR_BNDS_NORM */
  165. /* and ERR_BNDS_COMP). */
  166. /* If N_NORMS >= 1 return normwise error bounds. */
  167. /* If N_NORMS >= 2 return componentwise error bounds. */
  168. /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */
  169. /* (NRHS, N_ERR_BNDS) */
  170. /* For each right-hand side, this array contains information about */
  171. /* various error bounds and condition numbers corresponding to the */
  172. /* normwise relative error, which is defined as follows: */
  173. /* Normwise relative error in the ith solution vector: */
  174. /* max_j (abs(XTRUE(j,i) - X(j,i))) */
  175. /* ------------------------------ */
  176. /* max_j abs(X(j,i)) */
  177. /* The array is indexed by the type of error information as described */
  178. /* below. There currently are up to three pieces of information */
  179. /* returned. */
  180. /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
  181. /* right-hand side. */
  182. /* The second index in ERR_BNDS_NORM(:,err) contains the following */
  183. /* three fields: */
  184. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  185. /* reciprocal condition number is less than the threshold */
  186. /* sqrt(n) * slamch('Epsilon'). */
  187. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  188. /* almost certainly within a factor of 10 of the true error */
  189. /* so long as the next entry is greater than the threshold */
  190. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  191. /* be trusted if the previous boolean is true. */
  192. /* err = 3 Reciprocal condition number: Estimated normwise */
  193. /* reciprocal condition number. Compared with the threshold */
  194. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  195. /* estimate is "guaranteed". These reciprocal condition */
  196. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  197. /* appropriately scaled matrix Z. */
  198. /* Let Z = S*A, where S scales each row by a power of the */
  199. /* radix so all absolute row sums of Z are approximately 1. */
  200. /* This subroutine is only responsible for setting the second field */
  201. /* above. */
  202. /* See Lapack Working Note 165 for further details and extra */
  203. /* cautions. */
  204. /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */
  205. /* (NRHS, N_ERR_BNDS) */
  206. /* For each right-hand side, this array contains information about */
  207. /* various error bounds and condition numbers corresponding to the */
  208. /* componentwise relative error, which is defined as follows: */
  209. /* Componentwise relative error in the ith solution vector: */
  210. /* abs(XTRUE(j,i) - X(j,i)) */
  211. /* max_j ---------------------- */
  212. /* abs(X(j,i)) */
  213. /* The array is indexed by the right-hand side i (on which the */
  214. /* componentwise relative error depends), and the type of error */
  215. /* information as described below. There currently are up to three */
  216. /* pieces of information returned for each right-hand side. If */
  217. /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
  218. /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */
  219. /* the first (:,N_ERR_BNDS) entries are returned. */
  220. /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
  221. /* right-hand side. */
  222. /* The second index in ERR_BNDS_COMP(:,err) contains the following */
  223. /* three fields: */
  224. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  225. /* reciprocal condition number is less than the threshold */
  226. /* sqrt(n) * slamch('Epsilon'). */
  227. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  228. /* almost certainly within a factor of 10 of the true error */
  229. /* so long as the next entry is greater than the threshold */
  230. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  231. /* be trusted if the previous boolean is true. */
  232. /* err = 3 Reciprocal condition number: Estimated componentwise */
  233. /* reciprocal condition number. Compared with the threshold */
  234. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  235. /* estimate is "guaranteed". These reciprocal condition */
  236. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  237. /* appropriately scaled matrix Z. */
  238. /* Let Z = S*(A*diag(x)), where x is the solution for the */
  239. /* current right-hand side and S scales each row of */
  240. /* A*diag(x) by a power of the radix so all absolute row */
  241. /* sums of Z are approximately 1. */
  242. /* This subroutine is only responsible for setting the second field */
  243. /* above. */
  244. /* See Lapack Working Note 165 for further details and extra */
  245. /* cautions. */
  246. /* RES (input) DOUBLE PRECISION array, dimension (N) */
  247. /* Workspace to hold the intermediate residual. */
  248. /* AYB (input) DOUBLE PRECISION array, dimension (N) */
  249. /* Workspace. This can be the same workspace passed for Y_TAIL. */
  250. /* DY (input) DOUBLE PRECISION array, dimension (N) */
  251. /* Workspace to hold the intermediate solution. */
  252. /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */
  253. /* Workspace to hold the trailing bits of the intermediate solution. */
  254. /* RCOND (input) DOUBLE PRECISION */
  255. /* Reciprocal scaled condition number. This is an estimate of the */
  256. /* reciprocal Skeel condition number of the matrix A after */
  257. /* equilibration (if done). If this is less than the machine */
  258. /* precision (in particular, if it is zero), the matrix is singular */
  259. /* to working precision. Note that the error may still be small even */
  260. /* if this number is very small and the matrix appears ill- */
  261. /* conditioned. */
  262. /* ITHRESH (input) INTEGER */
  263. /* The maximum number of residual computations allowed for */
  264. /* refinement. The default is 10. For 'aggressive' set to 100 to */
  265. /* permit convergence using approximate factorizations or */
  266. /* factorizations other than LU. If the factorization uses a */
  267. /* technique other than Gaussian elimination, the guarantees in */
  268. /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */
  269. /* RTHRESH (input) DOUBLE PRECISION */
  270. /* Determines when to stop refinement if the error estimate stops */
  271. /* decreasing. Refinement will stop when the next solution no longer */
  272. /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
  273. /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
  274. /* default value is 0.5. For 'aggressive' set to 0.9 to permit */
  275. /* convergence on extremely ill-conditioned matrices. See LAWN 165 */
  276. /* for more details. */
  277. /* DZ_UB (input) DOUBLE PRECISION */
  278. /* Determines when to start considering componentwise convergence. */
  279. /* Componentwise convergence is only considered after each component */
  280. /* of the solution Y is stable, which we definte as the relative */
  281. /* change in each component being less than DZ_UB. The default value */
  282. /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */
  283. /* more details. */
  284. /* IGNORE_CWISE (input) LOGICAL */
  285. /* If .TRUE. then ignore componentwise convergence. Default value */
  286. /* is .FALSE.. */
  287. /* INFO (output) INTEGER */
  288. /* = 0: Successful exit. */
  289. /* < 0: if INFO = -i, the ith argument to DGBTRS had an illegal */
  290. /* value */
  291. /* ===================================================================== */
  292. /* .. Local Scalars .. */
  293. /* .. */
  294. /* .. Parameters .. */
  295. /* .. */
  296. /* .. External Subroutines .. */
  297. /* .. */
  298. /* .. Intrinsic Functions .. */
  299. /* .. */
  300. /* .. Executable Statements .. */
  301. /* Parameter adjustments */
  302. err_bnds_comp_dim1 = *nrhs;
  303. err_bnds_comp_offset = 1 + err_bnds_comp_dim1;
  304. err_bnds_comp__ -= err_bnds_comp_offset;
  305. err_bnds_norm_dim1 = *nrhs;
  306. err_bnds_norm_offset = 1 + err_bnds_norm_dim1;
  307. err_bnds_norm__ -= err_bnds_norm_offset;
  308. ab_dim1 = *ldab;
  309. ab_offset = 1 + ab_dim1;
  310. ab -= ab_offset;
  311. afb_dim1 = *ldafb;
  312. afb_offset = 1 + afb_dim1;
  313. afb -= afb_offset;
  314. --ipiv;
  315. --c__;
  316. b_dim1 = *ldb;
  317. b_offset = 1 + b_dim1;
  318. b -= b_offset;
  319. y_dim1 = *ldy;
  320. y_offset = 1 + y_dim1;
  321. y -= y_offset;
  322. --berr_out__;
  323. --res;
  324. --ayb;
  325. --dy;
  326. --y_tail__;
  327. /* Function Body */
  328. if (*info != 0) {
  329. return 0;
  330. }
  331. _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__);
  332. *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
  333. eps = _starpu_dlamch_("Epsilon");
  334. hugeval = _starpu_dlamch_("Overflow");
  335. /* Force HUGEVAL to Inf */
  336. hugeval *= hugeval;
  337. /* Using HUGEVAL may lead to spurious underflows. */
  338. incr_thresh__ = (doublereal) (*n) * eps;
  339. m = *kl + *ku + 1;
  340. i__1 = *nrhs;
  341. for (j = 1; j <= i__1; ++j) {
  342. y_prec_state__ = 1;
  343. if (y_prec_state__ == 2) {
  344. i__2 = *n;
  345. for (i__ = 1; i__ <= i__2; ++i__) {
  346. y_tail__[i__] = 0.;
  347. }
  348. }
  349. dxrat = 0.;
  350. dxratmax = 0.;
  351. dzrat = 0.;
  352. dzratmax = 0.;
  353. final_dx_x__ = hugeval;
  354. final_dz_z__ = hugeval;
  355. prevnormdx = hugeval;
  356. prev_dz_z__ = hugeval;
  357. dz_z__ = hugeval;
  358. dx_x__ = hugeval;
  359. x_state__ = 1;
  360. z_state__ = 0;
  361. incr_prec__ = FALSE_;
  362. i__2 = *ithresh;
  363. for (cnt = 1; cnt <= i__2; ++cnt) {
  364. /* Compute residual RES = B_s - op(A_s) * Y, */
  365. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  366. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  367. if (y_prec_state__ == 0) {
  368. _starpu_dgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[
  369. j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1);
  370. } else if (y_prec_state__ == 1) {
  371. _starpu_blas_dgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[
  372. ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, &
  373. res[1], &c__1, prec_type__);
  374. } else {
  375. blas_dgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[
  376. ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], &
  377. c__1, &c_b8, &res[1], &c__1, prec_type__);
  378. }
  379. /* XXX: RES is no longer needed. */
  380. _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1);
  381. _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1]
  382. , &dy[1], n, info);
  383. /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
  384. normx = 0.;
  385. normy = 0.;
  386. normdx = 0.;
  387. dz_z__ = 0.;
  388. ymin = hugeval;
  389. i__3 = *n;
  390. for (i__ = 1; i__ <= i__3; ++i__) {
  391. yk = (d__1 = y[i__ + j * y_dim1], abs(d__1));
  392. dyk = (d__1 = dy[i__], abs(d__1));
  393. if (yk != 0.) {
  394. /* Computing MAX */
  395. d__1 = dz_z__, d__2 = dyk / yk;
  396. dz_z__ = max(d__1,d__2);
  397. } else if (dyk != 0.) {
  398. dz_z__ = hugeval;
  399. }
  400. ymin = min(ymin,yk);
  401. normy = max(normy,yk);
  402. if (*colequ) {
  403. /* Computing MAX */
  404. d__1 = normx, d__2 = yk * c__[i__];
  405. normx = max(d__1,d__2);
  406. /* Computing MAX */
  407. d__1 = normdx, d__2 = dyk * c__[i__];
  408. normdx = max(d__1,d__2);
  409. } else {
  410. normx = normy;
  411. normdx = max(normdx,dyk);
  412. }
  413. }
  414. if (normx != 0.) {
  415. dx_x__ = normdx / normx;
  416. } else if (normdx == 0.) {
  417. dx_x__ = 0.;
  418. } else {
  419. dx_x__ = hugeval;
  420. }
  421. dxrat = normdx / prevnormdx;
  422. dzrat = dz_z__ / prev_dz_z__;
  423. /* Check termination criteria. */
  424. if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy
  425. && y_prec_state__ < 2) {
  426. incr_prec__ = TRUE_;
  427. }
  428. if (x_state__ == 3 && dxrat <= *rthresh) {
  429. x_state__ = 1;
  430. }
  431. if (x_state__ == 1) {
  432. if (dx_x__ <= eps) {
  433. x_state__ = 2;
  434. } else if (dxrat > *rthresh) {
  435. if (y_prec_state__ != 2) {
  436. incr_prec__ = TRUE_;
  437. } else {
  438. x_state__ = 3;
  439. }
  440. } else {
  441. if (dxrat > dxratmax) {
  442. dxratmax = dxrat;
  443. }
  444. }
  445. if (x_state__ > 1) {
  446. final_dx_x__ = dx_x__;
  447. }
  448. }
  449. if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
  450. z_state__ = 1;
  451. }
  452. if (z_state__ == 3 && dzrat <= *rthresh) {
  453. z_state__ = 1;
  454. }
  455. if (z_state__ == 1) {
  456. if (dz_z__ <= eps) {
  457. z_state__ = 2;
  458. } else if (dz_z__ > *dz_ub__) {
  459. z_state__ = 0;
  460. dzratmax = 0.;
  461. final_dz_z__ = hugeval;
  462. } else if (dzrat > *rthresh) {
  463. if (y_prec_state__ != 2) {
  464. incr_prec__ = TRUE_;
  465. } else {
  466. z_state__ = 3;
  467. }
  468. } else {
  469. if (dzrat > dzratmax) {
  470. dzratmax = dzrat;
  471. }
  472. }
  473. if (z_state__ > 1) {
  474. final_dz_z__ = dz_z__;
  475. }
  476. }
  477. /* Exit if both normwise and componentwise stopped working, */
  478. /* but if componentwise is unstable, let it go at least two */
  479. /* iterations. */
  480. if (x_state__ != 1) {
  481. if (*ignore_cwise__) {
  482. goto L666;
  483. }
  484. if (z_state__ == 3 || z_state__ == 2) {
  485. goto L666;
  486. }
  487. if (z_state__ == 0 && cnt > 1) {
  488. goto L666;
  489. }
  490. }
  491. if (incr_prec__) {
  492. incr_prec__ = FALSE_;
  493. ++y_prec_state__;
  494. i__3 = *n;
  495. for (i__ = 1; i__ <= i__3; ++i__) {
  496. y_tail__[i__] = 0.;
  497. }
  498. }
  499. prevnormdx = normdx;
  500. prev_dz_z__ = dz_z__;
  501. /* Update soluton. */
  502. if (y_prec_state__ < 2) {
  503. _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
  504. } else {
  505. _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
  506. }
  507. }
  508. /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */
  509. L666:
  510. /* Set final_* when cnt hits ithresh. */
  511. if (x_state__ == 1) {
  512. final_dx_x__ = dx_x__;
  513. }
  514. if (z_state__ == 1) {
  515. final_dz_z__ = dz_z__;
  516. }
  517. /* Compute error bounds. */
  518. if (*n_norms__ >= 1) {
  519. err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / (
  520. 1 - dxratmax);
  521. }
  522. if (*n_norms__ >= 2) {
  523. err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / (
  524. 1 - dzratmax);
  525. }
  526. /* Compute componentwise relative backward error from formula */
  527. /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  528. /* where abs(Z) is the componentwise absolute value of the matrix */
  529. /* or vector Z. */
  530. /* Compute residual RES = B_s - op(A_s) * Y, */
  531. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  532. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  533. _starpu_dgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j *
  534. y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1);
  535. i__2 = *n;
  536. for (i__ = 1; i__ <= i__2; ++i__) {
  537. ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
  538. }
  539. /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */
  540. _starpu_dla_gbamv__(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, &
  541. y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1);
  542. _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
  543. /* End of loop for each RHS */
  544. }
  545. return 0;
  546. } /* _starpu_dla_gbrfsx_extended__ */