dla_gerfsx_extended.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. /* _starpu_dla_gerfsx_extended.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b6 = -1.;
  16. static doublereal c_b8 = 1.;
  17. /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer *
  18. trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda,
  19. doublereal *af, integer *ldaf, integer *ipiv, logical *colequ,
  20. doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *
  21. ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__,
  22. doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal *
  23. dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh,
  24. doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__,
  25. integer *info)
  26. {
  27. /* System generated locals */
  28. integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1,
  29. y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset,
  30. i__1, i__2, i__3;
  31. doublereal d__1, d__2;
  32. char ch__1[1];
  33. /* Local variables */
  34. doublereal dxratmax, dzratmax;
  35. integer i__, j;
  36. extern /* Subroutine */ int _starpu_dla_geamv__(integer *, integer *, integer *,
  37. doublereal *, doublereal *, integer *, doublereal *, integer *,
  38. doublereal *, doublereal *, integer *);
  39. logical incr_prec__;
  40. doublereal prev_dz_z__, yk, final_dx_x__;
  41. extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *,
  42. doublereal *, doublereal *);
  43. doublereal final_dz_z__, prevnormdx;
  44. integer cnt;
  45. doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__;
  46. extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer *
  47. , doublereal *, doublereal *, doublereal *);
  48. doublereal ymin;
  49. extern /* Subroutine */ int _starpu_blas_dgemv_x__(integer *, integer *, integer *
  50. , doublereal *, doublereal *, integer *, doublereal *, integer *,
  51. doublereal *, doublereal *, integer *, integer *);
  52. integer y_prec_state__;
  53. extern /* Subroutine */ int blas_dgemv2_x__(integer *, integer *, integer
  54. *, doublereal *, doublereal *, integer *, doublereal *,
  55. doublereal *, integer *, doublereal *, doublereal *, integer *,
  56. integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *,
  57. doublereal *, integer *, doublereal *, integer *, doublereal *,
  58. doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *,
  59. integer *, doublereal *, integer *);
  60. doublereal dxrat, dzrat;
  61. extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *,
  62. integer *, doublereal *, integer *);
  63. char trans[1];
  64. doublereal normx, normy;
  65. extern doublereal _starpu_dlamch_(char *);
  66. extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *,
  67. doublereal *, integer *, integer *, doublereal *, integer *,
  68. integer *);
  69. doublereal normdx;
  70. extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *);
  71. doublereal hugeval;
  72. integer x_state__, z_state__;
  73. /* -- LAPACK routine (version 3.2.1) -- */
  74. /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
  75. /* -- Jason Riedy of Univ. of California Berkeley. -- */
  76. /* -- April 2009 -- */
  77. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  78. /* -- Univ. of California Berkeley and NAG Ltd. -- */
  79. /* .. */
  80. /* .. Scalar Arguments .. */
  81. /* .. */
  82. /* .. Array Arguments .. */
  83. /* .. */
  84. /* Purpose */
  85. /* ======= */
  86. /* DLA_GERFSX_EXTENDED improves the computed solution to a system of */
  87. /* linear equations by performing extra-precise iterative refinement */
  88. /* and provides error bounds and backward error estimates for the solution. */
  89. /* This subroutine is called by DGERFSX to perform iterative refinement. */
  90. /* In addition to normwise error bound, the code provides maximum */
  91. /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */
  92. /* and ERR_BNDS_COMP for details of the error bounds. Note that this */
  93. /* subroutine is only resonsible for setting the second fields of */
  94. /* ERR_BNDS_NORM and ERR_BNDS_COMP. */
  95. /* Arguments */
  96. /* ========= */
  97. /* PREC_TYPE (input) INTEGER */
  98. /* Specifies the intermediate precision to be used in refinement. */
  99. /* The value is defined by ILAPREC(P) where P is a CHARACTER and */
  100. /* P = 'S': Single */
  101. /* = 'D': Double */
  102. /* = 'I': Indigenous */
  103. /* = 'X', 'E': Extra */
  104. /* TRANS_TYPE (input) INTEGER */
  105. /* Specifies the transposition operation on A. */
  106. /* The value is defined by ILATRANS(T) where T is a CHARACTER and */
  107. /* T = 'N': No transpose */
  108. /* = 'T': Transpose */
  109. /* = 'C': Conjugate transpose */
  110. /* N (input) INTEGER */
  111. /* The number of linear equations, i.e., the order of the */
  112. /* matrix A. N >= 0. */
  113. /* NRHS (input) INTEGER */
  114. /* The number of right-hand-sides, i.e., the number of columns of the */
  115. /* matrix B. */
  116. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  117. /* On entry, the N-by-N matrix A. */
  118. /* LDA (input) INTEGER */
  119. /* The leading dimension of the array A. LDA >= max(1,N). */
  120. /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */
  121. /* The factors L and U from the factorization */
  122. /* A = P*L*U as computed by DGETRF. */
  123. /* LDAF (input) INTEGER */
  124. /* The leading dimension of the array AF. LDAF >= max(1,N). */
  125. /* IPIV (input) INTEGER array, dimension (N) */
  126. /* The pivot indices from the factorization A = P*L*U */
  127. /* as computed by DGETRF; row i of the matrix was interchanged */
  128. /* with row IPIV(i). */
  129. /* COLEQU (input) LOGICAL */
  130. /* If .TRUE. then column equilibration was done to A before calling */
  131. /* this routine. This is needed to compute the solution and error */
  132. /* bounds correctly. */
  133. /* C (input) DOUBLE PRECISION array, dimension (N) */
  134. /* The column scale factors for A. If COLEQU = .FALSE., C */
  135. /* is not accessed. If C is input, each element of C should be a power */
  136. /* of the radix to ensure a reliable solution and error estimates. */
  137. /* Scaling by powers of the radix does not cause rounding errors unless */
  138. /* the result underflows or overflows. Rounding errors during scaling */
  139. /* lead to refining with a matrix that is not equivalent to the */
  140. /* input matrix, producing error estimates that may not be */
  141. /* reliable. */
  142. /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  143. /* The right-hand-side matrix B. */
  144. /* LDB (input) INTEGER */
  145. /* The leading dimension of the array B. LDB >= max(1,N). */
  146. /* Y (input/output) DOUBLE PRECISION array, dimension */
  147. /* (LDY,NRHS) */
  148. /* On entry, the solution matrix X, as computed by DGETRS. */
  149. /* On exit, the improved solution matrix Y. */
  150. /* LDY (input) INTEGER */
  151. /* The leading dimension of the array Y. LDY >= max(1,N). */
  152. /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */
  153. /* On exit, BERR_OUT(j) contains the componentwise relative backward */
  154. /* error for right-hand-side j from the formula */
  155. /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  156. /* where abs(Z) is the componentwise absolute value of the matrix */
  157. /* or vector Z. This is computed by DLA_LIN_BERR. */
  158. /* N_NORMS (input) INTEGER */
  159. /* Determines which error bounds to return (see ERR_BNDS_NORM */
  160. /* and ERR_BNDS_COMP). */
  161. /* If N_NORMS >= 1 return normwise error bounds. */
  162. /* If N_NORMS >= 2 return componentwise error bounds. */
  163. /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */
  164. /* (NRHS, N_ERR_BNDS) */
  165. /* For each right-hand side, this array contains information about */
  166. /* various error bounds and condition numbers corresponding to the */
  167. /* normwise relative error, which is defined as follows: */
  168. /* Normwise relative error in the ith solution vector: */
  169. /* max_j (abs(XTRUE(j,i) - X(j,i))) */
  170. /* ------------------------------ */
  171. /* max_j abs(X(j,i)) */
  172. /* The array is indexed by the type of error information as described */
  173. /* below. There currently are up to three pieces of information */
  174. /* returned. */
  175. /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
  176. /* right-hand side. */
  177. /* The second index in ERR_BNDS_NORM(:,err) contains the following */
  178. /* three fields: */
  179. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  180. /* reciprocal condition number is less than the threshold */
  181. /* sqrt(n) * slamch('Epsilon'). */
  182. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  183. /* almost certainly within a factor of 10 of the true error */
  184. /* so long as the next entry is greater than the threshold */
  185. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  186. /* be trusted if the previous boolean is true. */
  187. /* err = 3 Reciprocal condition number: Estimated normwise */
  188. /* reciprocal condition number. Compared with the threshold */
  189. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  190. /* estimate is "guaranteed". These reciprocal condition */
  191. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  192. /* appropriately scaled matrix Z. */
  193. /* Let Z = S*A, where S scales each row by a power of the */
  194. /* radix so all absolute row sums of Z are approximately 1. */
  195. /* This subroutine is only responsible for setting the second field */
  196. /* above. */
  197. /* See Lapack Working Note 165 for further details and extra */
  198. /* cautions. */
  199. /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */
  200. /* (NRHS, N_ERR_BNDS) */
  201. /* For each right-hand side, this array contains information about */
  202. /* various error bounds and condition numbers corresponding to the */
  203. /* componentwise relative error, which is defined as follows: */
  204. /* Componentwise relative error in the ith solution vector: */
  205. /* abs(XTRUE(j,i) - X(j,i)) */
  206. /* max_j ---------------------- */
  207. /* abs(X(j,i)) */
  208. /* The array is indexed by the right-hand side i (on which the */
  209. /* componentwise relative error depends), and the type of error */
  210. /* information as described below. There currently are up to three */
  211. /* pieces of information returned for each right-hand side. If */
  212. /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
  213. /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */
  214. /* the first (:,N_ERR_BNDS) entries are returned. */
  215. /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
  216. /* right-hand side. */
  217. /* The second index in ERR_BNDS_COMP(:,err) contains the following */
  218. /* three fields: */
  219. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  220. /* reciprocal condition number is less than the threshold */
  221. /* sqrt(n) * slamch('Epsilon'). */
  222. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  223. /* almost certainly within a factor of 10 of the true error */
  224. /* so long as the next entry is greater than the threshold */
  225. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  226. /* be trusted if the previous boolean is true. */
  227. /* err = 3 Reciprocal condition number: Estimated componentwise */
  228. /* reciprocal condition number. Compared with the threshold */
  229. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  230. /* estimate is "guaranteed". These reciprocal condition */
  231. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  232. /* appropriately scaled matrix Z. */
  233. /* Let Z = S*(A*diag(x)), where x is the solution for the */
  234. /* current right-hand side and S scales each row of */
  235. /* A*diag(x) by a power of the radix so all absolute row */
  236. /* sums of Z are approximately 1. */
  237. /* This subroutine is only responsible for setting the second field */
  238. /* above. */
  239. /* See Lapack Working Note 165 for further details and extra */
  240. /* cautions. */
  241. /* RES (input) DOUBLE PRECISION array, dimension (N) */
  242. /* Workspace to hold the intermediate residual. */
  243. /* AYB (input) DOUBLE PRECISION array, dimension (N) */
  244. /* Workspace. This can be the same workspace passed for Y_TAIL. */
  245. /* DY (input) DOUBLE PRECISION array, dimension (N) */
  246. /* Workspace to hold the intermediate solution. */
  247. /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */
  248. /* Workspace to hold the trailing bits of the intermediate solution. */
  249. /* RCOND (input) DOUBLE PRECISION */
  250. /* Reciprocal scaled condition number. This is an estimate of the */
  251. /* reciprocal Skeel condition number of the matrix A after */
  252. /* equilibration (if done). If this is less than the machine */
  253. /* precision (in particular, if it is zero), the matrix is singular */
  254. /* to working precision. Note that the error may still be small even */
  255. /* if this number is very small and the matrix appears ill- */
  256. /* conditioned. */
  257. /* ITHRESH (input) INTEGER */
  258. /* The maximum number of residual computations allowed for */
  259. /* refinement. The default is 10. For 'aggressive' set to 100 to */
  260. /* permit convergence using approximate factorizations or */
  261. /* factorizations other than LU. If the factorization uses a */
  262. /* technique other than Gaussian elimination, the guarantees in */
  263. /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */
  264. /* RTHRESH (input) DOUBLE PRECISION */
  265. /* Determines when to stop refinement if the error estimate stops */
  266. /* decreasing. Refinement will stop when the next solution no longer */
  267. /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
  268. /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
  269. /* default value is 0.5. For 'aggressive' set to 0.9 to permit */
  270. /* convergence on extremely ill-conditioned matrices. See LAWN 165 */
  271. /* for more details. */
  272. /* DZ_UB (input) DOUBLE PRECISION */
  273. /* Determines when to start considering componentwise convergence. */
  274. /* Componentwise convergence is only considered after each component */
  275. /* of the solution Y is stable, which we definte as the relative */
  276. /* change in each component being less than DZ_UB. The default value */
  277. /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */
  278. /* more details. */
  279. /* IGNORE_CWISE (input) LOGICAL */
  280. /* If .TRUE. then ignore componentwise convergence. Default value */
  281. /* is .FALSE.. */
  282. /* INFO (output) INTEGER */
  283. /* = 0: Successful exit. */
  284. /* < 0: if INFO = -i, the ith argument to DGETRS had an illegal */
  285. /* value */
  286. /* ===================================================================== */
  287. /* .. Local Scalars .. */
  288. /* .. */
  289. /* .. Parameters .. */
  290. /* .. */
  291. /* .. External Subroutines .. */
  292. /* .. */
  293. /* .. Intrinsic Functions .. */
  294. /* .. */
  295. /* .. Executable Statements .. */
  296. /* Parameter adjustments */
  297. errs_c_dim1 = *nrhs;
  298. errs_c_offset = 1 + errs_c_dim1;
  299. errs_c__ -= errs_c_offset;
  300. errs_n_dim1 = *nrhs;
  301. errs_n_offset = 1 + errs_n_dim1;
  302. errs_n__ -= errs_n_offset;
  303. a_dim1 = *lda;
  304. a_offset = 1 + a_dim1;
  305. a -= a_offset;
  306. af_dim1 = *ldaf;
  307. af_offset = 1 + af_dim1;
  308. af -= af_offset;
  309. --ipiv;
  310. --c__;
  311. b_dim1 = *ldb;
  312. b_offset = 1 + b_dim1;
  313. b -= b_offset;
  314. y_dim1 = *ldy;
  315. y_offset = 1 + y_dim1;
  316. y -= y_offset;
  317. --berr_out__;
  318. --res;
  319. --ayb;
  320. --dy;
  321. --y_tail__;
  322. /* Function Body */
  323. if (*info != 0) {
  324. return 0;
  325. }
  326. _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__);
  327. *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
  328. eps = _starpu_dlamch_("Epsilon");
  329. hugeval = _starpu_dlamch_("Overflow");
  330. /* Force HUGEVAL to Inf */
  331. hugeval *= hugeval;
  332. /* Using HUGEVAL may lead to spurious underflows. */
  333. incr_thresh__ = (doublereal) (*n) * eps;
  334. i__1 = *nrhs;
  335. for (j = 1; j <= i__1; ++j) {
  336. y_prec_state__ = 1;
  337. if (y_prec_state__ == 2) {
  338. i__2 = *n;
  339. for (i__ = 1; i__ <= i__2; ++i__) {
  340. y_tail__[i__] = 0.;
  341. }
  342. }
  343. dxrat = 0.;
  344. dxratmax = 0.;
  345. dzrat = 0.;
  346. dzratmax = 0.;
  347. final_dx_x__ = hugeval;
  348. final_dz_z__ = hugeval;
  349. prevnormdx = hugeval;
  350. prev_dz_z__ = hugeval;
  351. dz_z__ = hugeval;
  352. dx_x__ = hugeval;
  353. x_state__ = 1;
  354. z_state__ = 0;
  355. incr_prec__ = FALSE_;
  356. i__2 = *ithresh;
  357. for (cnt = 1; cnt <= i__2; ++cnt) {
  358. /* Compute residual RES = B_s - op(A_s) * Y, */
  359. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  360. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  361. if (y_prec_state__ == 0) {
  362. _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 +
  363. 1], &c__1, &c_b8, &res[1], &c__1);
  364. } else if (y_prec_state__ == 1) {
  365. _starpu_blas_dgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, &
  366. y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1,
  367. prec_type__);
  368. } else {
  369. blas_dgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda,
  370. &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[
  371. 1], &c__1, prec_type__);
  372. }
  373. /* XXX: RES is no longer needed. */
  374. _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1);
  375. _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1],
  376. n, info);
  377. /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
  378. normx = 0.;
  379. normy = 0.;
  380. normdx = 0.;
  381. dz_z__ = 0.;
  382. ymin = hugeval;
  383. i__3 = *n;
  384. for (i__ = 1; i__ <= i__3; ++i__) {
  385. yk = (d__1 = y[i__ + j * y_dim1], abs(d__1));
  386. dyk = (d__1 = dy[i__], abs(d__1));
  387. if (yk != 0.) {
  388. /* Computing MAX */
  389. d__1 = dz_z__, d__2 = dyk / yk;
  390. dz_z__ = max(d__1,d__2);
  391. } else if (dyk != 0.) {
  392. dz_z__ = hugeval;
  393. }
  394. ymin = min(ymin,yk);
  395. normy = max(normy,yk);
  396. if (*colequ) {
  397. /* Computing MAX */
  398. d__1 = normx, d__2 = yk * c__[i__];
  399. normx = max(d__1,d__2);
  400. /* Computing MAX */
  401. d__1 = normdx, d__2 = dyk * c__[i__];
  402. normdx = max(d__1,d__2);
  403. } else {
  404. normx = normy;
  405. normdx = max(normdx,dyk);
  406. }
  407. }
  408. if (normx != 0.) {
  409. dx_x__ = normdx / normx;
  410. } else if (normdx == 0.) {
  411. dx_x__ = 0.;
  412. } else {
  413. dx_x__ = hugeval;
  414. }
  415. dxrat = normdx / prevnormdx;
  416. dzrat = dz_z__ / prev_dz_z__;
  417. /* Check termination criteria */
  418. if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy
  419. && y_prec_state__ < 2) {
  420. incr_prec__ = TRUE_;
  421. }
  422. if (x_state__ == 3 && dxrat <= *rthresh) {
  423. x_state__ = 1;
  424. }
  425. if (x_state__ == 1) {
  426. if (dx_x__ <= eps) {
  427. x_state__ = 2;
  428. } else if (dxrat > *rthresh) {
  429. if (y_prec_state__ != 2) {
  430. incr_prec__ = TRUE_;
  431. } else {
  432. x_state__ = 3;
  433. }
  434. } else {
  435. if (dxrat > dxratmax) {
  436. dxratmax = dxrat;
  437. }
  438. }
  439. if (x_state__ > 1) {
  440. final_dx_x__ = dx_x__;
  441. }
  442. }
  443. if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
  444. z_state__ = 1;
  445. }
  446. if (z_state__ == 3 && dzrat <= *rthresh) {
  447. z_state__ = 1;
  448. }
  449. if (z_state__ == 1) {
  450. if (dz_z__ <= eps) {
  451. z_state__ = 2;
  452. } else if (dz_z__ > *dz_ub__) {
  453. z_state__ = 0;
  454. dzratmax = 0.;
  455. final_dz_z__ = hugeval;
  456. } else if (dzrat > *rthresh) {
  457. if (y_prec_state__ != 2) {
  458. incr_prec__ = TRUE_;
  459. } else {
  460. z_state__ = 3;
  461. }
  462. } else {
  463. if (dzrat > dzratmax) {
  464. dzratmax = dzrat;
  465. }
  466. }
  467. if (z_state__ > 1) {
  468. final_dz_z__ = dz_z__;
  469. }
  470. }
  471. /* Exit if both normwise and componentwise stopped working, */
  472. /* but if componentwise is unstable, let it go at least two */
  473. /* iterations. */
  474. if (x_state__ != 1) {
  475. if (*ignore_cwise__) {
  476. goto L666;
  477. }
  478. if (z_state__ == 3 || z_state__ == 2) {
  479. goto L666;
  480. }
  481. if (z_state__ == 0 && cnt > 1) {
  482. goto L666;
  483. }
  484. }
  485. if (incr_prec__) {
  486. incr_prec__ = FALSE_;
  487. ++y_prec_state__;
  488. i__3 = *n;
  489. for (i__ = 1; i__ <= i__3; ++i__) {
  490. y_tail__[i__] = 0.;
  491. }
  492. }
  493. prevnormdx = normdx;
  494. prev_dz_z__ = dz_z__;
  495. /* Update soluton. */
  496. if (y_prec_state__ < 2) {
  497. _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
  498. } else {
  499. _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
  500. }
  501. }
  502. /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */
  503. L666:
  504. /* Set final_* when cnt hits ithresh. */
  505. if (x_state__ == 1) {
  506. final_dx_x__ = dx_x__;
  507. }
  508. if (z_state__ == 1) {
  509. final_dz_z__ = dz_z__;
  510. }
  511. /* Compute error bounds */
  512. if (*n_norms__ >= 1) {
  513. errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax);
  514. }
  515. if (*n_norms__ >= 2) {
  516. errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax);
  517. }
  518. /* Compute componentwise relative backward error from formula */
  519. /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  520. /* where abs(Z) is the componentwise absolute value of the matrix */
  521. /* or vector Z. */
  522. /* Compute residual RES = B_s - op(A_s) * Y, */
  523. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  524. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  525. _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &
  526. c__1, &c_b8, &res[1], &c__1);
  527. i__2 = *n;
  528. for (i__ = 1; i__ <= i__2; ++i__) {
  529. ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
  530. }
  531. /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */
  532. _starpu_dla_geamv__(trans_type__, n, n, &c_b8, &a[a_offset], lda, &y[j *
  533. y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1);
  534. _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
  535. /* End of loop for each RHS. */
  536. }
  537. return 0;
  538. } /* _starpu_dla_gerfsx_extended__ */