dla_porfsx_extended.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. /* _starpu_dla_porfsx_extended.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b9 = -1.;
  16. static doublereal c_b11 = 1.;
  17. /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo,
  18. integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *
  19. af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b,
  20. integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__,
  21. integer *n_norms__, doublereal *err_bnds_norm__, doublereal *
  22. err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy,
  23. doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal
  24. *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info,
  25. ftnlen uplo_len)
  26. {
  27. /* System generated locals */
  28. integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1,
  29. y_offset, err_bnds_norm_dim1, err_bnds_norm_offset,
  30. err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3;
  31. doublereal d__1, d__2;
  32. /* Local variables */
  33. doublereal dxratmax, dzratmax;
  34. integer i__, j;
  35. logical incr_prec__;
  36. extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal *
  37. , doublereal *, integer *, doublereal *, integer *, doublereal *,
  38. doublereal *, integer *);
  39. doublereal prev_dz_z__, yk, final_dx_x__;
  40. extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *,
  41. doublereal *, doublereal *);
  42. doublereal final_dz_z__, prevnormdx;
  43. integer cnt;
  44. doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__;
  45. extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer *
  46. , doublereal *, doublereal *, doublereal *);
  47. doublereal ymin;
  48. integer y_prec_state__;
  49. extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *,
  50. doublereal *, doublereal *, integer *, doublereal *, integer *,
  51. doublereal *, doublereal *, integer *, integer *);
  52. integer uplo2;
  53. extern logical _starpu_lsame_(char *, char *);
  54. extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *,
  55. doublereal *, doublereal *, integer *, doublereal *, doublereal *,
  56. integer *, doublereal *, doublereal *, integer *, integer *),
  57. _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *
  58. );
  59. doublereal dxrat, dzrat;
  60. extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *,
  61. integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *,
  62. doublereal *, doublereal *, integer *, doublereal *, integer *,
  63. doublereal *, doublereal *, integer *);
  64. doublereal normx, normy;
  65. extern doublereal _starpu_dlamch_(char *);
  66. doublereal normdx;
  67. extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *,
  68. doublereal *, integer *, doublereal *, integer *, integer *);
  69. doublereal hugeval;
  70. extern integer _starpu_ilauplo_(char *);
  71. integer x_state__, z_state__;
  72. /* -- LAPACK routine (version 3.2.1) -- */
  73. /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
  74. /* -- Jason Riedy of Univ. of California Berkeley. -- */
  75. /* -- April 2009 -- */
  76. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  77. /* -- Univ. of California Berkeley and NAG Ltd. -- */
  78. /* .. */
  79. /* .. Scalar Arguments .. */
  80. /* .. */
  81. /* .. Array Arguments .. */
  82. /* .. */
  83. /* Purpose */
  84. /* ======= */
  85. /* DLA_PORFSX_EXTENDED improves the computed solution to a system of */
  86. /* linear equations by performing extra-precise iterative refinement */
  87. /* and provides error bounds and backward error estimates for the solution. */
  88. /* This subroutine is called by DPORFSX to perform iterative refinement. */
  89. /* In addition to normwise error bound, the code provides maximum */
  90. /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */
  91. /* and ERR_BNDS_COMP for details of the error bounds. Note that this */
  92. /* subroutine is only resonsible for setting the second fields of */
  93. /* ERR_BNDS_NORM and ERR_BNDS_COMP. */
  94. /* Arguments */
  95. /* ========= */
  96. /* PREC_TYPE (input) INTEGER */
  97. /* Specifies the intermediate precision to be used in refinement. */
  98. /* The value is defined by ILAPREC(P) where P is a CHARACTER and */
  99. /* P = 'S': Single */
  100. /* = 'D': Double */
  101. /* = 'I': Indigenous */
  102. /* = 'X', 'E': Extra */
  103. /* UPLO (input) CHARACTER*1 */
  104. /* = 'U': Upper triangle of A is stored; */
  105. /* = 'L': Lower triangle of A is stored. */
  106. /* N (input) INTEGER */
  107. /* The number of linear equations, i.e., the order of the */
  108. /* matrix A. N >= 0. */
  109. /* NRHS (input) INTEGER */
  110. /* The number of right-hand-sides, i.e., the number of columns of the */
  111. /* matrix B. */
  112. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  113. /* On entry, the N-by-N matrix A. */
  114. /* LDA (input) INTEGER */
  115. /* The leading dimension of the array A. LDA >= max(1,N). */
  116. /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */
  117. /* The triangular factor U or L from the Cholesky factorization */
  118. /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */
  119. /* LDAF (input) INTEGER */
  120. /* The leading dimension of the array AF. LDAF >= max(1,N). */
  121. /* COLEQU (input) LOGICAL */
  122. /* If .TRUE. then column equilibration was done to A before calling */
  123. /* this routine. This is needed to compute the solution and error */
  124. /* bounds correctly. */
  125. /* C (input) DOUBLE PRECISION array, dimension (N) */
  126. /* The column scale factors for A. If COLEQU = .FALSE., C */
  127. /* is not accessed. If C is input, each element of C should be a power */
  128. /* of the radix to ensure a reliable solution and error estimates. */
  129. /* Scaling by powers of the radix does not cause rounding errors unless */
  130. /* the result underflows or overflows. Rounding errors during scaling */
  131. /* lead to refining with a matrix that is not equivalent to the */
  132. /* input matrix, producing error estimates that may not be */
  133. /* reliable. */
  134. /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  135. /* The right-hand-side matrix B. */
  136. /* LDB (input) INTEGER */
  137. /* The leading dimension of the array B. LDB >= max(1,N). */
  138. /* Y (input/output) DOUBLE PRECISION array, dimension */
  139. /* (LDY,NRHS) */
  140. /* On entry, the solution matrix X, as computed by DPOTRS. */
  141. /* On exit, the improved solution matrix Y. */
  142. /* LDY (input) INTEGER */
  143. /* The leading dimension of the array Y. LDY >= max(1,N). */
  144. /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */
  145. /* On exit, BERR_OUT(j) contains the componentwise relative backward */
  146. /* error for right-hand-side j from the formula */
  147. /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  148. /* where abs(Z) is the componentwise absolute value of the matrix */
  149. /* or vector Z. This is computed by DLA_LIN_BERR. */
  150. /* N_NORMS (input) INTEGER */
  151. /* Determines which error bounds to return (see ERR_BNDS_NORM */
  152. /* and ERR_BNDS_COMP). */
  153. /* If N_NORMS >= 1 return normwise error bounds. */
  154. /* If N_NORMS >= 2 return componentwise error bounds. */
  155. /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */
  156. /* (NRHS, N_ERR_BNDS) */
  157. /* For each right-hand side, this array contains information about */
  158. /* various error bounds and condition numbers corresponding to the */
  159. /* normwise relative error, which is defined as follows: */
  160. /* Normwise relative error in the ith solution vector: */
  161. /* max_j (abs(XTRUE(j,i) - X(j,i))) */
  162. /* ------------------------------ */
  163. /* max_j abs(X(j,i)) */
  164. /* The array is indexed by the type of error information as described */
  165. /* below. There currently are up to three pieces of information */
  166. /* returned. */
  167. /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
  168. /* right-hand side. */
  169. /* The second index in ERR_BNDS_NORM(:,err) contains the following */
  170. /* three fields: */
  171. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  172. /* reciprocal condition number is less than the threshold */
  173. /* sqrt(n) * slamch('Epsilon'). */
  174. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  175. /* almost certainly within a factor of 10 of the true error */
  176. /* so long as the next entry is greater than the threshold */
  177. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  178. /* be trusted if the previous boolean is true. */
  179. /* err = 3 Reciprocal condition number: Estimated normwise */
  180. /* reciprocal condition number. Compared with the threshold */
  181. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  182. /* estimate is "guaranteed". These reciprocal condition */
  183. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  184. /* appropriately scaled matrix Z. */
  185. /* Let Z = S*A, where S scales each row by a power of the */
  186. /* radix so all absolute row sums of Z are approximately 1. */
  187. /* This subroutine is only responsible for setting the second field */
  188. /* above. */
  189. /* See Lapack Working Note 165 for further details and extra */
  190. /* cautions. */
  191. /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */
  192. /* (NRHS, N_ERR_BNDS) */
  193. /* For each right-hand side, this array contains information about */
  194. /* various error bounds and condition numbers corresponding to the */
  195. /* componentwise relative error, which is defined as follows: */
  196. /* Componentwise relative error in the ith solution vector: */
  197. /* abs(XTRUE(j,i) - X(j,i)) */
  198. /* max_j ---------------------- */
  199. /* abs(X(j,i)) */
  200. /* The array is indexed by the right-hand side i (on which the */
  201. /* componentwise relative error depends), and the type of error */
  202. /* information as described below. There currently are up to three */
  203. /* pieces of information returned for each right-hand side. If */
  204. /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
  205. /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */
  206. /* the first (:,N_ERR_BNDS) entries are returned. */
  207. /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
  208. /* right-hand side. */
  209. /* The second index in ERR_BNDS_COMP(:,err) contains the following */
  210. /* three fields: */
  211. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  212. /* reciprocal condition number is less than the threshold */
  213. /* sqrt(n) * slamch('Epsilon'). */
  214. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  215. /* almost certainly within a factor of 10 of the true error */
  216. /* so long as the next entry is greater than the threshold */
  217. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  218. /* be trusted if the previous boolean is true. */
  219. /* err = 3 Reciprocal condition number: Estimated componentwise */
  220. /* reciprocal condition number. Compared with the threshold */
  221. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  222. /* estimate is "guaranteed". These reciprocal condition */
  223. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  224. /* appropriately scaled matrix Z. */
  225. /* Let Z = S*(A*diag(x)), where x is the solution for the */
  226. /* current right-hand side and S scales each row of */
  227. /* A*diag(x) by a power of the radix so all absolute row */
  228. /* sums of Z are approximately 1. */
  229. /* This subroutine is only responsible for setting the second field */
  230. /* above. */
  231. /* See Lapack Working Note 165 for further details and extra */
  232. /* cautions. */
  233. /* RES (input) DOUBLE PRECISION array, dimension (N) */
  234. /* Workspace to hold the intermediate residual. */
  235. /* AYB (input) DOUBLE PRECISION array, dimension (N) */
  236. /* Workspace. This can be the same workspace passed for Y_TAIL. */
  237. /* DY (input) DOUBLE PRECISION array, dimension (N) */
  238. /* Workspace to hold the intermediate solution. */
  239. /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */
  240. /* Workspace to hold the trailing bits of the intermediate solution. */
  241. /* RCOND (input) DOUBLE PRECISION */
  242. /* Reciprocal scaled condition number. This is an estimate of the */
  243. /* reciprocal Skeel condition number of the matrix A after */
  244. /* equilibration (if done). If this is less than the machine */
  245. /* precision (in particular, if it is zero), the matrix is singular */
  246. /* to working precision. Note that the error may still be small even */
  247. /* if this number is very small and the matrix appears ill- */
  248. /* conditioned. */
  249. /* ITHRESH (input) INTEGER */
  250. /* The maximum number of residual computations allowed for */
  251. /* refinement. The default is 10. For 'aggressive' set to 100 to */
  252. /* permit convergence using approximate factorizations or */
  253. /* factorizations other than LU. If the factorization uses a */
  254. /* technique other than Gaussian elimination, the guarantees in */
  255. /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */
  256. /* RTHRESH (input) DOUBLE PRECISION */
  257. /* Determines when to stop refinement if the error estimate stops */
  258. /* decreasing. Refinement will stop when the next solution no longer */
  259. /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
  260. /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
  261. /* default value is 0.5. For 'aggressive' set to 0.9 to permit */
  262. /* convergence on extremely ill-conditioned matrices. See LAWN 165 */
  263. /* for more details. */
  264. /* DZ_UB (input) DOUBLE PRECISION */
  265. /* Determines when to start considering componentwise convergence. */
  266. /* Componentwise convergence is only considered after each component */
  267. /* of the solution Y is stable, which we definte as the relative */
  268. /* change in each component being less than DZ_UB. The default value */
  269. /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */
  270. /* more details. */
  271. /* IGNORE_CWISE (input) LOGICAL */
  272. /* If .TRUE. then ignore componentwise convergence. Default value */
  273. /* is .FALSE.. */
  274. /* INFO (output) INTEGER */
  275. /* = 0: Successful exit. */
  276. /* < 0: if INFO = -i, the ith argument to DPOTRS had an illegal */
  277. /* value */
  278. /* ===================================================================== */
  279. /* .. Local Scalars .. */
  280. /* .. */
  281. /* .. Parameters .. */
  282. /* .. */
  283. /* .. External Functions .. */
  284. /* .. */
  285. /* .. External Subroutines .. */
  286. /* .. */
  287. /* .. Intrinsic Functions .. */
  288. /* .. */
  289. /* .. Executable Statements .. */
  290. /* Parameter adjustments */
  291. err_bnds_comp_dim1 = *nrhs;
  292. err_bnds_comp_offset = 1 + err_bnds_comp_dim1;
  293. err_bnds_comp__ -= err_bnds_comp_offset;
  294. err_bnds_norm_dim1 = *nrhs;
  295. err_bnds_norm_offset = 1 + err_bnds_norm_dim1;
  296. err_bnds_norm__ -= err_bnds_norm_offset;
  297. a_dim1 = *lda;
  298. a_offset = 1 + a_dim1;
  299. a -= a_offset;
  300. af_dim1 = *ldaf;
  301. af_offset = 1 + af_dim1;
  302. af -= af_offset;
  303. --c__;
  304. b_dim1 = *ldb;
  305. b_offset = 1 + b_dim1;
  306. b -= b_offset;
  307. y_dim1 = *ldy;
  308. y_offset = 1 + y_dim1;
  309. y -= y_offset;
  310. --berr_out__;
  311. --res;
  312. --ayb;
  313. --dy;
  314. --y_tail__;
  315. /* Function Body */
  316. if (*info != 0) {
  317. return 0;
  318. }
  319. eps = _starpu_dlamch_("Epsilon");
  320. hugeval = _starpu_dlamch_("Overflow");
  321. /* Force HUGEVAL to Inf */
  322. hugeval *= hugeval;
  323. /* Using HUGEVAL may lead to spurious underflows. */
  324. incr_thresh__ = (doublereal) (*n) * eps;
  325. if (_starpu_lsame_(uplo, "L")) {
  326. uplo2 = _starpu_ilauplo_("L");
  327. } else {
  328. uplo2 = _starpu_ilauplo_("U");
  329. }
  330. i__1 = *nrhs;
  331. for (j = 1; j <= i__1; ++j) {
  332. y_prec_state__ = 1;
  333. if (y_prec_state__ == 2) {
  334. i__2 = *n;
  335. for (i__ = 1; i__ <= i__2; ++i__) {
  336. y_tail__[i__] = 0.;
  337. }
  338. }
  339. dxrat = 0.;
  340. dxratmax = 0.;
  341. dzrat = 0.;
  342. dzratmax = 0.;
  343. final_dx_x__ = hugeval;
  344. final_dz_z__ = hugeval;
  345. prevnormdx = hugeval;
  346. prev_dz_z__ = hugeval;
  347. dz_z__ = hugeval;
  348. dx_x__ = hugeval;
  349. x_state__ = 1;
  350. z_state__ = 0;
  351. incr_prec__ = FALSE_;
  352. i__2 = *ithresh;
  353. for (cnt = 1; cnt <= i__2; ++cnt) {
  354. /* Compute residual RES = B_s - op(A_s) * Y, */
  355. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  356. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  357. if (y_prec_state__ == 0) {
  358. _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1],
  359. &c__1, &c_b11, &res[1], &c__1);
  360. } else if (y_prec_state__ == 1) {
  361. _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j *
  362. y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1,
  363. prec_type__);
  364. } else {
  365. blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j *
  366. y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], &
  367. c__1, prec_type__);
  368. }
  369. /* XXX: RES is no longer needed. */
  370. _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1);
  371. _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &dy[1], n, info);
  372. /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
  373. normx = 0.;
  374. normy = 0.;
  375. normdx = 0.;
  376. dz_z__ = 0.;
  377. ymin = hugeval;
  378. i__3 = *n;
  379. for (i__ = 1; i__ <= i__3; ++i__) {
  380. yk = (d__1 = y[i__ + j * y_dim1], abs(d__1));
  381. dyk = (d__1 = dy[i__], abs(d__1));
  382. if (yk != 0.) {
  383. /* Computing MAX */
  384. d__1 = dz_z__, d__2 = dyk / yk;
  385. dz_z__ = max(d__1,d__2);
  386. } else if (dyk != 0.) {
  387. dz_z__ = hugeval;
  388. }
  389. ymin = min(ymin,yk);
  390. normy = max(normy,yk);
  391. if (*colequ) {
  392. /* Computing MAX */
  393. d__1 = normx, d__2 = yk * c__[i__];
  394. normx = max(d__1,d__2);
  395. /* Computing MAX */
  396. d__1 = normdx, d__2 = dyk * c__[i__];
  397. normdx = max(d__1,d__2);
  398. } else {
  399. normx = normy;
  400. normdx = max(normdx,dyk);
  401. }
  402. }
  403. if (normx != 0.) {
  404. dx_x__ = normdx / normx;
  405. } else if (normdx == 0.) {
  406. dx_x__ = 0.;
  407. } else {
  408. dx_x__ = hugeval;
  409. }
  410. dxrat = normdx / prevnormdx;
  411. dzrat = dz_z__ / prev_dz_z__;
  412. /* Check termination criteria. */
  413. if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) {
  414. incr_prec__ = TRUE_;
  415. }
  416. if (x_state__ == 3 && dxrat <= *rthresh) {
  417. x_state__ = 1;
  418. }
  419. if (x_state__ == 1) {
  420. if (dx_x__ <= eps) {
  421. x_state__ = 2;
  422. } else if (dxrat > *rthresh) {
  423. if (y_prec_state__ != 2) {
  424. incr_prec__ = TRUE_;
  425. } else {
  426. x_state__ = 3;
  427. }
  428. } else {
  429. if (dxrat > dxratmax) {
  430. dxratmax = dxrat;
  431. }
  432. }
  433. if (x_state__ > 1) {
  434. final_dx_x__ = dx_x__;
  435. }
  436. }
  437. if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
  438. z_state__ = 1;
  439. }
  440. if (z_state__ == 3 && dzrat <= *rthresh) {
  441. z_state__ = 1;
  442. }
  443. if (z_state__ == 1) {
  444. if (dz_z__ <= eps) {
  445. z_state__ = 2;
  446. } else if (dz_z__ > *dz_ub__) {
  447. z_state__ = 0;
  448. dzratmax = 0.;
  449. final_dz_z__ = hugeval;
  450. } else if (dzrat > *rthresh) {
  451. if (y_prec_state__ != 2) {
  452. incr_prec__ = TRUE_;
  453. } else {
  454. z_state__ = 3;
  455. }
  456. } else {
  457. if (dzrat > dzratmax) {
  458. dzratmax = dzrat;
  459. }
  460. }
  461. if (z_state__ > 1) {
  462. final_dz_z__ = dz_z__;
  463. }
  464. }
  465. if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) {
  466. goto L666;
  467. }
  468. if (incr_prec__) {
  469. incr_prec__ = FALSE_;
  470. ++y_prec_state__;
  471. i__3 = *n;
  472. for (i__ = 1; i__ <= i__3; ++i__) {
  473. y_tail__[i__] = 0.;
  474. }
  475. }
  476. prevnormdx = normdx;
  477. prev_dz_z__ = dz_z__;
  478. /* Update soluton. */
  479. if (y_prec_state__ < 2) {
  480. _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
  481. } else {
  482. _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
  483. }
  484. }
  485. /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */
  486. L666:
  487. /* Set final_* when cnt hits ithresh. */
  488. if (x_state__ == 1) {
  489. final_dx_x__ = dx_x__;
  490. }
  491. if (z_state__ == 1) {
  492. final_dz_z__ = dz_z__;
  493. }
  494. /* Compute error bounds. */
  495. if (*n_norms__ >= 1) {
  496. err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / (
  497. 1 - dxratmax);
  498. }
  499. if (*n_norms__ >= 2) {
  500. err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / (
  501. 1 - dzratmax);
  502. }
  503. /* Compute componentwise relative backward error from formula */
  504. /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  505. /* where abs(Z) is the componentwise absolute value of the matrix */
  506. /* or vector Z. */
  507. /* Compute residual RES = B_s - op(A_s) * Y, */
  508. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  509. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  510. _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &
  511. c_b11, &res[1], &c__1);
  512. i__2 = *n;
  513. for (i__ = 1; i__ <= i__2; ++i__) {
  514. ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
  515. }
  516. /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */
  517. _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1],
  518. &c__1, &c_b11, &ayb[1], &c__1);
  519. _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
  520. /* End of loop for each RHS. */
  521. }
  522. return 0;
  523. } /* _starpu_dla_porfsx_extended__ */