dla_syrfsx_extended.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. /* _starpu_dla_syrfsx_extended.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b9 = -1.;
  16. static doublereal c_b11 = 1.;
  17. /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo,
  18. integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *
  19. af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__,
  20. doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *
  21. berr_out__, integer *n_norms__, doublereal *err_bnds_norm__,
  22. doublereal *err_bnds_comp__, doublereal *res, doublereal *ayb,
  23. doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *
  24. ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *
  25. ignore_cwise__, integer *info, ftnlen uplo_len)
  26. {
  27. /* System generated locals */
  28. integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1,
  29. y_offset, err_bnds_norm_dim1, err_bnds_norm_offset,
  30. err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3;
  31. doublereal d__1, d__2;
  32. /* Local variables */
  33. doublereal dxratmax, dzratmax;
  34. integer i__, j;
  35. logical incr_prec__;
  36. extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal *
  37. , doublereal *, integer *, doublereal *, integer *, doublereal *,
  38. doublereal *, integer *);
  39. doublereal prev_dz_z__, yk, final_dx_x__;
  40. extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *,
  41. doublereal *, doublereal *);
  42. doublereal final_dz_z__, prevnormdx;
  43. integer cnt;
  44. doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__;
  45. extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer *
  46. , doublereal *, doublereal *, doublereal *);
  47. doublereal ymin;
  48. integer y_prec_state__;
  49. extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *,
  50. doublereal *, doublereal *, integer *, doublereal *, integer *,
  51. doublereal *, doublereal *, integer *, integer *);
  52. integer uplo2;
  53. extern logical _starpu_lsame_(char *, char *);
  54. extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *,
  55. doublereal *, doublereal *, integer *, doublereal *, doublereal *,
  56. integer *, doublereal *, doublereal *, integer *, integer *),
  57. _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *
  58. );
  59. doublereal dxrat, dzrat;
  60. extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *,
  61. integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *,
  62. doublereal *, doublereal *, integer *, doublereal *, integer *,
  63. doublereal *, doublereal *, integer *);
  64. doublereal normx, normy;
  65. extern doublereal _starpu_dlamch_(char *);
  66. doublereal normdx;
  67. extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *,
  68. doublereal *, integer *, integer *, doublereal *, integer *,
  69. integer *);
  70. doublereal hugeval;
  71. extern integer _starpu_ilauplo_(char *);
  72. integer x_state__, z_state__;
  73. /* -- LAPACK routine (version 3.2.1) -- */
  74. /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
  75. /* -- Jason Riedy of Univ. of California Berkeley. -- */
  76. /* -- April 2009 -- */
  77. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  78. /* -- Univ. of California Berkeley and NAG Ltd. -- */
  79. /* .. */
  80. /* .. Scalar Arguments .. */
  81. /* .. */
  82. /* .. Array Arguments .. */
  83. /* .. */
  84. /* Purpose */
  85. /* ======= */
  86. /* DLA_SYRFSX_EXTENDED improves the computed solution to a system of */
  87. /* linear equations by performing extra-precise iterative refinement */
  88. /* and provides error bounds and backward error estimates for the solution. */
  89. /* This subroutine is called by DSYRFSX to perform iterative refinement. */
  90. /* In addition to normwise error bound, the code provides maximum */
  91. /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */
  92. /* and ERR_BNDS_COMP for details of the error bounds. Note that this */
  93. /* subroutine is only resonsible for setting the second fields of */
  94. /* ERR_BNDS_NORM and ERR_BNDS_COMP. */
  95. /* Arguments */
  96. /* ========= */
  97. /* PREC_TYPE (input) INTEGER */
  98. /* Specifies the intermediate precision to be used in refinement. */
  99. /* The value is defined by ILAPREC(P) where P is a CHARACTER and */
  100. /* P = 'S': Single */
  101. /* = 'D': Double */
  102. /* = 'I': Indigenous */
  103. /* = 'X', 'E': Extra */
  104. /* UPLO (input) CHARACTER*1 */
  105. /* = 'U': Upper triangle of A is stored; */
  106. /* = 'L': Lower triangle of A is stored. */
  107. /* N (input) INTEGER */
  108. /* The number of linear equations, i.e., the order of the */
  109. /* matrix A. N >= 0. */
  110. /* NRHS (input) INTEGER */
  111. /* The number of right-hand-sides, i.e., the number of columns of the */
  112. /* matrix B. */
  113. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  114. /* On entry, the N-by-N matrix A. */
  115. /* LDA (input) INTEGER */
  116. /* The leading dimension of the array A. LDA >= max(1,N). */
  117. /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */
  118. /* The block diagonal matrix D and the multipliers used to */
  119. /* obtain the factor U or L as computed by DSYTRF. */
  120. /* LDAF (input) INTEGER */
  121. /* The leading dimension of the array AF. LDAF >= max(1,N). */
  122. /* IPIV (input) INTEGER array, dimension (N) */
  123. /* Details of the interchanges and the block structure of D */
  124. /* as determined by DSYTRF. */
  125. /* COLEQU (input) LOGICAL */
  126. /* If .TRUE. then column equilibration was done to A before calling */
  127. /* this routine. This is needed to compute the solution and error */
  128. /* bounds correctly. */
  129. /* C (input) DOUBLE PRECISION array, dimension (N) */
  130. /* The column scale factors for A. If COLEQU = .FALSE., C */
  131. /* is not accessed. If C is input, each element of C should be a power */
  132. /* of the radix to ensure a reliable solution and error estimates. */
  133. /* Scaling by powers of the radix does not cause rounding errors unless */
  134. /* the result underflows or overflows. Rounding errors during scaling */
  135. /* lead to refining with a matrix that is not equivalent to the */
  136. /* input matrix, producing error estimates that may not be */
  137. /* reliable. */
  138. /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  139. /* The right-hand-side matrix B. */
  140. /* LDB (input) INTEGER */
  141. /* The leading dimension of the array B. LDB >= max(1,N). */
  142. /* Y (input/output) DOUBLE PRECISION array, dimension */
  143. /* (LDY,NRHS) */
  144. /* On entry, the solution matrix X, as computed by DSYTRS. */
  145. /* On exit, the improved solution matrix Y. */
  146. /* LDY (input) INTEGER */
  147. /* The leading dimension of the array Y. LDY >= max(1,N). */
  148. /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */
  149. /* On exit, BERR_OUT(j) contains the componentwise relative backward */
  150. /* error for right-hand-side j from the formula */
  151. /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  152. /* where abs(Z) is the componentwise absolute value of the matrix */
  153. /* or vector Z. This is computed by DLA_LIN_BERR. */
  154. /* N_NORMS (input) INTEGER */
  155. /* Determines which error bounds to return (see ERR_BNDS_NORM */
  156. /* and ERR_BNDS_COMP). */
  157. /* If N_NORMS >= 1 return normwise error bounds. */
  158. /* If N_NORMS >= 2 return componentwise error bounds. */
  159. /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */
  160. /* (NRHS, N_ERR_BNDS) */
  161. /* For each right-hand side, this array contains information about */
  162. /* various error bounds and condition numbers corresponding to the */
  163. /* normwise relative error, which is defined as follows: */
  164. /* Normwise relative error in the ith solution vector: */
  165. /* max_j (abs(XTRUE(j,i) - X(j,i))) */
  166. /* ------------------------------ */
  167. /* max_j abs(X(j,i)) */
  168. /* The array is indexed by the type of error information as described */
  169. /* below. There currently are up to three pieces of information */
  170. /* returned. */
  171. /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
  172. /* right-hand side. */
  173. /* The second index in ERR_BNDS_NORM(:,err) contains the following */
  174. /* three fields: */
  175. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  176. /* reciprocal condition number is less than the threshold */
  177. /* sqrt(n) * slamch('Epsilon'). */
  178. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  179. /* almost certainly within a factor of 10 of the true error */
  180. /* so long as the next entry is greater than the threshold */
  181. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  182. /* be trusted if the previous boolean is true. */
  183. /* err = 3 Reciprocal condition number: Estimated normwise */
  184. /* reciprocal condition number. Compared with the threshold */
  185. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  186. /* estimate is "guaranteed". These reciprocal condition */
  187. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  188. /* appropriately scaled matrix Z. */
  189. /* Let Z = S*A, where S scales each row by a power of the */
  190. /* radix so all absolute row sums of Z are approximately 1. */
  191. /* This subroutine is only responsible for setting the second field */
  192. /* above. */
  193. /* See Lapack Working Note 165 for further details and extra */
  194. /* cautions. */
  195. /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */
  196. /* (NRHS, N_ERR_BNDS) */
  197. /* For each right-hand side, this array contains information about */
  198. /* various error bounds and condition numbers corresponding to the */
  199. /* componentwise relative error, which is defined as follows: */
  200. /* Componentwise relative error in the ith solution vector: */
  201. /* abs(XTRUE(j,i) - X(j,i)) */
  202. /* max_j ---------------------- */
  203. /* abs(X(j,i)) */
  204. /* The array is indexed by the right-hand side i (on which the */
  205. /* componentwise relative error depends), and the type of error */
  206. /* information as described below. There currently are up to three */
  207. /* pieces of information returned for each right-hand side. If */
  208. /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
  209. /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */
  210. /* the first (:,N_ERR_BNDS) entries are returned. */
  211. /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
  212. /* right-hand side. */
  213. /* The second index in ERR_BNDS_COMP(:,err) contains the following */
  214. /* three fields: */
  215. /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */
  216. /* reciprocal condition number is less than the threshold */
  217. /* sqrt(n) * slamch('Epsilon'). */
  218. /* err = 2 "Guaranteed" error bound: The estimated forward error, */
  219. /* almost certainly within a factor of 10 of the true error */
  220. /* so long as the next entry is greater than the threshold */
  221. /* sqrt(n) * slamch('Epsilon'). This error bound should only */
  222. /* be trusted if the previous boolean is true. */
  223. /* err = 3 Reciprocal condition number: Estimated componentwise */
  224. /* reciprocal condition number. Compared with the threshold */
  225. /* sqrt(n) * slamch('Epsilon') to determine if the error */
  226. /* estimate is "guaranteed". These reciprocal condition */
  227. /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
  228. /* appropriately scaled matrix Z. */
  229. /* Let Z = S*(A*diag(x)), where x is the solution for the */
  230. /* current right-hand side and S scales each row of */
  231. /* A*diag(x) by a power of the radix so all absolute row */
  232. /* sums of Z are approximately 1. */
  233. /* This subroutine is only responsible for setting the second field */
  234. /* above. */
  235. /* See Lapack Working Note 165 for further details and extra */
  236. /* cautions. */
  237. /* RES (input) DOUBLE PRECISION array, dimension (N) */
  238. /* Workspace to hold the intermediate residual. */
  239. /* AYB (input) DOUBLE PRECISION array, dimension (N) */
  240. /* Workspace. This can be the same workspace passed for Y_TAIL. */
  241. /* DY (input) DOUBLE PRECISION array, dimension (N) */
  242. /* Workspace to hold the intermediate solution. */
  243. /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */
  244. /* Workspace to hold the trailing bits of the intermediate solution. */
  245. /* RCOND (input) DOUBLE PRECISION */
  246. /* Reciprocal scaled condition number. This is an estimate of the */
  247. /* reciprocal Skeel condition number of the matrix A after */
  248. /* equilibration (if done). If this is less than the machine */
  249. /* precision (in particular, if it is zero), the matrix is singular */
  250. /* to working precision. Note that the error may still be small even */
  251. /* if this number is very small and the matrix appears ill- */
  252. /* conditioned. */
  253. /* ITHRESH (input) INTEGER */
  254. /* The maximum number of residual computations allowed for */
  255. /* refinement. The default is 10. For 'aggressive' set to 100 to */
  256. /* permit convergence using approximate factorizations or */
  257. /* factorizations other than LU. If the factorization uses a */
  258. /* technique other than Gaussian elimination, the guarantees in */
  259. /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */
  260. /* RTHRESH (input) DOUBLE PRECISION */
  261. /* Determines when to stop refinement if the error estimate stops */
  262. /* decreasing. Refinement will stop when the next solution no longer */
  263. /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
  264. /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
  265. /* default value is 0.5. For 'aggressive' set to 0.9 to permit */
  266. /* convergence on extremely ill-conditioned matrices. See LAWN 165 */
  267. /* for more details. */
  268. /* DZ_UB (input) DOUBLE PRECISION */
  269. /* Determines when to start considering componentwise convergence. */
  270. /* Componentwise convergence is only considered after each component */
  271. /* of the solution Y is stable, which we definte as the relative */
  272. /* change in each component being less than DZ_UB. The default value */
  273. /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */
  274. /* more details. */
  275. /* IGNORE_CWISE (input) LOGICAL */
  276. /* If .TRUE. then ignore componentwise convergence. Default value */
  277. /* is .FALSE.. */
  278. /* INFO (output) INTEGER */
  279. /* = 0: Successful exit. */
  280. /* < 0: if INFO = -i, the ith argument to DSYTRS had an illegal */
  281. /* value */
  282. /* ===================================================================== */
  283. /* .. Local Scalars .. */
  284. /* .. */
  285. /* .. Parameters .. */
  286. /* .. */
  287. /* .. External Functions .. */
  288. /* .. */
  289. /* .. External Subroutines .. */
  290. /* .. */
  291. /* .. Intrinsic Functions .. */
  292. /* .. */
  293. /* .. Executable Statements .. */
  294. /* Parameter adjustments */
  295. err_bnds_comp_dim1 = *nrhs;
  296. err_bnds_comp_offset = 1 + err_bnds_comp_dim1;
  297. err_bnds_comp__ -= err_bnds_comp_offset;
  298. err_bnds_norm_dim1 = *nrhs;
  299. err_bnds_norm_offset = 1 + err_bnds_norm_dim1;
  300. err_bnds_norm__ -= err_bnds_norm_offset;
  301. a_dim1 = *lda;
  302. a_offset = 1 + a_dim1;
  303. a -= a_offset;
  304. af_dim1 = *ldaf;
  305. af_offset = 1 + af_dim1;
  306. af -= af_offset;
  307. --ipiv;
  308. --c__;
  309. b_dim1 = *ldb;
  310. b_offset = 1 + b_dim1;
  311. b -= b_offset;
  312. y_dim1 = *ldy;
  313. y_offset = 1 + y_dim1;
  314. y -= y_offset;
  315. --berr_out__;
  316. --res;
  317. --ayb;
  318. --dy;
  319. --y_tail__;
  320. /* Function Body */
  321. if (*info != 0) {
  322. return 0;
  323. }
  324. eps = _starpu_dlamch_("Epsilon");
  325. hugeval = _starpu_dlamch_("Overflow");
  326. /* Force HUGEVAL to Inf */
  327. hugeval *= hugeval;
  328. /* Using HUGEVAL may lead to spurious underflows. */
  329. incr_thresh__ = (doublereal) (*n) * eps;
  330. if (_starpu_lsame_(uplo, "L")) {
  331. uplo2 = _starpu_ilauplo_("L");
  332. } else {
  333. uplo2 = _starpu_ilauplo_("U");
  334. }
  335. i__1 = *nrhs;
  336. for (j = 1; j <= i__1; ++j) {
  337. y_prec_state__ = 1;
  338. if (y_prec_state__ == 2) {
  339. i__2 = *n;
  340. for (i__ = 1; i__ <= i__2; ++i__) {
  341. y_tail__[i__] = 0.;
  342. }
  343. }
  344. dxrat = 0.;
  345. dxratmax = 0.;
  346. dzrat = 0.;
  347. dzratmax = 0.;
  348. final_dx_x__ = hugeval;
  349. final_dz_z__ = hugeval;
  350. prevnormdx = hugeval;
  351. prev_dz_z__ = hugeval;
  352. dz_z__ = hugeval;
  353. dx_x__ = hugeval;
  354. x_state__ = 1;
  355. z_state__ = 0;
  356. incr_prec__ = FALSE_;
  357. i__2 = *ithresh;
  358. for (cnt = 1; cnt <= i__2; ++cnt) {
  359. /* Compute residual RES = B_s - op(A_s) * Y, */
  360. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  361. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  362. if (y_prec_state__ == 0) {
  363. _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1],
  364. &c__1, &c_b11, &res[1], &c__1);
  365. } else if (y_prec_state__ == 1) {
  366. _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j *
  367. y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1,
  368. prec_type__);
  369. } else {
  370. blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j *
  371. y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], &
  372. c__1, prec_type__);
  373. }
  374. /* XXX: RES is no longer needed. */
  375. _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1);
  376. _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &dy[1], n,
  377. info);
  378. /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
  379. normx = 0.;
  380. normy = 0.;
  381. normdx = 0.;
  382. dz_z__ = 0.;
  383. ymin = hugeval;
  384. i__3 = *n;
  385. for (i__ = 1; i__ <= i__3; ++i__) {
  386. yk = (d__1 = y[i__ + j * y_dim1], abs(d__1));
  387. dyk = (d__1 = dy[i__], abs(d__1));
  388. if (yk != 0.) {
  389. /* Computing MAX */
  390. d__1 = dz_z__, d__2 = dyk / yk;
  391. dz_z__ = max(d__1,d__2);
  392. } else if (dyk != 0.) {
  393. dz_z__ = hugeval;
  394. }
  395. ymin = min(ymin,yk);
  396. normy = max(normy,yk);
  397. if (*colequ) {
  398. /* Computing MAX */
  399. d__1 = normx, d__2 = yk * c__[i__];
  400. normx = max(d__1,d__2);
  401. /* Computing MAX */
  402. d__1 = normdx, d__2 = dyk * c__[i__];
  403. normdx = max(d__1,d__2);
  404. } else {
  405. normx = normy;
  406. normdx = max(normdx,dyk);
  407. }
  408. }
  409. if (normx != 0.) {
  410. dx_x__ = normdx / normx;
  411. } else if (normdx == 0.) {
  412. dx_x__ = 0.;
  413. } else {
  414. dx_x__ = hugeval;
  415. }
  416. dxrat = normdx / prevnormdx;
  417. dzrat = dz_z__ / prev_dz_z__;
  418. /* Check termination criteria. */
  419. if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) {
  420. incr_prec__ = TRUE_;
  421. }
  422. if (x_state__ == 3 && dxrat <= *rthresh) {
  423. x_state__ = 1;
  424. }
  425. if (x_state__ == 1) {
  426. if (dx_x__ <= eps) {
  427. x_state__ = 2;
  428. } else if (dxrat > *rthresh) {
  429. if (y_prec_state__ != 2) {
  430. incr_prec__ = TRUE_;
  431. } else {
  432. x_state__ = 3;
  433. }
  434. } else {
  435. if (dxrat > dxratmax) {
  436. dxratmax = dxrat;
  437. }
  438. }
  439. if (x_state__ > 1) {
  440. final_dx_x__ = dx_x__;
  441. }
  442. }
  443. if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
  444. z_state__ = 1;
  445. }
  446. if (z_state__ == 3 && dzrat <= *rthresh) {
  447. z_state__ = 1;
  448. }
  449. if (z_state__ == 1) {
  450. if (dz_z__ <= eps) {
  451. z_state__ = 2;
  452. } else if (dz_z__ > *dz_ub__) {
  453. z_state__ = 0;
  454. dzratmax = 0.;
  455. final_dz_z__ = hugeval;
  456. } else if (dzrat > *rthresh) {
  457. if (y_prec_state__ != 2) {
  458. incr_prec__ = TRUE_;
  459. } else {
  460. z_state__ = 3;
  461. }
  462. } else {
  463. if (dzrat > dzratmax) {
  464. dzratmax = dzrat;
  465. }
  466. }
  467. if (z_state__ > 1) {
  468. final_dz_z__ = dz_z__;
  469. }
  470. }
  471. if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) {
  472. goto L666;
  473. }
  474. if (incr_prec__) {
  475. incr_prec__ = FALSE_;
  476. ++y_prec_state__;
  477. i__3 = *n;
  478. for (i__ = 1; i__ <= i__3; ++i__) {
  479. y_tail__[i__] = 0.;
  480. }
  481. }
  482. prevnormdx = normdx;
  483. prev_dz_z__ = dz_z__;
  484. /* Update soluton. */
  485. if (y_prec_state__ < 2) {
  486. _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
  487. } else {
  488. _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
  489. }
  490. }
  491. /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */
  492. L666:
  493. /* Set final_* when cnt hits ithresh. */
  494. if (x_state__ == 1) {
  495. final_dx_x__ = dx_x__;
  496. }
  497. if (z_state__ == 1) {
  498. final_dz_z__ = dz_z__;
  499. }
  500. /* Compute error bounds. */
  501. if (*n_norms__ >= 1) {
  502. err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / (
  503. 1 - dxratmax);
  504. }
  505. if (*n_norms__ >= 2) {
  506. err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / (
  507. 1 - dzratmax);
  508. }
  509. /* Compute componentwise relative backward error from formula */
  510. /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
  511. /* where abs(Z) is the componentwise absolute value of the matrix */
  512. /* or vector Z. */
  513. /* Compute residual RES = B_s - op(A_s) * Y, */
  514. /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
  515. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
  516. _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &
  517. c_b11, &res[1], &c__1);
  518. i__2 = *n;
  519. for (i__ = 1; i__ <= i__2; ++i__) {
  520. ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
  521. }
  522. /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */
  523. _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1],
  524. &c__1, &c_b11, &ayb[1], &c__1);
  525. _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
  526. /* End of loop for each RHS. */
  527. }
  528. return 0;
  529. } /* _starpu_dla_syrfsx_extended__ */