dpbrfs.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /* dpbrfs.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b12 = -1.;
  16. static doublereal c_b14 = 1.;
  17. /* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer *
  18. nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb,
  19. doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *
  20. ferr, doublereal *berr, doublereal *work, integer *iwork, integer *
  21. info)
  22. {
  23. /* System generated locals */
  24. integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset,
  25. x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5;
  26. doublereal d__1, d__2, d__3;
  27. /* Local variables */
  28. integer i__, j, k, l;
  29. doublereal s, xk;
  30. integer nz;
  31. doublereal eps;
  32. integer kase;
  33. doublereal safe1, safe2;
  34. extern logical _starpu_lsame_(char *, char *);
  35. integer isave[3];
  36. extern /* Subroutine */ int _starpu_dsbmv_(char *, integer *, integer *,
  37. doublereal *, doublereal *, integer *, doublereal *, integer *,
  38. doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *,
  39. doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer
  40. *, doublereal *, doublereal *, integer *, doublereal *, integer *)
  41. ;
  42. integer count;
  43. logical upper;
  44. extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *,
  45. integer *, doublereal *, integer *, integer *);
  46. extern doublereal _starpu_dlamch_(char *);
  47. doublereal safmin;
  48. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbtrs_(
  49. char *, integer *, integer *, integer *, doublereal *, integer *,
  50. doublereal *, integer *, integer *);
  51. doublereal lstres;
  52. /* -- LAPACK routine (version 3.2) -- */
  53. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  54. /* November 2006 */
  55. /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */
  56. /* .. Scalar Arguments .. */
  57. /* .. */
  58. /* .. Array Arguments .. */
  59. /* .. */
  60. /* Purpose */
  61. /* ======= */
  62. /* DPBRFS improves the computed solution to a system of linear */
  63. /* equations when the coefficient matrix is symmetric positive definite */
  64. /* and banded, and provides error bounds and backward error estimates */
  65. /* for the solution. */
  66. /* Arguments */
  67. /* ========= */
  68. /* UPLO (input) CHARACTER*1 */
  69. /* = 'U': Upper triangle of A is stored; */
  70. /* = 'L': Lower triangle of A is stored. */
  71. /* N (input) INTEGER */
  72. /* The order of the matrix A. N >= 0. */
  73. /* KD (input) INTEGER */
  74. /* The number of superdiagonals of the matrix A if UPLO = 'U', */
  75. /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */
  76. /* NRHS (input) INTEGER */
  77. /* The number of right hand sides, i.e., the number of columns */
  78. /* of the matrices B and X. NRHS >= 0. */
  79. /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */
  80. /* The upper or lower triangle of the symmetric band matrix A, */
  81. /* stored in the first KD+1 rows of the array. The j-th column */
  82. /* of A is stored in the j-th column of the array AB as follows: */
  83. /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
  84. /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */
  85. /* LDAB (input) INTEGER */
  86. /* The leading dimension of the array AB. LDAB >= KD+1. */
  87. /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */
  88. /* The triangular factor U or L from the Cholesky factorization */
  89. /* A = U**T*U or A = L*L**T of the band matrix A as computed by */
  90. /* DPBTRF, in the same storage format as A (see AB). */
  91. /* LDAFB (input) INTEGER */
  92. /* The leading dimension of the array AFB. LDAFB >= KD+1. */
  93. /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  94. /* The right hand side matrix B. */
  95. /* LDB (input) INTEGER */
  96. /* The leading dimension of the array B. LDB >= max(1,N). */
  97. /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */
  98. /* On entry, the solution matrix X, as computed by DPBTRS. */
  99. /* On exit, the improved solution matrix X. */
  100. /* LDX (input) INTEGER */
  101. /* The leading dimension of the array X. LDX >= max(1,N). */
  102. /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */
  103. /* The estimated forward error bound for each solution vector */
  104. /* X(j) (the j-th column of the solution matrix X). */
  105. /* If XTRUE is the true solution corresponding to X(j), FERR(j) */
  106. /* is an estimated upper bound for the magnitude of the largest */
  107. /* element in (X(j) - XTRUE) divided by the magnitude of the */
  108. /* largest element in X(j). The estimate is as reliable as */
  109. /* the estimate for RCOND, and is almost always a slight */
  110. /* overestimate of the true error. */
  111. /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */
  112. /* The componentwise relative backward error of each solution */
  113. /* vector X(j) (i.e., the smallest relative change in */
  114. /* any element of A or B that makes X(j) an exact solution). */
  115. /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */
  116. /* IWORK (workspace) INTEGER array, dimension (N) */
  117. /* INFO (output) INTEGER */
  118. /* = 0: successful exit */
  119. /* < 0: if INFO = -i, the i-th argument had an illegal value */
  120. /* Internal Parameters */
  121. /* =================== */
  122. /* ITMAX is the maximum number of steps of iterative refinement. */
  123. /* ===================================================================== */
  124. /* .. Parameters .. */
  125. /* .. */
  126. /* .. Local Scalars .. */
  127. /* .. */
  128. /* .. Local Arrays .. */
  129. /* .. */
  130. /* .. External Subroutines .. */
  131. /* .. */
  132. /* .. Intrinsic Functions .. */
  133. /* .. */
  134. /* .. External Functions .. */
  135. /* .. */
  136. /* .. Executable Statements .. */
  137. /* Test the input parameters. */
  138. /* Parameter adjustments */
  139. ab_dim1 = *ldab;
  140. ab_offset = 1 + ab_dim1;
  141. ab -= ab_offset;
  142. afb_dim1 = *ldafb;
  143. afb_offset = 1 + afb_dim1;
  144. afb -= afb_offset;
  145. b_dim1 = *ldb;
  146. b_offset = 1 + b_dim1;
  147. b -= b_offset;
  148. x_dim1 = *ldx;
  149. x_offset = 1 + x_dim1;
  150. x -= x_offset;
  151. --ferr;
  152. --berr;
  153. --work;
  154. --iwork;
  155. /* Function Body */
  156. *info = 0;
  157. upper = _starpu_lsame_(uplo, "U");
  158. if (! upper && ! _starpu_lsame_(uplo, "L")) {
  159. *info = -1;
  160. } else if (*n < 0) {
  161. *info = -2;
  162. } else if (*kd < 0) {
  163. *info = -3;
  164. } else if (*nrhs < 0) {
  165. *info = -4;
  166. } else if (*ldab < *kd + 1) {
  167. *info = -6;
  168. } else if (*ldafb < *kd + 1) {
  169. *info = -8;
  170. } else if (*ldb < max(1,*n)) {
  171. *info = -10;
  172. } else if (*ldx < max(1,*n)) {
  173. *info = -12;
  174. }
  175. if (*info != 0) {
  176. i__1 = -(*info);
  177. _starpu_xerbla_("DPBRFS", &i__1);
  178. return 0;
  179. }
  180. /* Quick return if possible */
  181. if (*n == 0 || *nrhs == 0) {
  182. i__1 = *nrhs;
  183. for (j = 1; j <= i__1; ++j) {
  184. ferr[j] = 0.;
  185. berr[j] = 0.;
  186. /* L10: */
  187. }
  188. return 0;
  189. }
  190. /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
  191. /* Computing MIN */
  192. i__1 = *n + 1, i__2 = (*kd << 1) + 2;
  193. nz = min(i__1,i__2);
  194. eps = _starpu_dlamch_("Epsilon");
  195. safmin = _starpu_dlamch_("Safe minimum");
  196. safe1 = nz * safmin;
  197. safe2 = safe1 / eps;
  198. /* Do for each right hand side */
  199. i__1 = *nrhs;
  200. for (j = 1; j <= i__1; ++j) {
  201. count = 1;
  202. lstres = 3.;
  203. L20:
  204. /* Loop until stopping criterion is satisfied. */
  205. /* Compute residual R = B - A * X */
  206. _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
  207. _starpu_dsbmv_(uplo, n, kd, &c_b12, &ab[ab_offset], ldab, &x[j * x_dim1 + 1],
  208. &c__1, &c_b14, &work[*n + 1], &c__1);
  209. /* Compute componentwise relative backward error from formula */
  210. /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */
  211. /* where abs(Z) is the componentwise absolute value of the matrix */
  212. /* or vector Z. If the i-th component of the denominator is less */
  213. /* than SAFE2, then SAFE1 is added to the i-th components of the */
  214. /* numerator and denominator before dividing. */
  215. i__2 = *n;
  216. for (i__ = 1; i__ <= i__2; ++i__) {
  217. work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
  218. /* L30: */
  219. }
  220. /* Compute abs(A)*abs(X) + abs(B). */
  221. if (upper) {
  222. i__2 = *n;
  223. for (k = 1; k <= i__2; ++k) {
  224. s = 0.;
  225. xk = (d__1 = x[k + j * x_dim1], abs(d__1));
  226. l = *kd + 1 - k;
  227. /* Computing MAX */
  228. i__3 = 1, i__4 = k - *kd;
  229. i__5 = k - 1;
  230. for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) {
  231. work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1))
  232. * xk;
  233. s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * (
  234. d__2 = x[i__ + j * x_dim1], abs(d__2));
  235. /* L40: */
  236. }
  237. work[k] = work[k] + (d__1 = ab[*kd + 1 + k * ab_dim1], abs(
  238. d__1)) * xk + s;
  239. /* L50: */
  240. }
  241. } else {
  242. i__2 = *n;
  243. for (k = 1; k <= i__2; ++k) {
  244. s = 0.;
  245. xk = (d__1 = x[k + j * x_dim1], abs(d__1));
  246. work[k] += (d__1 = ab[k * ab_dim1 + 1], abs(d__1)) * xk;
  247. l = 1 - k;
  248. /* Computing MIN */
  249. i__3 = *n, i__4 = k + *kd;
  250. i__5 = min(i__3,i__4);
  251. for (i__ = k + 1; i__ <= i__5; ++i__) {
  252. work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1))
  253. * xk;
  254. s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * (
  255. d__2 = x[i__ + j * x_dim1], abs(d__2));
  256. /* L60: */
  257. }
  258. work[k] += s;
  259. /* L70: */
  260. }
  261. }
  262. s = 0.;
  263. i__2 = *n;
  264. for (i__ = 1; i__ <= i__2; ++i__) {
  265. if (work[i__] > safe2) {
  266. /* Computing MAX */
  267. d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[
  268. i__];
  269. s = max(d__2,d__3);
  270. } else {
  271. /* Computing MAX */
  272. d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1)
  273. / (work[i__] + safe1);
  274. s = max(d__2,d__3);
  275. }
  276. /* L80: */
  277. }
  278. berr[j] = s;
  279. /* Test stopping criterion. Continue iterating if */
  280. /* 1) The residual BERR(J) is larger than machine epsilon, and */
  281. /* 2) BERR(J) decreased by at least a factor of 2 during the */
  282. /* last iteration, and */
  283. /* 3) At most ITMAX iterations tried. */
  284. if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {
  285. /* Update solution and try again. */
  286. _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1]
  287. , n, info);
  288. _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
  289. ;
  290. lstres = berr[j];
  291. ++count;
  292. goto L20;
  293. }
  294. /* Bound error from formula */
  295. /* norm(X - XTRUE) / norm(X) .le. FERR = */
  296. /* norm( abs(inv(A))* */
  297. /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */
  298. /* where */
  299. /* norm(Z) is the magnitude of the largest component of Z */
  300. /* inv(A) is the inverse of A */
  301. /* abs(Z) is the componentwise absolute value of the matrix or */
  302. /* vector Z */
  303. /* NZ is the maximum number of nonzeros in any row of A, plus 1 */
  304. /* EPS is machine epsilon */
  305. /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
  306. /* is incremented by SAFE1 if the i-th component of */
  307. /* abs(A)*abs(X) + abs(B) is less than SAFE2. */
  308. /* Use DLACN2 to estimate the infinity-norm of the matrix */
  309. /* inv(A) * diag(W), */
  310. /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */
  311. i__2 = *n;
  312. for (i__ = 1; i__ <= i__2; ++i__) {
  313. if (work[i__] > safe2) {
  314. work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps *
  315. work[i__];
  316. } else {
  317. work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps *
  318. work[i__] + safe1;
  319. }
  320. /* L90: */
  321. }
  322. kase = 0;
  323. L100:
  324. _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
  325. kase, isave);
  326. if (kase != 0) {
  327. if (kase == 1) {
  328. /* Multiply by diag(W)*inv(A'). */
  329. _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n
  330. + 1], n, info);
  331. i__2 = *n;
  332. for (i__ = 1; i__ <= i__2; ++i__) {
  333. work[*n + i__] *= work[i__];
  334. /* L110: */
  335. }
  336. } else if (kase == 2) {
  337. /* Multiply by inv(A)*diag(W). */
  338. i__2 = *n;
  339. for (i__ = 1; i__ <= i__2; ++i__) {
  340. work[*n + i__] *= work[i__];
  341. /* L120: */
  342. }
  343. _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n
  344. + 1], n, info);
  345. }
  346. goto L100;
  347. }
  348. /* Normalize error. */
  349. lstres = 0.;
  350. i__2 = *n;
  351. for (i__ = 1; i__ <= i__2; ++i__) {
  352. /* Computing MAX */
  353. d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
  354. lstres = max(d__2,d__3);
  355. /* L130: */
  356. }
  357. if (lstres != 0.) {
  358. ferr[j] /= lstres;
  359. }
  360. /* L140: */
  361. }
  362. return 0;
  363. /* End of DPBRFS */
  364. } /* _starpu_dpbrfs_ */