dpbtrf.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /* dpbtrf.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static integer c_n1 = -1;
  16. static doublereal c_b18 = 1.;
  17. static doublereal c_b21 = -1.;
  18. static integer c__33 = 33;
  19. /* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal *
  20. ab, integer *ldab, integer *info)
  21. {
  22. /* System generated locals */
  23. integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
  24. /* Local variables */
  25. integer i__, j, i2, i3, ib, nb, ii, jj;
  26. doublereal work[1056] /* was [33][32] */;
  27. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  28. integer *, doublereal *, doublereal *, integer *, doublereal *,
  29. integer *, doublereal *, doublereal *, integer *);
  30. extern logical _starpu_lsame_(char *, char *);
  31. extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *,
  32. integer *, integer *, doublereal *, doublereal *, integer *,
  33. doublereal *, integer *), _starpu_dsyrk_(
  34. char *, char *, integer *, integer *, doublereal *, doublereal *,
  35. integer *, doublereal *, doublereal *, integer *),
  36. _starpu_dpbtf2_(char *, integer *, integer *, doublereal *, integer *,
  37. integer *), _starpu_dpotf2_(char *, integer *, doublereal *,
  38. integer *, integer *), _starpu_xerbla_(char *, integer *);
  39. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  40. integer *, integer *);
  41. /* -- LAPACK routine (version 3.2) -- */
  42. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  43. /* November 2006 */
  44. /* .. Scalar Arguments .. */
  45. /* .. */
  46. /* .. Array Arguments .. */
  47. /* .. */
  48. /* Purpose */
  49. /* ======= */
  50. /* DPBTRF computes the Cholesky factorization of a real symmetric */
  51. /* positive definite band matrix A. */
  52. /* The factorization has the form */
  53. /* A = U**T * U, if UPLO = 'U', or */
  54. /* A = L * L**T, if UPLO = 'L', */
  55. /* where U is an upper triangular matrix and L is lower triangular. */
  56. /* Arguments */
  57. /* ========= */
  58. /* UPLO (input) CHARACTER*1 */
  59. /* = 'U': Upper triangle of A is stored; */
  60. /* = 'L': Lower triangle of A is stored. */
  61. /* N (input) INTEGER */
  62. /* The order of the matrix A. N >= 0. */
  63. /* KD (input) INTEGER */
  64. /* The number of superdiagonals of the matrix A if UPLO = 'U', */
  65. /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */
  66. /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */
  67. /* On entry, the upper or lower triangle of the symmetric band */
  68. /* matrix A, stored in the first KD+1 rows of the array. The */
  69. /* j-th column of A is stored in the j-th column of the array AB */
  70. /* as follows: */
  71. /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
  72. /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */
  73. /* On exit, if INFO = 0, the triangular factor U or L from the */
  74. /* Cholesky factorization A = U**T*U or A = L*L**T of the band */
  75. /* matrix A, in the same storage format as A. */
  76. /* LDAB (input) INTEGER */
  77. /* The leading dimension of the array AB. LDAB >= KD+1. */
  78. /* INFO (output) INTEGER */
  79. /* = 0: successful exit */
  80. /* < 0: if INFO = -i, the i-th argument had an illegal value */
  81. /* > 0: if INFO = i, the leading minor of order i is not */
  82. /* positive definite, and the factorization could not be */
  83. /* completed. */
  84. /* Further Details */
  85. /* =============== */
  86. /* The band storage scheme is illustrated by the following example, when */
  87. /* N = 6, KD = 2, and UPLO = 'U': */
  88. /* On entry: On exit: */
  89. /* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */
  90. /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */
  91. /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */
  92. /* Similarly, if UPLO = 'L' the format of A is as follows: */
  93. /* On entry: On exit: */
  94. /* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */
  95. /* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */
  96. /* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */
  97. /* Array elements marked * are not used by the routine. */
  98. /* Contributed by */
  99. /* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 */
  100. /* ===================================================================== */
  101. /* .. Parameters .. */
  102. /* .. */
  103. /* .. Local Scalars .. */
  104. /* .. */
  105. /* .. Local Arrays .. */
  106. /* .. */
  107. /* .. External Functions .. */
  108. /* .. */
  109. /* .. External Subroutines .. */
  110. /* .. */
  111. /* .. Intrinsic Functions .. */
  112. /* .. */
  113. /* .. Executable Statements .. */
  114. /* Test the input parameters. */
  115. /* Parameter adjustments */
  116. ab_dim1 = *ldab;
  117. ab_offset = 1 + ab_dim1;
  118. ab -= ab_offset;
  119. /* Function Body */
  120. *info = 0;
  121. if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) {
  122. *info = -1;
  123. } else if (*n < 0) {
  124. *info = -2;
  125. } else if (*kd < 0) {
  126. *info = -3;
  127. } else if (*ldab < *kd + 1) {
  128. *info = -5;
  129. }
  130. if (*info != 0) {
  131. i__1 = -(*info);
  132. _starpu_xerbla_("DPBTRF", &i__1);
  133. return 0;
  134. }
  135. /* Quick return if possible */
  136. if (*n == 0) {
  137. return 0;
  138. }
  139. /* Determine the block size for this environment */
  140. nb = _starpu_ilaenv_(&c__1, "DPBTRF", uplo, n, kd, &c_n1, &c_n1);
  141. /* The block size must not exceed the semi-bandwidth KD, and must not */
  142. /* exceed the limit set by the size of the local array WORK. */
  143. nb = min(nb,32);
  144. if (nb <= 1 || nb > *kd) {
  145. /* Use unblocked code */
  146. _starpu_dpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
  147. } else {
  148. /* Use blocked code */
  149. if (_starpu_lsame_(uplo, "U")) {
  150. /* Compute the Cholesky factorization of a symmetric band */
  151. /* matrix, given the upper triangle of the matrix in band */
  152. /* storage. */
  153. /* Zero the upper triangle of the work array. */
  154. i__1 = nb;
  155. for (j = 1; j <= i__1; ++j) {
  156. i__2 = j - 1;
  157. for (i__ = 1; i__ <= i__2; ++i__) {
  158. work[i__ + j * 33 - 34] = 0.;
  159. /* L10: */
  160. }
  161. /* L20: */
  162. }
  163. /* Process the band matrix one diagonal block at a time. */
  164. i__1 = *n;
  165. i__2 = nb;
  166. for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
  167. /* Computing MIN */
  168. i__3 = nb, i__4 = *n - i__ + 1;
  169. ib = min(i__3,i__4);
  170. /* Factorize the diagonal block */
  171. i__3 = *ldab - 1;
  172. _starpu_dpotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii);
  173. if (ii != 0) {
  174. *info = i__ + ii - 1;
  175. goto L150;
  176. }
  177. if (i__ + ib <= *n) {
  178. /* Update the relevant part of the trailing submatrix. */
  179. /* If A11 denotes the diagonal block which has just been */
  180. /* factorized, then we need to update the remaining */
  181. /* blocks in the diagram: */
  182. /* A11 A12 A13 */
  183. /* A22 A23 */
  184. /* A33 */
  185. /* The numbers of rows and columns in the partitioning */
  186. /* are IB, I2, I3 respectively. The blocks A12, A22 and */
  187. /* A23 are empty if IB = KD. The upper triangle of A13 */
  188. /* lies outside the band. */
  189. /* Computing MIN */
  190. i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
  191. i2 = min(i__3,i__4);
  192. /* Computing MIN */
  193. i__3 = ib, i__4 = *n - i__ - *kd + 1;
  194. i3 = min(i__3,i__4);
  195. if (i2 > 0) {
  196. /* Update A12 */
  197. i__3 = *ldab - 1;
  198. i__4 = *ldab - 1;
  199. _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib,
  200. &i2, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], &
  201. i__3, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1]
  202. , &i__4);
  203. /* Update A22 */
  204. i__3 = *ldab - 1;
  205. i__4 = *ldab - 1;
  206. _starpu_dsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &ab[*
  207. kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, &
  208. c_b18, &ab[*kd + 1 + (i__ + ib) * ab_dim1], &
  209. i__4);
  210. }
  211. if (i3 > 0) {
  212. /* Copy the lower triangle of A13 into the work array. */
  213. i__3 = i3;
  214. for (jj = 1; jj <= i__3; ++jj) {
  215. i__4 = ib;
  216. for (ii = jj; ii <= i__4; ++ii) {
  217. work[ii + jj * 33 - 34] = ab[ii - jj + 1 + (
  218. jj + i__ + *kd - 1) * ab_dim1];
  219. /* L30: */
  220. }
  221. /* L40: */
  222. }
  223. /* Update A13 (in the work array). */
  224. i__3 = *ldab - 1;
  225. _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib,
  226. &i3, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], &
  227. i__3, work, &c__33);
  228. /* Update A23 */
  229. if (i2 > 0) {
  230. i__3 = *ldab - 1;
  231. i__4 = *ldab - 1;
  232. _starpu_dgemm_("Transpose", "No Transpose", &i2, &i3, &ib,
  233. &c_b21, &ab[*kd + 1 - ib + (i__ + ib) *
  234. ab_dim1], &i__3, work, &c__33, &c_b18, &
  235. ab[ib + 1 + (i__ + *kd) * ab_dim1], &i__4);
  236. }
  237. /* Update A33 */
  238. i__3 = *ldab - 1;
  239. _starpu_dsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, &
  240. c__33, &c_b18, &ab[*kd + 1 + (i__ + *kd) *
  241. ab_dim1], &i__3);
  242. /* Copy the lower triangle of A13 back into place. */
  243. i__3 = i3;
  244. for (jj = 1; jj <= i__3; ++jj) {
  245. i__4 = ib;
  246. for (ii = jj; ii <= i__4; ++ii) {
  247. ab[ii - jj + 1 + (jj + i__ + *kd - 1) *
  248. ab_dim1] = work[ii + jj * 33 - 34];
  249. /* L50: */
  250. }
  251. /* L60: */
  252. }
  253. }
  254. }
  255. /* L70: */
  256. }
  257. } else {
  258. /* Compute the Cholesky factorization of a symmetric band */
  259. /* matrix, given the lower triangle of the matrix in band */
  260. /* storage. */
  261. /* Zero the lower triangle of the work array. */
  262. i__2 = nb;
  263. for (j = 1; j <= i__2; ++j) {
  264. i__1 = nb;
  265. for (i__ = j + 1; i__ <= i__1; ++i__) {
  266. work[i__ + j * 33 - 34] = 0.;
  267. /* L80: */
  268. }
  269. /* L90: */
  270. }
  271. /* Process the band matrix one diagonal block at a time. */
  272. i__2 = *n;
  273. i__1 = nb;
  274. for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
  275. /* Computing MIN */
  276. i__3 = nb, i__4 = *n - i__ + 1;
  277. ib = min(i__3,i__4);
  278. /* Factorize the diagonal block */
  279. i__3 = *ldab - 1;
  280. _starpu_dpotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii);
  281. if (ii != 0) {
  282. *info = i__ + ii - 1;
  283. goto L150;
  284. }
  285. if (i__ + ib <= *n) {
  286. /* Update the relevant part of the trailing submatrix. */
  287. /* If A11 denotes the diagonal block which has just been */
  288. /* factorized, then we need to update the remaining */
  289. /* blocks in the diagram: */
  290. /* A11 */
  291. /* A21 A22 */
  292. /* A31 A32 A33 */
  293. /* The numbers of rows and columns in the partitioning */
  294. /* are IB, I2, I3 respectively. The blocks A21, A22 and */
  295. /* A32 are empty if IB = KD. The lower triangle of A31 */
  296. /* lies outside the band. */
  297. /* Computing MIN */
  298. i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
  299. i2 = min(i__3,i__4);
  300. /* Computing MIN */
  301. i__3 = ib, i__4 = *n - i__ - *kd + 1;
  302. i3 = min(i__3,i__4);
  303. if (i2 > 0) {
  304. /* Update A21 */
  305. i__3 = *ldab - 1;
  306. i__4 = *ldab - 1;
  307. _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i2,
  308. &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, &
  309. ab[ib + 1 + i__ * ab_dim1], &i__4);
  310. /* Update A22 */
  311. i__3 = *ldab - 1;
  312. i__4 = *ldab - 1;
  313. _starpu_dsyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &ab[
  314. ib + 1 + i__ * ab_dim1], &i__3, &c_b18, &ab[(
  315. i__ + ib) * ab_dim1 + 1], &i__4);
  316. }
  317. if (i3 > 0) {
  318. /* Copy the upper triangle of A31 into the work array. */
  319. i__3 = ib;
  320. for (jj = 1; jj <= i__3; ++jj) {
  321. i__4 = min(jj,i3);
  322. for (ii = 1; ii <= i__4; ++ii) {
  323. work[ii + jj * 33 - 34] = ab[*kd + 1 - jj +
  324. ii + (jj + i__ - 1) * ab_dim1];
  325. /* L100: */
  326. }
  327. /* L110: */
  328. }
  329. /* Update A31 (in the work array). */
  330. i__3 = *ldab - 1;
  331. _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i3,
  332. &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3,
  333. work, &c__33);
  334. /* Update A32 */
  335. if (i2 > 0) {
  336. i__3 = *ldab - 1;
  337. i__4 = *ldab - 1;
  338. _starpu_dgemm_("No transpose", "Transpose", &i3, &i2, &ib,
  339. &c_b21, work, &c__33, &ab[ib + 1 + i__ *
  340. ab_dim1], &i__3, &c_b18, &ab[*kd + 1 - ib
  341. + (i__ + ib) * ab_dim1], &i__4);
  342. }
  343. /* Update A33 */
  344. i__3 = *ldab - 1;
  345. _starpu_dsyrk_("Lower", "No Transpose", &i3, &ib, &c_b21,
  346. work, &c__33, &c_b18, &ab[(i__ + *kd) *
  347. ab_dim1 + 1], &i__3);
  348. /* Copy the upper triangle of A31 back into place. */
  349. i__3 = ib;
  350. for (jj = 1; jj <= i__3; ++jj) {
  351. i__4 = min(jj,i3);
  352. for (ii = 1; ii <= i__4; ++ii) {
  353. ab[*kd + 1 - jj + ii + (jj + i__ - 1) *
  354. ab_dim1] = work[ii + jj * 33 - 34];
  355. /* L120: */
  356. }
  357. /* L130: */
  358. }
  359. }
  360. }
  361. /* L140: */
  362. }
  363. }
  364. }
  365. return 0;
  366. L150:
  367. return 0;
  368. /* End of DPBTRF */
  369. } /* _starpu_dpbtrf_ */