/* dgbbrd.c */
/* dgbbrd.f -- translated by f2c (version 20061008).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/
#include "f2c.h"
#include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b8 = 0.;
  15. static doublereal c_b9 = 1.;
  16. static integer c__1 = 1;
  17. /* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc,
  18. integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *
  19. d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt,
  20. integer *ldpt, doublereal *c__, integer *ldc, doublereal *work,
  21. integer *info)
  22. {
  23. /* System generated locals */
  24. integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1,
  25. q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
  26. /* Local variables */
  27. integer i__, j, l, j1, j2, kb;
  28. doublereal ra, rb, rc;
  29. integer kk, ml, mn, nr, mu;
  30. doublereal rs;
  31. integer kb1, ml0, mu0, klm, kun, nrt, klu1, inca;
  32. extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *,
  33. doublereal *, integer *, doublereal *, doublereal *);
  34. extern logical _starpu_lsame_(char *, char *);
  35. logical wantb, wantc;
  36. integer minmn;
  37. logical wantq;
  38. extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *,
  39. doublereal *, doublereal *, doublereal *, integer *),
  40. _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *,
  41. doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dlargv_(
  42. integer *, doublereal *, integer *, doublereal *, integer *,
  43. doublereal *, integer *), _starpu_dlartv_(integer *, doublereal *,
  44. integer *, doublereal *, integer *, doublereal *, doublereal *,
  45. integer *);
  46. logical wantpt;
  47. /* -- LAPACK routine (version 3.2) -- */
  48. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  49. /* November 2006 */
  50. /* .. Scalar Arguments .. */
  51. /* .. */
  52. /* .. Array Arguments .. */
  53. /* .. */
  54. /* Purpose */
  55. /* ======= */
  56. /* DGBBRD reduces a real general m-by-n band matrix A to upper */
  57. /* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */
  58. /* The routine computes B, and optionally forms Q or P', or computes */
  59. /* Q'*C for a given matrix C. */
  60. /* Arguments */
  61. /* ========= */
  62. /* VECT (input) CHARACTER*1 */
  63. /* Specifies whether or not the matrices Q and P' are to be */
  64. /* formed. */
  65. /* = 'N': do not form Q or P'; */
  66. /* = 'Q': form Q only; */
  67. /* = 'P': form P' only; */
  68. /* = 'B': form both. */
  69. /* M (input) INTEGER */
  70. /* The number of rows of the matrix A. M >= 0. */
  71. /* N (input) INTEGER */
  72. /* The number of columns of the matrix A. N >= 0. */
  73. /* NCC (input) INTEGER */
  74. /* The number of columns of the matrix C. NCC >= 0. */
  75. /* KL (input) INTEGER */
  76. /* The number of subdiagonals of the matrix A. KL >= 0. */
  77. /* KU (input) INTEGER */
  78. /* The number of superdiagonals of the matrix A. KU >= 0. */
  79. /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */
  80. /* On entry, the m-by-n band matrix A, stored in rows 1 to */
  81. /* KL+KU+1. The j-th column of A is stored in the j-th column of */
  82. /* the array AB as follows: */
  83. /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */
  84. /* On exit, A is overwritten by values generated during the */
  85. /* reduction. */
  86. /* LDAB (input) INTEGER */
  87. /* The leading dimension of the array A. LDAB >= KL+KU+1. */
  88. /* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */
  89. /* The diagonal elements of the bidiagonal matrix B. */
  90. /* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */
  91. /* The superdiagonal elements of the bidiagonal matrix B. */
  92. /* Q (output) DOUBLE PRECISION array, dimension (LDQ,M) */
  93. /* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */
  94. /* If VECT = 'N' or 'P', the array Q is not referenced. */
  95. /* LDQ (input) INTEGER */
  96. /* The leading dimension of the array Q. */
  97. /* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */
  98. /* PT (output) DOUBLE PRECISION array, dimension (LDPT,N) */
  99. /* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */
  100. /* If VECT = 'N' or 'Q', the array PT is not referenced. */
  101. /* LDPT (input) INTEGER */
  102. /* The leading dimension of the array PT. */
  103. /* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */
  104. /* C (input/output) DOUBLE PRECISION array, dimension (LDC,NCC) */
  105. /* On entry, an m-by-ncc matrix C. */
  106. /* On exit, C is overwritten by Q'*C. */
  107. /* C is not referenced if NCC = 0. */
  108. /* LDC (input) INTEGER */
  109. /* The leading dimension of the array C. */
  110. /* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */
  111. /* WORK (workspace) DOUBLE PRECISION array, dimension (2*max(M,N)) */
  112. /* INFO (output) INTEGER */
  113. /* = 0: successful exit. */
  114. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  115. /* ===================================================================== */
  116. /* .. Parameters .. */
  117. /* .. */
  118. /* .. Local Scalars .. */
  119. /* .. */
  120. /* .. External Subroutines .. */
  121. /* .. */
  122. /* .. Intrinsic Functions .. */
  123. /* .. */
  124. /* .. External Functions .. */
  125. /* .. */
  126. /* .. Executable Statements .. */
  127. /* Test the input parameters */
  128. /* Parameter adjustments */
  129. ab_dim1 = *ldab;
  130. ab_offset = 1 + ab_dim1;
  131. ab -= ab_offset;
  132. --d__;
  133. --e;
  134. q_dim1 = *ldq;
  135. q_offset = 1 + q_dim1;
  136. q -= q_offset;
  137. pt_dim1 = *ldpt;
  138. pt_offset = 1 + pt_dim1;
  139. pt -= pt_offset;
  140. c_dim1 = *ldc;
  141. c_offset = 1 + c_dim1;
  142. c__ -= c_offset;
  143. --work;
  144. /* Function Body */
  145. wantb = _starpu_lsame_(vect, "B");
  146. wantq = _starpu_lsame_(vect, "Q") || wantb;
  147. wantpt = _starpu_lsame_(vect, "P") || wantb;
  148. wantc = *ncc > 0;
  149. klu1 = *kl + *ku + 1;
  150. *info = 0;
  151. if (! wantq && ! wantpt && ! _starpu_lsame_(vect, "N")) {
  152. *info = -1;
  153. } else if (*m < 0) {
  154. *info = -2;
  155. } else if (*n < 0) {
  156. *info = -3;
  157. } else if (*ncc < 0) {
  158. *info = -4;
  159. } else if (*kl < 0) {
  160. *info = -5;
  161. } else if (*ku < 0) {
  162. *info = -6;
  163. } else if (*ldab < klu1) {
  164. *info = -8;
  165. } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) {
  166. *info = -12;
  167. } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) {
  168. *info = -14;
  169. } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) {
  170. *info = -16;
  171. }
  172. if (*info != 0) {
  173. i__1 = -(*info);
  174. _starpu_xerbla_("DGBBRD", &i__1);
  175. return 0;
  176. }
  177. /* Initialize Q and P' to the unit matrix, if needed */
  178. if (wantq) {
  179. _starpu_dlaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq);
  180. }
  181. if (wantpt) {
  182. _starpu_dlaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt);
  183. }
  184. /* Quick return if possible. */
  185. if (*m == 0 || *n == 0) {
  186. return 0;
  187. }
  188. minmn = min(*m,*n);
  189. if (*kl + *ku > 1) {
  190. /* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */
  191. /* first to lower bidiagonal form and then transform to upper */
  192. /* bidiagonal */
  193. if (*ku > 0) {
  194. ml0 = 1;
  195. mu0 = 2;
  196. } else {
  197. ml0 = 2;
  198. mu0 = 1;
  199. }
  200. /* Wherever possible, plane rotations are generated and applied in */
  201. /* vector operations of length NR over the index set J1:J2:KLU1. */
  202. /* The sines of the plane rotations are stored in WORK(1:max(m,n)) */
  203. /* and the cosines in WORK(max(m,n)+1:2*max(m,n)). */
  204. mn = max(*m,*n);
  205. /* Computing MIN */
  206. i__1 = *m - 1;
  207. klm = min(i__1,*kl);
  208. /* Computing MIN */
  209. i__1 = *n - 1;
  210. kun = min(i__1,*ku);
  211. kb = klm + kun;
  212. kb1 = kb + 1;
  213. inca = kb1 * *ldab;
  214. nr = 0;
  215. j1 = klm + 2;
  216. j2 = 1 - kun;
  217. i__1 = minmn;
  218. for (i__ = 1; i__ <= i__1; ++i__) {
  219. /* Reduce i-th column and i-th row of matrix to bidiagonal form */
  220. ml = klm + 1;
  221. mu = kun + 1;
  222. i__2 = kb;
  223. for (kk = 1; kk <= i__2; ++kk) {
  224. j1 += kb;
  225. j2 += kb;
  226. /* generate plane rotations to annihilate nonzero elements */
  227. /* which have been created below the band */
  228. if (nr > 0) {
  229. _starpu_dlargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca,
  230. &work[j1], &kb1, &work[mn + j1], &kb1);
  231. }
  232. /* apply plane rotations from the left */
  233. i__3 = kb;
  234. for (l = 1; l <= i__3; ++l) {
  235. if (j2 - klm + l - 1 > *n) {
  236. nrt = nr - 1;
  237. } else {
  238. nrt = nr;
  239. }
  240. if (nrt > 0) {
  241. _starpu_dlartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) *
  242. ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm
  243. + l - 1) * ab_dim1], &inca, &work[mn + j1], &
  244. work[j1], &kb1);
  245. }
  246. /* L10: */
  247. }
  248. if (ml > ml0) {
  249. if (ml <= *m - i__ + 1) {
  250. /* generate plane rotation to annihilate a(i+ml-1,i) */
  251. /* within the band, and apply rotation from the left */
  252. _starpu_dlartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku +
  253. ml + i__ * ab_dim1], &work[mn + i__ + ml - 1],
  254. &work[i__ + ml - 1], &ra);
  255. ab[*ku + ml - 1 + i__ * ab_dim1] = ra;
  256. if (i__ < *n) {
  257. /* Computing MIN */
  258. i__4 = *ku + ml - 2, i__5 = *n - i__;
  259. i__3 = min(i__4,i__5);
  260. i__6 = *ldab - 1;
  261. i__7 = *ldab - 1;
  262. _starpu_drot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) *
  263. ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__
  264. + 1) * ab_dim1], &i__7, &work[mn + i__ +
  265. ml - 1], &work[i__ + ml - 1]);
  266. }
  267. }
  268. ++nr;
  269. j1 -= kb1;
  270. }
  271. if (wantq) {
  272. /* accumulate product of plane rotations in Q */
  273. i__3 = j2;
  274. i__4 = kb1;
  275. for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4)
  276. {
  277. _starpu_drot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j *
  278. q_dim1 + 1], &c__1, &work[mn + j], &work[j]);
  279. /* L20: */
  280. }
  281. }
  282. if (wantc) {
  283. /* apply plane rotations to C */
  284. i__4 = j2;
  285. i__3 = kb1;
  286. for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3)
  287. {
  288. _starpu_drot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1]
  289. , ldc, &work[mn + j], &work[j]);
  290. /* L30: */
  291. }
  292. }
  293. if (j2 + kun > *n) {
  294. /* adjust J2 to keep within the bounds of the matrix */
  295. --nr;
  296. j2 -= kb1;
  297. }
  298. i__3 = j2;
  299. i__4 = kb1;
  300. for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {
  301. /* create nonzero element a(j-1,j+ku) above the band */
  302. /* and store it in WORK(n+1:2*n) */
  303. work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1];
  304. ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun)
  305. * ab_dim1 + 1];
  306. /* L40: */
  307. }
  308. /* generate plane rotations to annihilate nonzero elements */
  309. /* which have been generated above the band */
  310. if (nr > 0) {
  311. _starpu_dlargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, &
  312. work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1);
  313. }
  314. /* apply plane rotations from the right */
  315. i__4 = kb;
  316. for (l = 1; l <= i__4; ++l) {
  317. if (j2 + l - 1 > *m) {
  318. nrt = nr - 1;
  319. } else {
  320. nrt = nr;
  321. }
  322. if (nrt > 0) {
  323. _starpu_dlartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], &
  324. inca, &ab[l + (j1 + kun) * ab_dim1], &inca, &
  325. work[mn + j1 + kun], &work[j1 + kun], &kb1);
  326. }
  327. /* L50: */
  328. }
  329. if (ml == ml0 && mu > mu0) {
  330. if (mu <= *n - i__ + 1) {
  331. /* generate plane rotation to annihilate a(i,i+mu-1) */
  332. /* within the band, and apply rotation from the right */
  333. _starpu_dlartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1],
  334. &ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1],
  335. &work[mn + i__ + mu - 1], &work[i__ + mu - 1],
  336. &ra);
  337. ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra;
  338. /* Computing MIN */
  339. i__3 = *kl + mu - 2, i__5 = *m - i__;
  340. i__4 = min(i__3,i__5);
  341. _starpu_drot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) *
  342. ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu
  343. - 1) * ab_dim1], &c__1, &work[mn + i__ + mu -
  344. 1], &work[i__ + mu - 1]);
  345. }
  346. ++nr;
  347. j1 -= kb1;
  348. }
  349. if (wantpt) {
  350. /* accumulate product of plane rotations in P' */
  351. i__4 = j2;
  352. i__3 = kb1;
  353. for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3)
  354. {
  355. _starpu_drot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j +
  356. kun + pt_dim1], ldpt, &work[mn + j + kun], &
  357. work[j + kun]);
  358. /* L60: */
  359. }
  360. }
  361. if (j2 + kb > *m) {
  362. /* adjust J2 to keep within the bounds of the matrix */
  363. --nr;
  364. j2 -= kb1;
  365. }
  366. i__3 = j2;
  367. i__4 = kb1;
  368. for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {
  369. /* create nonzero element a(j+kl+ku,j+ku-1) below the */
  370. /* band and store it in WORK(1:n) */
  371. work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) *
  372. ab_dim1];
  373. ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[
  374. klu1 + (j + kun) * ab_dim1];
  375. /* L70: */
  376. }
  377. if (ml > ml0) {
  378. --ml;
  379. } else {
  380. --mu;
  381. }
  382. /* L80: */
  383. }
  384. /* L90: */
  385. }
  386. }
  387. if (*ku == 0 && *kl > 0) {
  388. /* A has been reduced to lower bidiagonal form */
  389. /* Transform lower bidiagonal form to upper bidiagonal by applying */
  390. /* plane rotations from the left, storing diagonal elements in D */
  391. /* and off-diagonal elements in E */
  392. /* Computing MIN */
  393. i__2 = *m - 1;
  394. i__1 = min(i__2,*n);
  395. for (i__ = 1; i__ <= i__1; ++i__) {
  396. _starpu_dlartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs,
  397. &ra);
  398. d__[i__] = ra;
  399. if (i__ < *n) {
  400. e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1];
  401. ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1]
  402. ;
  403. }
  404. if (wantq) {
  405. _starpu_drot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 +
  406. 1], &c__1, &rc, &rs);
  407. }
  408. if (wantc) {
  409. _starpu_drot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1],
  410. ldc, &rc, &rs);
  411. }
  412. /* L100: */
  413. }
  414. if (*m <= *n) {
  415. d__[*m] = ab[*m * ab_dim1 + 1];
  416. }
  417. } else if (*ku > 0) {
  418. /* A has been reduced to upper bidiagonal form */
  419. if (*m < *n) {
  420. /* Annihilate a(m,m+1) by applying plane rotations from the */
  421. /* right, storing diagonal elements in D and off-diagonal */
  422. /* elements in E */
  423. rb = ab[*ku + (*m + 1) * ab_dim1];
  424. for (i__ = *m; i__ >= 1; --i__) {
  425. _starpu_dlartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra);
  426. d__[i__] = ra;
  427. if (i__ > 1) {
  428. rb = -rs * ab[*ku + i__ * ab_dim1];
  429. e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1];
  430. }
  431. if (wantpt) {
  432. _starpu_drot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1],
  433. ldpt, &rc, &rs);
  434. }
  435. /* L110: */
  436. }
  437. } else {
  438. /* Copy off-diagonal elements to E and diagonal elements to D */
  439. i__1 = minmn - 1;
  440. for (i__ = 1; i__ <= i__1; ++i__) {
  441. e[i__] = ab[*ku + (i__ + 1) * ab_dim1];
  442. /* L120: */
  443. }
  444. i__1 = minmn;
  445. for (i__ = 1; i__ <= i__1; ++i__) {
  446. d__[i__] = ab[*ku + 1 + i__ * ab_dim1];
  447. /* L130: */
  448. }
  449. }
  450. } else {
  451. /* A is diagonal. Set elements of E to zero and copy diagonal */
  452. /* elements to D. */
  453. i__1 = minmn - 1;
  454. for (i__ = 1; i__ <= i__1; ++i__) {
  455. e[i__] = 0.;
  456. /* L140: */
  457. }
  458. i__1 = minmn;
  459. for (i__ = 1; i__ <= i__1; ++i__) {
  460. d__[i__] = ab[i__ * ab_dim1 + 1];
  461. /* L150: */
  462. }
  463. }
  464. return 0;
  465. /* End of DGBBRD */
  466. } /* _starpu_dgbbrd_ */