dggsvp.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /* dggsvp.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b12 = 0.;
  15. static doublereal c_b22 = 1.;
  16. /* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m,
  17. integer *p, integer *n, doublereal *a, integer *lda, doublereal *b,
  18. integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer
  19. *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv,
  20. doublereal *q, integer *ldq, integer *iwork, doublereal *tau,
  21. doublereal *work, integer *info)
  22. {
  23. /* System generated locals */
  24. integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1,
  25. u_offset, v_dim1, v_offset, i__1, i__2, i__3;
  26. doublereal d__1;
  27. /* Local variables */
  28. integer i__, j;
  29. extern logical _starpu_lsame_(char *, char *);
  30. logical wantq, wantu, wantv;
  31. extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *,
  32. integer *, doublereal *, doublereal *, integer *), _starpu_dgerq2_(
  33. integer *, integer *, doublereal *, integer *, doublereal *,
  34. doublereal *, integer *), _starpu_dorg2r_(integer *, integer *, integer *,
  35. doublereal *, integer *, doublereal *, doublereal *, integer *),
  36. _starpu_dorm2r_(char *, char *, integer *, integer *, integer *,
  37. doublereal *, integer *, doublereal *, doublereal *, integer *,
  38. doublereal *, integer *), _starpu_dormr2_(char *, char *,
  39. integer *, integer *, integer *, doublereal *, integer *,
  40. doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgeqpf_(integer *, integer *, doublereal *,
  41. integer *, integer *, doublereal *, doublereal *, integer *),
  42. _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *,
  43. doublereal *, integer *), _starpu_dlaset_(char *, integer *,
  44. integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dlapmt_(logical *,
  45. integer *, integer *, doublereal *, integer *, integer *);
  46. logical forwrd;
  47. /* -- LAPACK routine (version 3.2) -- */
  48. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  49. /* November 2006 */
  50. /* .. Scalar Arguments .. */
  51. /* .. */
  52. /* .. Array Arguments .. */
  53. /* .. */
  54. /* Purpose */
  55. /* ======= */
  56. /* DGGSVP computes orthogonal matrices U, V and Q such that */
  57. /* N-K-L K L */
  58. /* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */
  59. /* L ( 0 0 A23 ) */
  60. /* M-K-L ( 0 0 0 ) */
  61. /* N-K-L K L */
  62. /* = K ( 0 A12 A13 ) if M-K-L < 0; */
  63. /* M-K ( 0 0 A23 ) */
  64. /* N-K-L K L */
  65. /* V'*B*Q = L ( 0 0 B13 ) */
  66. /* P-L ( 0 0 0 ) */
  67. /* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */
  68. /* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */
  69. /* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */
  70. /* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the */
  71. /* transpose of Z. */
  72. /* This decomposition is the preprocessing step for computing the */
  73. /* Generalized Singular Value Decomposition (GSVD), see subroutine */
  74. /* DGGSVD. */
  75. /* Arguments */
  76. /* ========= */
  77. /* JOBU (input) CHARACTER*1 */
  78. /* = 'U': Orthogonal matrix U is computed; */
  79. /* = 'N': U is not computed. */
  80. /* JOBV (input) CHARACTER*1 */
  81. /* = 'V': Orthogonal matrix V is computed; */
  82. /* = 'N': V is not computed. */
  83. /* JOBQ (input) CHARACTER*1 */
  84. /* = 'Q': Orthogonal matrix Q is computed; */
  85. /* = 'N': Q is not computed. */
  86. /* M (input) INTEGER */
  87. /* The number of rows of the matrix A. M >= 0. */
  88. /* P (input) INTEGER */
  89. /* The number of rows of the matrix B. P >= 0. */
  90. /* N (input) INTEGER */
  91. /* The number of columns of the matrices A and B. N >= 0. */
  92. /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
  93. /* On entry, the M-by-N matrix A. */
  94. /* On exit, A contains the triangular (or trapezoidal) matrix */
  95. /* described in the Purpose section. */
  96. /* LDA (input) INTEGER */
  97. /* The leading dimension of the array A. LDA >= max(1,M). */
  98. /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */
  99. /* On entry, the P-by-N matrix B. */
  100. /* On exit, B contains the triangular matrix described in */
  101. /* the Purpose section. */
  102. /* LDB (input) INTEGER */
  103. /* The leading dimension of the array B. LDB >= max(1,P). */
  104. /* TOLA (input) DOUBLE PRECISION */
  105. /* TOLB (input) DOUBLE PRECISION */
  106. /* TOLA and TOLB are the thresholds to determine the effective */
  107. /* numerical rank of matrix B and a subblock of A. Generally, */
  108. /* they are set to */
  109. /* TOLA = MAX(M,N)*norm(A)*MAZHEPS, */
  110. /* TOLB = MAX(P,N)*norm(B)*MAZHEPS. */
  111. /* The size of TOLA and TOLB may affect the size of backward */
  112. /* errors of the decomposition. */
  113. /* K (output) INTEGER */
  114. /* L (output) INTEGER */
  115. /* On exit, K and L specify the dimension of the subblocks */
  116. /* described in Purpose. */
  117. /* K + L = effective numerical rank of (A',B')'. */
  118. /* U (output) DOUBLE PRECISION array, dimension (LDU,M) */
  119. /* If JOBU = 'U', U contains the orthogonal matrix U. */
  120. /* If JOBU = 'N', U is not referenced. */
  121. /* LDU (input) INTEGER */
  122. /* The leading dimension of the array U. LDU >= max(1,M) if */
  123. /* JOBU = 'U'; LDU >= 1 otherwise. */
  124. /* V (output) DOUBLE PRECISION array, dimension (LDV,P) */
  125. /* If JOBV = 'V', V contains the orthogonal matrix V. */
  126. /* If JOBV = 'N', V is not referenced. */
  127. /* LDV (input) INTEGER */
  128. /* The leading dimension of the array V. LDV >= max(1,P) if */
  129. /* JOBV = 'V'; LDV >= 1 otherwise. */
  130. /* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */
  131. /* If JOBQ = 'Q', Q contains the orthogonal matrix Q. */
  132. /* If JOBQ = 'N', Q is not referenced. */
  133. /* LDQ (input) INTEGER */
  134. /* The leading dimension of the array Q. LDQ >= max(1,N) if */
  135. /* JOBQ = 'Q'; LDQ >= 1 otherwise. */
  136. /* IWORK (workspace) INTEGER array, dimension (N) */
  137. /* TAU (workspace) DOUBLE PRECISION array, dimension (N) */
  138. /* WORK (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P)) */
  139. /* INFO (output) INTEGER */
  140. /* = 0: successful exit */
  141. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  142. /* Further Details */
  143. /* =============== */
  144. /* The subroutine uses LAPACK subroutine DGEQPF for the QR factorization */
  145. /* with column pivoting to detect the effective numerical rank of the */
  146. /* a matrix. It may be replaced by a better rank determination strategy. */
  147. /* ===================================================================== */
  148. /* .. Parameters .. */
  149. /* .. */
  150. /* .. Local Scalars .. */
  151. /* .. */
  152. /* .. External Functions .. */
  153. /* .. */
  154. /* .. External Subroutines .. */
  155. /* .. */
  156. /* .. Intrinsic Functions .. */
  157. /* .. */
  158. /* .. Executable Statements .. */
  159. /* Test the input parameters */
  160. /* Parameter adjustments */
  161. a_dim1 = *lda;
  162. a_offset = 1 + a_dim1;
  163. a -= a_offset;
  164. b_dim1 = *ldb;
  165. b_offset = 1 + b_dim1;
  166. b -= b_offset;
  167. u_dim1 = *ldu;
  168. u_offset = 1 + u_dim1;
  169. u -= u_offset;
  170. v_dim1 = *ldv;
  171. v_offset = 1 + v_dim1;
  172. v -= v_offset;
  173. q_dim1 = *ldq;
  174. q_offset = 1 + q_dim1;
  175. q -= q_offset;
  176. --iwork;
  177. --tau;
  178. --work;
  179. /* Function Body */
  180. wantu = _starpu_lsame_(jobu, "U");
  181. wantv = _starpu_lsame_(jobv, "V");
  182. wantq = _starpu_lsame_(jobq, "Q");
  183. forwrd = TRUE_;
  184. *info = 0;
  185. if (! (wantu || _starpu_lsame_(jobu, "N"))) {
  186. *info = -1;
  187. } else if (! (wantv || _starpu_lsame_(jobv, "N"))) {
  188. *info = -2;
  189. } else if (! (wantq || _starpu_lsame_(jobq, "N"))) {
  190. *info = -3;
  191. } else if (*m < 0) {
  192. *info = -4;
  193. } else if (*p < 0) {
  194. *info = -5;
  195. } else if (*n < 0) {
  196. *info = -6;
  197. } else if (*lda < max(1,*m)) {
  198. *info = -8;
  199. } else if (*ldb < max(1,*p)) {
  200. *info = -10;
  201. } else if (*ldu < 1 || wantu && *ldu < *m) {
  202. *info = -16;
  203. } else if (*ldv < 1 || wantv && *ldv < *p) {
  204. *info = -18;
  205. } else if (*ldq < 1 || wantq && *ldq < *n) {
  206. *info = -20;
  207. }
  208. if (*info != 0) {
  209. i__1 = -(*info);
  210. _starpu_xerbla_("DGGSVP", &i__1);
  211. return 0;
  212. }
  213. /* QR with column pivoting of B: B*P = V*( S11 S12 ) */
  214. /* ( 0 0 ) */
  215. i__1 = *n;
  216. for (i__ = 1; i__ <= i__1; ++i__) {
  217. iwork[i__] = 0;
  218. /* L10: */
  219. }
  220. _starpu_dgeqpf_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], info);
  221. /* Update A := A*P */
  222. _starpu_dlapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]);
  223. /* Determine the effective rank of matrix B. */
  224. *l = 0;
  225. i__1 = min(*p,*n);
  226. for (i__ = 1; i__ <= i__1; ++i__) {
  227. if ((d__1 = b[i__ + i__ * b_dim1], abs(d__1)) > *tolb) {
  228. ++(*l);
  229. }
  230. /* L20: */
  231. }
  232. if (wantv) {
  233. /* Copy the details of V, and form V. */
  234. _starpu_dlaset_("Full", p, p, &c_b12, &c_b12, &v[v_offset], ldv);
  235. if (*p > 1) {
  236. i__1 = *p - 1;
  237. _starpu_dlacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2],
  238. ldv);
  239. }
  240. i__1 = min(*p,*n);
  241. _starpu_dorg2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info);
  242. }
  243. /* Clean up B */
  244. i__1 = *l - 1;
  245. for (j = 1; j <= i__1; ++j) {
  246. i__2 = *l;
  247. for (i__ = j + 1; i__ <= i__2; ++i__) {
  248. b[i__ + j * b_dim1] = 0.;
  249. /* L30: */
  250. }
  251. /* L40: */
  252. }
  253. if (*p > *l) {
  254. i__1 = *p - *l;
  255. _starpu_dlaset_("Full", &i__1, n, &c_b12, &c_b12, &b[*l + 1 + b_dim1], ldb);
  256. }
  257. if (wantq) {
  258. /* Set Q = I and Update Q := Q*P */
  259. _starpu_dlaset_("Full", n, n, &c_b12, &c_b22, &q[q_offset], ldq);
  260. _starpu_dlapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]);
  261. }
  262. if (*p >= *l && *n != *l) {
  263. /* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z */
  264. _starpu_dgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info);
  265. /* Update A := A*Z' */
  266. _starpu_dormr2_("Right", "Transpose", m, n, l, &b[b_offset], ldb, &tau[1], &a[
  267. a_offset], lda, &work[1], info);
  268. if (wantq) {
  269. /* Update Q := Q*Z' */
  270. _starpu_dormr2_("Right", "Transpose", n, n, l, &b[b_offset], ldb, &tau[1],
  271. &q[q_offset], ldq, &work[1], info);
  272. }
  273. /* Clean up B */
  274. i__1 = *n - *l;
  275. _starpu_dlaset_("Full", l, &i__1, &c_b12, &c_b12, &b[b_offset], ldb);
  276. i__1 = *n;
  277. for (j = *n - *l + 1; j <= i__1; ++j) {
  278. i__2 = *l;
  279. for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) {
  280. b[i__ + j * b_dim1] = 0.;
  281. /* L50: */
  282. }
  283. /* L60: */
  284. }
  285. }
  286. /* Let N-L L */
  287. /* A = ( A11 A12 ) M, */
  288. /* then the following does the complete QR decomposition of A11: */
  289. /* A11 = U*( 0 T12 )*P1' */
  290. /* ( 0 0 ) */
  291. i__1 = *n - *l;
  292. for (i__ = 1; i__ <= i__1; ++i__) {
  293. iwork[i__] = 0;
  294. /* L70: */
  295. }
  296. i__1 = *n - *l;
  297. _starpu_dgeqpf_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], info);
  298. /* Determine the effective rank of A11 */
  299. *k = 0;
  300. /* Computing MIN */
  301. i__2 = *m, i__3 = *n - *l;
  302. i__1 = min(i__2,i__3);
  303. for (i__ = 1; i__ <= i__1; ++i__) {
  304. if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) > *tola) {
  305. ++(*k);
  306. }
  307. /* L80: */
  308. }
  309. /* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) */
  310. /* Computing MIN */
  311. i__2 = *m, i__3 = *n - *l;
  312. i__1 = min(i__2,i__3);
  313. _starpu_dorm2r_("Left", "Transpose", m, l, &i__1, &a[a_offset], lda, &tau[1], &a[(
  314. *n - *l + 1) * a_dim1 + 1], lda, &work[1], info);
  315. if (wantu) {
  316. /* Copy the details of U, and form U */
  317. _starpu_dlaset_("Full", m, m, &c_b12, &c_b12, &u[u_offset], ldu);
  318. if (*m > 1) {
  319. i__1 = *m - 1;
  320. i__2 = *n - *l;
  321. _starpu_dlacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2]
  322. , ldu);
  323. }
  324. /* Computing MIN */
  325. i__2 = *m, i__3 = *n - *l;
  326. i__1 = min(i__2,i__3);
  327. _starpu_dorg2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info);
  328. }
  329. if (wantq) {
  330. /* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */
  331. i__1 = *n - *l;
  332. _starpu_dlapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]);
  333. }
  334. /* Clean up A: set the strictly lower triangular part of */
  335. /* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */
  336. i__1 = *k - 1;
  337. for (j = 1; j <= i__1; ++j) {
  338. i__2 = *k;
  339. for (i__ = j + 1; i__ <= i__2; ++i__) {
  340. a[i__ + j * a_dim1] = 0.;
  341. /* L90: */
  342. }
  343. /* L100: */
  344. }
  345. if (*m > *k) {
  346. i__1 = *m - *k;
  347. i__2 = *n - *l;
  348. _starpu_dlaset_("Full", &i__1, &i__2, &c_b12, &c_b12, &a[*k + 1 + a_dim1],
  349. lda);
  350. }
  351. if (*n - *l > *k) {
  352. /* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */
  353. i__1 = *n - *l;
  354. _starpu_dgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info);
  355. if (wantq) {
  356. /* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' */
  357. i__1 = *n - *l;
  358. _starpu_dormr2_("Right", "Transpose", n, &i__1, k, &a[a_offset], lda, &
  359. tau[1], &q[q_offset], ldq, &work[1], info);
  360. }
  361. /* Clean up A */
  362. i__1 = *n - *l - *k;
  363. _starpu_dlaset_("Full", k, &i__1, &c_b12, &c_b12, &a[a_offset], lda);
  364. i__1 = *n - *l;
  365. for (j = *n - *l - *k + 1; j <= i__1; ++j) {
  366. i__2 = *k;
  367. for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) {
  368. a[i__ + j * a_dim1] = 0.;
  369. /* L110: */
  370. }
  371. /* L120: */
  372. }
  373. }
  374. if (*m > *k) {
  375. /* QR factorization of A( K+1:M,N-L+1:N ) */
  376. i__1 = *m - *k;
  377. _starpu_dgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], &
  378. work[1], info);
  379. if (wantu) {
  380. /* Update U(:,K+1:M) := U(:,K+1:M)*U1 */
  381. i__1 = *m - *k;
  382. /* Computing MIN */
  383. i__3 = *m - *k;
  384. i__2 = min(i__3,*l);
  385. _starpu_dorm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n
  386. - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 +
  387. 1], ldu, &work[1], info);
  388. }
  389. /* Clean up */
  390. i__1 = *n;
  391. for (j = *n - *l + 1; j <= i__1; ++j) {
  392. i__2 = *m;
  393. for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) {
  394. a[i__ + j * a_dim1] = 0.;
  395. /* L130: */
  396. }
  397. /* L140: */
  398. }
  399. }
  400. return 0;
  401. /* End of DGGSVP */
  402. } /* _starpu_dggsvp_ */