dgeqp3.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. /* dgeqp3.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static integer c_n1 = -1;
  16. static integer c__3 = 3;
  17. static integer c__2 = 2;
  18. /* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer *
  19. lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork,
  20. integer *info)
  21. {
  22. /* System generated locals */
  23. integer a_dim1, a_offset, i__1, i__2, i__3;
  24. /* Local variables */
  25. integer j, jb, na, nb, sm, sn, nx, fjb, iws, nfxd;
  26. extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *);
  27. integer nbmin, minmn;
  28. extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *,
  29. doublereal *, integer *);
  30. integer minws;
  31. extern /* Subroutine */ int _starpu_dlaqp2_(integer *, integer *, integer *,
  32. doublereal *, integer *, integer *, doublereal *, doublereal *,
  33. doublereal *, doublereal *), _starpu_dgeqrf_(integer *, integer *,
  34. doublereal *, integer *, doublereal *, doublereal *, integer *,
  35. integer *), _starpu_xerbla_(char *, integer *);
  36. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  37. integer *, integer *);
  38. extern /* Subroutine */ int _starpu_dlaqps_(integer *, integer *, integer *,
  39. integer *, integer *, doublereal *, integer *, integer *,
  40. doublereal *, doublereal *, doublereal *, doublereal *,
  41. doublereal *, integer *);
  42. integer topbmn, sminmn;
  43. extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *,
  44. integer *, doublereal *, integer *, doublereal *, doublereal *,
  45. integer *, doublereal *, integer *, integer *);
  46. integer lwkopt;
  47. logical lquery;
  48. /* -- LAPACK routine (version 3.2) -- */
  49. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  50. /* November 2006 */
  51. /* .. Scalar Arguments .. */
  52. /* .. */
  53. /* .. Array Arguments .. */
  54. /* .. */
  55. /* Purpose */
  56. /* ======= */
  57. /* DGEQP3 computes a QR factorization with column pivoting of a */
  58. /* matrix A: A*P = Q*R using Level 3 BLAS. */
  59. /* Arguments */
  60. /* ========= */
  61. /* M (input) INTEGER */
  62. /* The number of rows of the matrix A. M >= 0. */
  63. /* N (input) INTEGER */
  64. /* The number of columns of the matrix A. N >= 0. */
  65. /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
  66. /* On entry, the M-by-N matrix A. */
  67. /* On exit, the upper triangle of the array contains the */
  68. /* min(M,N)-by-N upper trapezoidal matrix R; the elements below */
  69. /* the diagonal, together with the array TAU, represent the */
  70. /* orthogonal matrix Q as a product of min(M,N) elementary */
  71. /* reflectors. */
  72. /* LDA (input) INTEGER */
  73. /* The leading dimension of the array A. LDA >= max(1,M). */
  74. /* JPVT (input/output) INTEGER array, dimension (N) */
  75. /* On entry, if JPVT(J).ne.0, the J-th column of A is permuted */
  76. /* to the front of A*P (a leading column); if JPVT(J)=0, */
  77. /* the J-th column of A is a free column. */
  78. /* On exit, if JPVT(J)=K, then the J-th column of A*P was the */
  79. /* the K-th column of A. */
  80. /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */
  81. /* The scalar factors of the elementary reflectors. */
  82. /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
  83. /* On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
  84. /* LWORK (input) INTEGER */
  85. /* The dimension of the array WORK. LWORK >= 3*N+1. */
  86. /* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */
  87. /* is the optimal blocksize. */
  88. /* If LWORK = -1, then a workspace query is assumed; the routine */
  89. /* only calculates the optimal size of the WORK array, returns */
  90. /* this value as the first entry of the WORK array, and no error */
  91. /* message related to LWORK is issued by XERBLA. */
  92. /* INFO (output) INTEGER */
  93. /* = 0: successful exit. */
  94. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  95. /* Further Details */
  96. /* =============== */
  97. /* The matrix Q is represented as a product of elementary reflectors */
  98. /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */
  99. /* Each H(i) has the form */
  100. /* H(i) = I - tau * v * v' */
  101. /* where tau is a real/complex scalar, and v is a real/complex vector */
  102. /* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */
  103. /* A(i+1:m,i), and tau in TAU(i). */
  104. /* Based on contributions by */
  105. /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
  106. /* X. Sun, Computer Science Dept., Duke University, USA */
  107. /* ===================================================================== */
  108. /* .. Parameters .. */
  109. /* .. */
  110. /* .. Local Scalars .. */
  111. /* .. */
  112. /* .. External Subroutines .. */
  113. /* .. */
  114. /* .. External Functions .. */
  115. /* .. */
  116. /* .. Intrinsic Functions .. */
  117. /* .. */
  118. /* .. Executable Statements .. */
  119. /* Test input arguments */
  120. /* ==================== */
  121. /* Parameter adjustments */
  122. a_dim1 = *lda;
  123. a_offset = 1 + a_dim1;
  124. a -= a_offset;
  125. --jpvt;
  126. --tau;
  127. --work;
  128. /* Function Body */
  129. *info = 0;
  130. lquery = *lwork == -1;
  131. if (*m < 0) {
  132. *info = -1;
  133. } else if (*n < 0) {
  134. *info = -2;
  135. } else if (*lda < max(1,*m)) {
  136. *info = -4;
  137. }
  138. if (*info == 0) {
  139. minmn = min(*m,*n);
  140. if (minmn == 0) {
  141. iws = 1;
  142. lwkopt = 1;
  143. } else {
  144. iws = *n * 3 + 1;
  145. nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1);
  146. lwkopt = (*n << 1) + (*n + 1) * nb;
  147. }
  148. work[1] = (doublereal) lwkopt;
  149. if (*lwork < iws && ! lquery) {
  150. *info = -8;
  151. }
  152. }
  153. if (*info != 0) {
  154. i__1 = -(*info);
  155. _starpu_xerbla_("DGEQP3", &i__1);
  156. return 0;
  157. } else if (lquery) {
  158. return 0;
  159. }
  160. /* Quick return if possible. */
  161. if (minmn == 0) {
  162. return 0;
  163. }
  164. /* Move initial columns up front. */
  165. nfxd = 1;
  166. i__1 = *n;
  167. for (j = 1; j <= i__1; ++j) {
  168. if (jpvt[j] != 0) {
  169. if (j != nfxd) {
  170. _starpu_dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], &
  171. c__1);
  172. jpvt[j] = jpvt[nfxd];
  173. jpvt[nfxd] = j;
  174. } else {
  175. jpvt[j] = j;
  176. }
  177. ++nfxd;
  178. } else {
  179. jpvt[j] = j;
  180. }
  181. /* L10: */
  182. }
  183. --nfxd;
  184. /* Factorize fixed columns */
  185. /* ======================= */
  186. /* Compute the QR factorization of fixed columns and update */
  187. /* remaining columns. */
  188. if (nfxd > 0) {
  189. na = min(*m,nfxd);
  190. /* CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */
  191. _starpu_dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info);
  192. /* Computing MAX */
  193. i__1 = iws, i__2 = (integer) work[1];
  194. iws = max(i__1,i__2);
  195. if (na < *n) {
  196. /* CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */
  197. /* CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */
  198. i__1 = *n - na;
  199. _starpu_dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, &
  200. tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork,
  201. info);
  202. /* Computing MAX */
  203. i__1 = iws, i__2 = (integer) work[1];
  204. iws = max(i__1,i__2);
  205. }
  206. }
  207. /* Factorize free columns */
  208. /* ====================== */
  209. if (nfxd < minmn) {
  210. sm = *m - nfxd;
  211. sn = *n - nfxd;
  212. sminmn = minmn - nfxd;
  213. /* Determine the block size. */
  214. nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1);
  215. nbmin = 2;
  216. nx = 0;
  217. if (nb > 1 && nb < sminmn) {
  218. /* Determine when to cross over from blocked to unblocked code. */
  219. /* Computing MAX */
  220. i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, &
  221. c_n1);
  222. nx = max(i__1,i__2);
  223. if (nx < sminmn) {
  224. /* Determine if workspace is large enough for blocked code. */
  225. minws = (sn << 1) + (sn + 1) * nb;
  226. iws = max(iws,minws);
  227. if (*lwork < minws) {
  228. /* Not enough workspace to use optimal NB: Reduce NB and */
  229. /* determine the minimum value of NB. */
  230. nb = (*lwork - (sn << 1)) / (sn + 1);
  231. /* Computing MAX */
  232. i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, &
  233. c_n1, &c_n1);
  234. nbmin = max(i__1,i__2);
  235. }
  236. }
  237. }
  238. /* Initialize partial column norms. The first N elements of work */
  239. /* store the exact column norms. */
  240. i__1 = *n;
  241. for (j = nfxd + 1; j <= i__1; ++j) {
  242. work[j] = _starpu_dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1);
  243. work[*n + j] = work[j];
  244. /* L20: */
  245. }
  246. if (nb >= nbmin && nb < sminmn && nx < sminmn) {
  247. /* Use blocked code initially. */
  248. j = nfxd + 1;
  249. /* Compute factorization: while loop. */
  250. topbmn = minmn - nx;
  251. L30:
  252. if (j <= topbmn) {
  253. /* Computing MIN */
  254. i__1 = nb, i__2 = topbmn - j + 1;
  255. jb = min(i__1,i__2);
  256. /* Factorize JB columns among columns J:N. */
  257. i__1 = *n - j + 1;
  258. i__2 = j - 1;
  259. i__3 = *n - j + 1;
  260. _starpu_dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, &
  261. jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n
  262. << 1) + 1], &work[(*n << 1) + jb + 1], &i__3);
  263. j += fjb;
  264. goto L30;
  265. }
  266. } else {
  267. j = nfxd + 1;
  268. }
  269. /* Use unblocked code to factor the last or only block. */
  270. if (j <= minmn) {
  271. i__1 = *n - j + 1;
  272. i__2 = j - 1;
  273. _starpu_dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[
  274. j], &work[j], &work[*n + j], &work[(*n << 1) + 1]);
  275. }
  276. }
  277. work[1] = (doublereal) iws;
  278. return 0;
  279. /* End of DGEQP3 */
  280. } /* _starpu_dgeqp3_ */